Files
nemotron-terminal-data_scie…/trainer_state.json

2860 lines
79 KiB
JSON
Raw Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1281,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0273224043715847,
"grad_norm": 11.838881491183738,
"learning_rate": 1.2403100775193799e-06,
"loss": 0.781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2679702043533325,
"step": 5,
"valid_targets_mean": 8483.3,
"valid_targets_min": 3529
},
{
"epoch": 0.0546448087431694,
"grad_norm": 8.917193900897011,
"learning_rate": 2.790697674418605e-06,
"loss": 0.7665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544458508491516,
"step": 10,
"valid_targets_mean": 9028.7,
"valid_targets_min": 3229
},
{
"epoch": 0.08196721311475409,
"grad_norm": 4.5763398940842155,
"learning_rate": 4.34108527131783e-06,
"loss": 0.7206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2399713397026062,
"step": 15,
"valid_targets_mean": 8973.2,
"valid_targets_min": 2531
},
{
"epoch": 0.1092896174863388,
"grad_norm": 2.0138887147015327,
"learning_rate": 5.891472868217055e-06,
"loss": 0.6551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21829384565353394,
"step": 20,
"valid_targets_mean": 9968.4,
"valid_targets_min": 3738
},
{
"epoch": 0.1366120218579235,
"grad_norm": 1.2798621873265643,
"learning_rate": 7.44186046511628e-06,
"loss": 0.6161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1876898854970932,
"step": 25,
"valid_targets_mean": 8451.4,
"valid_targets_min": 824
},
{
"epoch": 0.16393442622950818,
"grad_norm": 1.0982561722021733,
"learning_rate": 8.992248062015505e-06,
"loss": 0.5942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19325317442417145,
"step": 30,
"valid_targets_mean": 7975.0,
"valid_targets_min": 2809
},
{
"epoch": 0.1912568306010929,
"grad_norm": 0.7393796649528086,
"learning_rate": 1.0542635658914731e-05,
"loss": 0.5691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21067436039447784,
"step": 35,
"valid_targets_mean": 9765.5,
"valid_targets_min": 4534
},
{
"epoch": 0.2185792349726776,
"grad_norm": 0.5772669792435342,
"learning_rate": 1.2093023255813954e-05,
"loss": 0.5515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1745697259902954,
"step": 40,
"valid_targets_mean": 9180.5,
"valid_targets_min": 3882
},
{
"epoch": 0.2459016393442623,
"grad_norm": 0.49439644059594706,
"learning_rate": 1.3643410852713179e-05,
"loss": 0.5309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17998377978801727,
"step": 45,
"valid_targets_mean": 9357.6,
"valid_targets_min": 4159
},
{
"epoch": 0.273224043715847,
"grad_norm": 0.3884582710415025,
"learning_rate": 1.5193798449612405e-05,
"loss": 0.5012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14882785081863403,
"step": 50,
"valid_targets_mean": 7960.4,
"valid_targets_min": 1379
},
{
"epoch": 0.3005464480874317,
"grad_norm": 0.3526692098911082,
"learning_rate": 1.674418604651163e-05,
"loss": 0.5,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15850183367729187,
"step": 55,
"valid_targets_mean": 8110.5,
"valid_targets_min": 3249
},
{
"epoch": 0.32786885245901637,
"grad_norm": 0.31239085933982574,
"learning_rate": 1.8294573643410854e-05,
"loss": 0.473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14768633246421814,
"step": 60,
"valid_targets_mean": 9303.1,
"valid_targets_min": 4146
},
{
"epoch": 0.3551912568306011,
"grad_norm": 0.2501662208549285,
"learning_rate": 1.9844961240310078e-05,
"loss": 0.4613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1516689956188202,
"step": 65,
"valid_targets_mean": 8677.5,
"valid_targets_min": 3997
},
{
"epoch": 0.3825136612021858,
"grad_norm": 0.2594417146172949,
"learning_rate": 2.1395348837209303e-05,
"loss": 0.4435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1571815013885498,
"step": 70,
"valid_targets_mean": 9267.3,
"valid_targets_min": 3503
},
{
"epoch": 0.4098360655737705,
"grad_norm": 0.24083691213954445,
"learning_rate": 2.294573643410853e-05,
"loss": 0.447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14124611020088196,
"step": 75,
"valid_targets_mean": 8758.0,
"valid_targets_min": 3576
},
{
"epoch": 0.4371584699453552,
"grad_norm": 0.2508353728645926,
"learning_rate": 2.449612403100775e-05,
"loss": 0.4181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13334429264068604,
"step": 80,
"valid_targets_mean": 8603.5,
"valid_targets_min": 2661
},
{
"epoch": 0.4644808743169399,
"grad_norm": 0.2379197029399351,
"learning_rate": 2.604651162790698e-05,
"loss": 0.4213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13828732073307037,
"step": 85,
"valid_targets_mean": 9615.9,
"valid_targets_min": 2674
},
{
"epoch": 0.4918032786885246,
"grad_norm": 0.22995401239255409,
"learning_rate": 2.7596899224806204e-05,
"loss": 0.4103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1267620027065277,
"step": 90,
"valid_targets_mean": 8508.2,
"valid_targets_min": 1611
},
{
"epoch": 0.5191256830601093,
"grad_norm": 0.21968035219511445,
"learning_rate": 2.914728682170543e-05,
"loss": 0.3985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1344222128391266,
"step": 95,
"valid_targets_mean": 9016.9,
"valid_targets_min": 2853
},
{
"epoch": 0.546448087431694,
"grad_norm": 0.2291396621839033,
"learning_rate": 3.0697674418604656e-05,
"loss": 0.3942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12746687233448029,
"step": 100,
"valid_targets_mean": 9609.2,
"valid_targets_min": 3162
},
{
"epoch": 0.5737704918032787,
"grad_norm": 0.26633772490859486,
"learning_rate": 3.224806201550388e-05,
"loss": 0.3951,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12758472561836243,
"step": 105,
"valid_targets_mean": 9060.2,
"valid_targets_min": 3599
},
{
"epoch": 0.6010928961748634,
"grad_norm": 0.21028182137462473,
"learning_rate": 3.37984496124031e-05,
"loss": 0.3886,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11767546087503433,
"step": 110,
"valid_targets_mean": 8864.8,
"valid_targets_min": 3255
},
{
"epoch": 0.6284153005464481,
"grad_norm": 0.23885654048701244,
"learning_rate": 3.5348837209302326e-05,
"loss": 0.3856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1476028561592102,
"step": 115,
"valid_targets_mean": 10285.3,
"valid_targets_min": 5453
},
{
"epoch": 0.6557377049180327,
"grad_norm": 0.2303960817635363,
"learning_rate": 3.6899224806201554e-05,
"loss": 0.3767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12939713895320892,
"step": 120,
"valid_targets_mean": 9426.7,
"valid_targets_min": 1574
},
{
"epoch": 0.6830601092896175,
"grad_norm": 0.22422648576742915,
"learning_rate": 3.844961240310078e-05,
"loss": 0.3824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12232644855976105,
"step": 125,
"valid_targets_mean": 9503.8,
"valid_targets_min": 2188
},
{
"epoch": 0.7103825136612022,
"grad_norm": 0.22671595678179757,
"learning_rate": 4e-05,
"loss": 0.3683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12635521590709686,
"step": 130,
"valid_targets_mean": 9434.6,
"valid_targets_min": 4111
},
{
"epoch": 0.7377049180327869,
"grad_norm": 0.20637831279045626,
"learning_rate": 3.9998140791624865e-05,
"loss": 0.3741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11574558913707733,
"step": 135,
"valid_targets_mean": 8746.7,
"valid_targets_min": 1626
},
{
"epoch": 0.7650273224043715,
"grad_norm": 0.22119347463803593,
"learning_rate": 3.999256351216504e-05,
"loss": 0.3706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11761431396007538,
"step": 140,
"valid_targets_mean": 9328.4,
"valid_targets_min": 285
},
{
"epoch": 0.7923497267759563,
"grad_norm": 0.2529631456141331,
"learning_rate": 3.9983269198552975e-05,
"loss": 0.3671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12752871215343475,
"step": 145,
"valid_targets_mean": 10542.8,
"valid_targets_min": 4902
},
{
"epoch": 0.819672131147541,
"grad_norm": 0.27077929233544257,
"learning_rate": 3.9970259578795265e-05,
"loss": 0.3662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12587572634220123,
"step": 150,
"valid_targets_mean": 9893.7,
"valid_targets_min": 4229
},
{
"epoch": 0.8469945355191257,
"grad_norm": 0.24792011083795368,
"learning_rate": 3.99535370716513e-05,
"loss": 0.3593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11843468248844147,
"step": 155,
"valid_targets_mean": 9644.2,
"valid_targets_min": 3843
},
{
"epoch": 0.8743169398907104,
"grad_norm": 0.25208723026736696,
"learning_rate": 3.993310478618361e-05,
"loss": 0.364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12046317011117935,
"step": 160,
"valid_targets_mean": 9617.7,
"valid_targets_min": 3083
},
{
"epoch": 0.9016393442622951,
"grad_norm": 0.5699971833507397,
"learning_rate": 3.990896652117983e-05,
"loss": 0.3672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10490091145038605,
"step": 165,
"valid_targets_mean": 9126.5,
"valid_targets_min": 3756
},
{
"epoch": 0.9289617486338798,
"grad_norm": 0.21915062353617268,
"learning_rate": 3.988112676444639e-05,
"loss": 0.3617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10891340672969818,
"step": 170,
"valid_targets_mean": 7968.0,
"valid_targets_min": 1920
},
{
"epoch": 0.9562841530054644,
"grad_norm": 0.22162783285385126,
"learning_rate": 3.9849590691974206e-05,
"loss": 0.3591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1189221441745758,
"step": 175,
"valid_targets_mean": 9948.0,
"valid_targets_min": 1449
},
{
"epoch": 0.9836065573770492,
"grad_norm": 0.2505497323569372,
"learning_rate": 3.981436416697625e-05,
"loss": 0.3534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13000932335853577,
"step": 180,
"valid_targets_mean": 9891.6,
"valid_targets_min": 3077
},
{
"epoch": 1.010928961748634,
"grad_norm": 0.257548615241039,
"learning_rate": 3.977545373879759e-05,
"loss": 0.3574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11166360974311829,
"step": 185,
"valid_targets_mean": 8307.2,
"valid_targets_min": 2528
},
{
"epoch": 1.0382513661202186,
"grad_norm": 0.2350098965649614,
"learning_rate": 3.9732866641697586e-05,
"loss": 0.3478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10175259411334991,
"step": 190,
"valid_targets_mean": 7673.5,
"valid_targets_min": 921
},
{
"epoch": 1.0655737704918034,
"grad_norm": 0.25048777204583766,
"learning_rate": 3.968661079350501e-05,
"loss": 0.3517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11078484356403351,
"step": 195,
"valid_targets_mean": 7534.5,
"valid_targets_min": 2569
},
{
"epoch": 1.092896174863388,
"grad_norm": 0.29042532002341653,
"learning_rate": 3.963669479414591e-05,
"loss": 0.3487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10936549305915833,
"step": 200,
"valid_targets_mean": 9098.6,
"valid_targets_min": 3617
},
{
"epoch": 1.1202185792349726,
"grad_norm": 0.24753762424635495,
"learning_rate": 3.958312792404468e-05,
"loss": 0.3446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11017704755067825,
"step": 205,
"valid_targets_mean": 8941.7,
"valid_targets_min": 3491
},
{
"epoch": 1.1475409836065573,
"grad_norm": 0.2857582899316236,
"learning_rate": 3.952592014239867e-05,
"loss": 0.3477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11123620718717575,
"step": 210,
"valid_targets_mean": 8961.5,
"valid_targets_min": 3218
},
{
"epoch": 1.174863387978142,
"grad_norm": 0.3107975974617015,
"learning_rate": 3.946508208532656e-05,
"loss": 0.3508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10983406007289886,
"step": 215,
"valid_targets_mean": 9042.6,
"valid_targets_min": 3869
},
{
"epoch": 1.2021857923497268,
"grad_norm": 0.2794395978672758,
"learning_rate": 3.940062506389089e-05,
"loss": 0.3504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.108359694480896,
"step": 220,
"valid_targets_mean": 8741.7,
"valid_targets_min": 1611
},
{
"epoch": 1.2295081967213115,
"grad_norm": 0.2681446844893584,
"learning_rate": 3.9332561061995036e-05,
"loss": 0.3513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11848342418670654,
"step": 225,
"valid_targets_mean": 9721.8,
"valid_targets_min": 4370
},
{
"epoch": 1.2568306010928962,
"grad_norm": 0.2298534429968579,
"learning_rate": 3.926090273415526e-05,
"loss": 0.3414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10455862432718277,
"step": 230,
"valid_targets_mean": 7769.4,
"valid_targets_min": 930
},
{
"epoch": 1.2841530054644807,
"grad_norm": 0.22827152947020307,
"learning_rate": 3.918566340314788e-05,
"loss": 0.3433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10215024650096893,
"step": 235,
"valid_targets_mean": 8790.8,
"valid_targets_min": 4165
},
{
"epoch": 1.3114754098360657,
"grad_norm": 0.2488097440722295,
"learning_rate": 3.910685705753233e-05,
"loss": 0.3472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11619973182678223,
"step": 240,
"valid_targets_mean": 9364.5,
"valid_targets_min": 2827
},
{
"epoch": 1.3387978142076502,
"grad_norm": 0.22057735394385433,
"learning_rate": 3.9024498349050385e-05,
"loss": 0.3477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11552197486162186,
"step": 245,
"valid_targets_mean": 9655.0,
"valid_targets_min": 4307
},
{
"epoch": 1.366120218579235,
"grad_norm": 0.2163271712853194,
"learning_rate": 3.893860258990212e-05,
"loss": 0.3324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1090204268693924,
"step": 250,
"valid_targets_mean": 9703.5,
"valid_targets_min": 3165
},
{
"epoch": 1.3934426229508197,
"grad_norm": 0.2528209867857862,
"learning_rate": 3.8849185749898996e-05,
"loss": 0.3367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11540880799293518,
"step": 255,
"valid_targets_mean": 10452.7,
"valid_targets_min": 4897
},
{
"epoch": 1.4207650273224044,
"grad_norm": 0.22333938650940646,
"learning_rate": 3.87562644534948e-05,
"loss": 0.3382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11605552583932877,
"step": 260,
"valid_targets_mean": 8834.8,
"valid_targets_min": 852
},
{
"epoch": 1.4480874316939891,
"grad_norm": 0.2670327230642673,
"learning_rate": 3.865985597669478e-05,
"loss": 0.3427,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11774078011512756,
"step": 265,
"valid_targets_mean": 9915.6,
"valid_targets_min": 1283
},
{
"epoch": 1.4754098360655736,
"grad_norm": 0.24992505310393578,
"learning_rate": 3.855997824384369e-05,
"loss": 0.3433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11916451156139374,
"step": 270,
"valid_targets_mean": 9151.8,
"valid_targets_min": 4312
},
{
"epoch": 1.5027322404371586,
"grad_norm": 0.23459272261084146,
"learning_rate": 3.845664982429328e-05,
"loss": 0.3363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11957293748855591,
"step": 275,
"valid_targets_mean": 9786.7,
"valid_targets_min": 3164
},
{
"epoch": 1.530054644808743,
"grad_norm": 0.23828026929857207,
"learning_rate": 3.834988992894983e-05,
"loss": 0.3319,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12325535714626312,
"step": 280,
"valid_targets_mean": 9396.9,
"valid_targets_min": 4421
},
{
"epoch": 1.5573770491803278,
"grad_norm": 0.21959387205650174,
"learning_rate": 3.823971840670251e-05,
"loss": 0.3387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12241716682910919,
"step": 285,
"valid_targets_mean": 9773.9,
"valid_targets_min": 3191
},
{
"epoch": 1.5846994535519126,
"grad_norm": 0.22362204727522375,
"learning_rate": 3.812615574073301e-05,
"loss": 0.3377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10839522629976273,
"step": 290,
"valid_targets_mean": 9549.2,
"valid_targets_min": 3643
},
{
"epoch": 1.6120218579234973,
"grad_norm": 0.23343607016117385,
"learning_rate": 3.800922304470728e-05,
"loss": 0.3313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10157692432403564,
"step": 295,
"valid_targets_mean": 8760.2,
"valid_targets_min": 2346
},
{
"epoch": 1.639344262295082,
"grad_norm": 0.25421982858320236,
"learning_rate": 3.7888942058850105e-05,
"loss": 0.3359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11778868734836578,
"step": 300,
"valid_targets_mean": 9254.8,
"valid_targets_min": 3701
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.24398684968303413,
"learning_rate": 3.7765335145903124e-05,
"loss": 0.3301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10742858797311783,
"step": 305,
"valid_targets_mean": 10601.6,
"valid_targets_min": 4584
},
{
"epoch": 1.6939890710382515,
"grad_norm": 0.22746398932508854,
"learning_rate": 3.76384252869671e-05,
"loss": 0.3322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1208631843328476,
"step": 310,
"valid_targets_mean": 9742.0,
"valid_targets_min": 3459
},
{
"epoch": 1.721311475409836,
"grad_norm": 0.24480565463344894,
"learning_rate": 3.750823607722931e-05,
"loss": 0.3285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11582465469837189,
"step": 315,
"valid_targets_mean": 9165.2,
"valid_targets_min": 3364
},
{
"epoch": 1.748633879781421,
"grad_norm": 0.2180718409308529,
"learning_rate": 3.737479172157665e-05,
"loss": 0.3357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1275629997253418,
"step": 320,
"valid_targets_mean": 9666.4,
"valid_targets_min": 4633
},
{
"epoch": 1.7759562841530054,
"grad_norm": 0.24489899725053552,
"learning_rate": 3.723811703009549e-05,
"loss": 0.3308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10959871858358383,
"step": 325,
"valid_targets_mean": 8244.2,
"valid_targets_min": 3566
},
{
"epoch": 1.8032786885245902,
"grad_norm": 0.2479273797839025,
"learning_rate": 3.709823741345894e-05,
"loss": 0.331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11131682991981506,
"step": 330,
"valid_targets_mean": 8796.8,
"valid_targets_min": 2477
},
{
"epoch": 1.830601092896175,
"grad_norm": 0.2571862418094793,
"learning_rate": 3.695517887820247e-05,
"loss": 0.3274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10547453165054321,
"step": 335,
"valid_targets_mean": 8258.8,
"valid_targets_min": 1715
},
{
"epoch": 1.8579234972677594,
"grad_norm": 0.22348674772376723,
"learning_rate": 3.680896802188876e-05,
"loss": 0.33,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1058754026889801,
"step": 340,
"valid_targets_mean": 8889.8,
"valid_targets_min": 913
},
{
"epoch": 1.8852459016393444,
"grad_norm": 0.21028274581991213,
"learning_rate": 3.66596320281627e-05,
"loss": 0.3296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12007634341716766,
"step": 345,
"valid_targets_mean": 10142.2,
"valid_targets_min": 3243
},
{
"epoch": 1.9125683060109289,
"grad_norm": 0.22986592321070748,
"learning_rate": 3.6507198661697276e-05,
"loss": 0.3338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11267776787281036,
"step": 350,
"valid_targets_mean": 8925.9,
"valid_targets_min": 2910
},
{
"epoch": 1.9398907103825138,
"grad_norm": 0.24096406367512213,
"learning_rate": 3.635169626303168e-05,
"loss": 0.3307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10832738876342773,
"step": 355,
"valid_targets_mean": 8725.2,
"valid_targets_min": 3893
},
{
"epoch": 1.9672131147540983,
"grad_norm": 0.2754776858828639,
"learning_rate": 3.619315374330208e-05,
"loss": 0.3345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11519007384777069,
"step": 360,
"valid_targets_mean": 9125.4,
"valid_targets_min": 1016
},
{
"epoch": 1.994535519125683,
"grad_norm": 0.2486552522027151,
"learning_rate": 3.603160057886655e-05,
"loss": 0.3334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11450847238302231,
"step": 365,
"valid_targets_mean": 8982.2,
"valid_targets_min": 3333
},
{
"epoch": 2.021857923497268,
"grad_norm": 0.2743213274153223,
"learning_rate": 3.586706680582471e-05,
"loss": 0.3253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10886304080486298,
"step": 370,
"valid_targets_mean": 9614.8,
"valid_targets_min": 3756
},
{
"epoch": 2.0491803278688523,
"grad_norm": 0.255653642834224,
"learning_rate": 3.569958301443344e-05,
"loss": 0.321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11270833015441895,
"step": 375,
"valid_targets_mean": 9813.9,
"valid_targets_min": 2054
},
{
"epoch": 2.0765027322404372,
"grad_norm": 0.24133265109850416,
"learning_rate": 3.552918034341952e-05,
"loss": 0.3195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11231839656829834,
"step": 380,
"valid_targets_mean": 8981.4,
"valid_targets_min": 2826
},
{
"epoch": 2.1038251366120218,
"grad_norm": 0.25526011562795836,
"learning_rate": 3.5355890474190244e-05,
"loss": 0.3288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11535529792308807,
"step": 385,
"valid_targets_mean": 9212.4,
"valid_targets_min": 4995
},
{
"epoch": 2.1311475409836067,
"grad_norm": 0.23604329145357902,
"learning_rate": 3.517974562494324e-05,
"loss": 0.3231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10826494544744492,
"step": 390,
"valid_targets_mean": 8674.5,
"valid_targets_min": 717
},
{
"epoch": 2.158469945355191,
"grad_norm": 0.21848145690843088,
"learning_rate": 3.5000778544676404e-05,
"loss": 0.3254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11187595129013062,
"step": 395,
"valid_targets_mean": 8702.6,
"valid_targets_min": 359
},
{
"epoch": 2.185792349726776,
"grad_norm": 0.24660363230345203,
"learning_rate": 3.4819022507099184e-05,
"loss": 0.3217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1158638447523117,
"step": 400,
"valid_targets_mean": 9505.6,
"valid_targets_min": 4652
},
{
"epoch": 2.2131147540983607,
"grad_norm": 0.2317494188303323,
"learning_rate": 3.463451130444631e-05,
"loss": 0.324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10151409357786179,
"step": 405,
"valid_targets_mean": 8673.2,
"valid_targets_min": 3371
},
{
"epoch": 2.240437158469945,
"grad_norm": 0.2558065717810365,
"learning_rate": 3.444727924119511e-05,
"loss": 0.328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10935921221971512,
"step": 410,
"valid_targets_mean": 8613.8,
"valid_targets_min": 1834
},
{
"epoch": 2.26775956284153,
"grad_norm": 0.2468684240893415,
"learning_rate": 3.42573611276876e-05,
"loss": 0.3169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09871050715446472,
"step": 415,
"valid_targets_mean": 8523.5,
"valid_targets_min": 4438
},
{
"epoch": 2.2950819672131146,
"grad_norm": 0.2606641720946758,
"learning_rate": 3.4064792273658494e-05,
"loss": 0.3226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09632217884063721,
"step": 420,
"valid_targets_mean": 7952.3,
"valid_targets_min": 576
},
{
"epoch": 2.3224043715846996,
"grad_norm": 0.2349015992186721,
"learning_rate": 3.386960848167041e-05,
"loss": 0.3197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12064424157142639,
"step": 425,
"valid_targets_mean": 8627.5,
"valid_targets_min": 2693
},
{
"epoch": 2.349726775956284,
"grad_norm": 0.26046775499998737,
"learning_rate": 3.367184604045743e-05,
"loss": 0.3222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10175183415412903,
"step": 430,
"valid_targets_mean": 8524.4,
"valid_targets_min": 3249
},
{
"epoch": 2.3770491803278686,
"grad_norm": 0.22651301464400855,
"learning_rate": 3.347154171817825e-05,
"loss": 0.3248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11612209677696228,
"step": 435,
"valid_targets_mean": 9728.2,
"valid_targets_min": 3810
},
{
"epoch": 2.4043715846994536,
"grad_norm": 0.23039268184631387,
"learning_rate": 3.3268732755580226e-05,
"loss": 0.318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11132186651229858,
"step": 440,
"valid_targets_mean": 8813.7,
"valid_targets_min": 2476
},
{
"epoch": 2.431693989071038,
"grad_norm": 0.23764518989398273,
"learning_rate": 3.306345685907553e-05,
"loss": 0.3132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10579263418912888,
"step": 445,
"valid_targets_mean": 8756.8,
"valid_targets_min": 2697
},
{
"epoch": 2.459016393442623,
"grad_norm": 0.23123022720630637,
"learning_rate": 3.285575219373079e-05,
"loss": 0.3212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1140381246805191,
"step": 450,
"valid_targets_mean": 9343.9,
"valid_targets_min": 2043
},
{
"epoch": 2.4863387978142075,
"grad_norm": 0.269360392744887,
"learning_rate": 3.264565737617132e-05,
"loss": 0.3169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10570000857114792,
"step": 455,
"valid_targets_mean": 9540.1,
"valid_targets_min": 4028
},
{
"epoch": 2.5136612021857925,
"grad_norm": 0.221114564777979,
"learning_rate": 3.243321146740155e-05,
"loss": 0.3269,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12460845708847046,
"step": 460,
"valid_targets_mean": 9913.7,
"valid_targets_min": 2636
},
{
"epoch": 2.540983606557377,
"grad_norm": 0.2233016882583844,
"learning_rate": 3.2218453965542785e-05,
"loss": 0.3254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.107123464345932,
"step": 465,
"valid_targets_mean": 9036.3,
"valid_targets_min": 2631
},
{
"epoch": 2.5683060109289615,
"grad_norm": 0.2159132253652805,
"learning_rate": 3.2001424798489625e-05,
"loss": 0.3179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10757862031459808,
"step": 470,
"valid_targets_mean": 8646.6,
"valid_targets_min": 1410
},
{
"epoch": 2.5956284153005464,
"grad_norm": 0.24917088168724738,
"learning_rate": 3.1782164316486566e-05,
"loss": 0.3152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10982252657413483,
"step": 475,
"valid_targets_mean": 9668.7,
"valid_targets_min": 1508
},
{
"epoch": 2.6229508196721314,
"grad_norm": 0.2174015848024412,
"learning_rate": 3.156071328462607e-05,
"loss": 0.3165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09340976923704147,
"step": 480,
"valid_targets_mean": 9737.8,
"valid_targets_min": 3792
},
{
"epoch": 2.650273224043716,
"grad_norm": 0.2512049654033023,
"learning_rate": 3.1337112875269436e-05,
"loss": 0.3167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09841690212488174,
"step": 485,
"valid_targets_mean": 8864.9,
"valid_targets_min": 2941
},
{
"epoch": 2.6775956284153004,
"grad_norm": 0.22880792088164945,
"learning_rate": 3.111140466039205e-05,
"loss": 0.32,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10708227008581161,
"step": 490,
"valid_targets_mean": 8706.9,
"valid_targets_min": 2747
},
{
"epoch": 2.7049180327868854,
"grad_norm": 0.231439018967597,
"learning_rate": 3.088363060385424e-05,
"loss": 0.3131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11007743328809738,
"step": 495,
"valid_targets_mean": 10188.8,
"valid_targets_min": 3932
},
{
"epoch": 2.73224043715847,
"grad_norm": 0.2428291380653805,
"learning_rate": 3.065383305359938e-05,
"loss": 0.3155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10607362538576126,
"step": 500,
"valid_targets_mean": 8751.4,
"valid_targets_min": 2661
},
{
"epoch": 2.7595628415300544,
"grad_norm": 0.2113160568789913,
"learning_rate": 3.0422054733780474e-05,
"loss": 0.3214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11031060665845871,
"step": 505,
"valid_targets_mean": 9535.6,
"valid_targets_min": 4219
},
{
"epoch": 2.7868852459016393,
"grad_norm": 0.23720201082771936,
"learning_rate": 3.018833873681684e-05,
"loss": 0.3203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10088831186294556,
"step": 510,
"valid_targets_mean": 8321.9,
"valid_targets_min": 3472
},
{
"epoch": 2.8142076502732243,
"grad_norm": 0.27037560406468397,
"learning_rate": 2.9952728515382383e-05,
"loss": 0.3125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1012275442481041,
"step": 515,
"valid_targets_mean": 8525.2,
"valid_targets_min": 930
},
{
"epoch": 2.841530054644809,
"grad_norm": 0.21901703489991992,
"learning_rate": 2.9715267874326805e-05,
"loss": 0.3167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11215992271900177,
"step": 520,
"valid_targets_mean": 9853.0,
"valid_targets_min": 1782
},
{
"epoch": 2.8688524590163933,
"grad_norm": 0.2473747830382745,
"learning_rate": 2.947600096253136e-05,
"loss": 0.3169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0938570499420166,
"step": 525,
"valid_targets_mean": 8734.0,
"valid_targets_min": 3715
},
{
"epoch": 2.8961748633879782,
"grad_norm": 0.19923680438401953,
"learning_rate": 2.9234972264700687e-05,
"loss": 0.3138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10463187098503113,
"step": 530,
"valid_targets_mean": 8936.3,
"valid_targets_min": 2301
},
{
"epoch": 2.9234972677595628,
"grad_norm": 0.22922414429489327,
"learning_rate": 2.8992226593092135e-05,
"loss": 0.3133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11466294527053833,
"step": 535,
"valid_targets_mean": 8984.4,
"valid_targets_min": 3255
},
{
"epoch": 2.9508196721311473,
"grad_norm": 0.20989994462632877,
"learning_rate": 2.874780907918429e-05,
"loss": 0.3165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09710144996643066,
"step": 540,
"valid_targets_mean": 8905.9,
"valid_targets_min": 3163
},
{
"epoch": 2.978142076502732,
"grad_norm": 0.21839827056447508,
"learning_rate": 2.8501765165286025e-05,
"loss": 0.3196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10432031750679016,
"step": 545,
"valid_targets_mean": 9281.0,
"valid_targets_min": 2322
},
{
"epoch": 3.0054644808743167,
"grad_norm": 0.2285305181088268,
"learning_rate": 2.8254140596087897e-05,
"loss": 0.3094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10178789496421814,
"step": 550,
"valid_targets_mean": 8931.3,
"valid_targets_min": 3180
},
{
"epoch": 3.0327868852459017,
"grad_norm": 0.2017808641425933,
"learning_rate": 2.8004981410157187e-05,
"loss": 0.3079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09892808645963669,
"step": 555,
"valid_targets_mean": 9654.6,
"valid_targets_min": 2883
},
{
"epoch": 3.060109289617486,
"grad_norm": 0.2251907804199061,
"learning_rate": 2.775433393137841e-05,
"loss": 0.3004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10582572966814041,
"step": 560,
"valid_targets_mean": 8835.2,
"valid_targets_min": 2457
},
{
"epoch": 3.087431693989071,
"grad_norm": 0.2516272437175909,
"learning_rate": 2.750224476034076e-05,
"loss": 0.3089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1124773770570755,
"step": 565,
"valid_targets_mean": 9312.9,
"valid_targets_min": 2875
},
{
"epoch": 3.1147540983606556,
"grad_norm": 0.2227184002299704,
"learning_rate": 2.7248760765674033e-05,
"loss": 0.3132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10076034069061279,
"step": 570,
"valid_targets_mean": 8707.6,
"valid_targets_min": 2296
},
{
"epoch": 3.1420765027322406,
"grad_norm": 0.22416125947761548,
"learning_rate": 2.699392907533482e-05,
"loss": 0.3057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11026563495397568,
"step": 575,
"valid_targets_mean": 9723.8,
"valid_targets_min": 3465
},
{
"epoch": 3.169398907103825,
"grad_norm": 0.20899809066246997,
"learning_rate": 2.6737797067844403e-05,
"loss": 0.3131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10523172467947006,
"step": 580,
"valid_targets_mean": 9560.2,
"valid_targets_min": 4534
},
{
"epoch": 3.19672131147541,
"grad_norm": 0.23222838408930938,
"learning_rate": 2.6480412363480138e-05,
"loss": 0.3096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10207949578762054,
"step": 585,
"valid_targets_mean": 9779.5,
"valid_targets_min": 4601
},
{
"epoch": 3.2240437158469946,
"grad_norm": 0.2802069274859931,
"learning_rate": 2.6221822815421817e-05,
"loss": 0.3069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10540831089019775,
"step": 590,
"valid_targets_mean": 10070.7,
"valid_targets_min": 2866
},
{
"epoch": 3.251366120218579,
"grad_norm": 0.20542978202801448,
"learning_rate": 2.5962076500854804e-05,
"loss": 0.3108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08749028295278549,
"step": 595,
"valid_targets_mean": 8740.9,
"valid_targets_min": 2098
},
{
"epoch": 3.278688524590164,
"grad_norm": 0.22733842428820036,
"learning_rate": 2.570122171203142e-05,
"loss": 0.3088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09117378294467926,
"step": 600,
"valid_targets_mean": 8412.1,
"valid_targets_min": 805
},
{
"epoch": 3.3060109289617485,
"grad_norm": 0.2438261017999824,
"learning_rate": 2.5439306947292485e-05,
"loss": 0.3072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11073037981987,
"step": 605,
"valid_targets_mean": 9375.6,
"valid_targets_min": 2173
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.24465291986437376,
"learning_rate": 2.5176380902050418e-05,
"loss": 0.3192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09698040783405304,
"step": 610,
"valid_targets_mean": 8386.6,
"valid_targets_min": 1156
},
{
"epoch": 3.360655737704918,
"grad_norm": 0.2008109008659842,
"learning_rate": 2.4912492459735752e-05,
"loss": 0.3062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09698103368282318,
"step": 615,
"valid_targets_mean": 9220.1,
"valid_targets_min": 4023
},
{
"epoch": 3.387978142076503,
"grad_norm": 0.20970173880663623,
"learning_rate": 2.4647690682708695e-05,
"loss": 0.3092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10502799600362778,
"step": 620,
"valid_targets_mean": 10270.1,
"valid_targets_min": 4413
},
{
"epoch": 3.4153005464480874,
"grad_norm": 0.20616431756721573,
"learning_rate": 2.4382024803137396e-05,
"loss": 0.3114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10608991235494614,
"step": 625,
"valid_targets_mean": 9512.1,
"valid_targets_min": 3351
},
{
"epoch": 3.442622950819672,
"grad_norm": 0.20966320270309535,
"learning_rate": 2.41155442138447e-05,
"loss": 0.3066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09832080453634262,
"step": 630,
"valid_targets_mean": 8836.7,
"valid_targets_min": 2827
},
{
"epoch": 3.469945355191257,
"grad_norm": 0.2143240783515177,
"learning_rate": 2.384829845912494e-05,
"loss": 0.3131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09457945823669434,
"step": 635,
"valid_targets_mean": 9319.6,
"valid_targets_min": 2697
},
{
"epoch": 3.4972677595628414,
"grad_norm": 0.217793398021425,
"learning_rate": 2.3580337225532663e-05,
"loss": 0.3071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09785357862710953,
"step": 640,
"valid_targets_mean": 8962.5,
"valid_targets_min": 2043
},
{
"epoch": 3.5245901639344264,
"grad_norm": 0.21567616833440192,
"learning_rate": 2.331171033264482e-05,
"loss": 0.3139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0948730856180191,
"step": 645,
"valid_targets_mean": 8423.4,
"valid_targets_min": 3658
},
{
"epoch": 3.551912568306011,
"grad_norm": 0.21559528731790625,
"learning_rate": 2.3042467723798335e-05,
"loss": 0.3081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10408905148506165,
"step": 650,
"valid_targets_mean": 9092.1,
"valid_targets_min": 2668
},
{
"epoch": 3.579234972677596,
"grad_norm": 0.211833304339463,
"learning_rate": 2.2772659456804537e-05,
"loss": 0.3129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10551073402166367,
"step": 655,
"valid_targets_mean": 9483.7,
"valid_targets_min": 3771
},
{
"epoch": 3.6065573770491803,
"grad_norm": 0.22483771277803868,
"learning_rate": 2.2502335694642388e-05,
"loss": 0.3107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10551051795482635,
"step": 660,
"valid_targets_mean": 8905.8,
"valid_targets_min": 3549
},
{
"epoch": 3.633879781420765,
"grad_norm": 0.22386943756040195,
"learning_rate": 2.223154669613215e-05,
"loss": 0.3085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10307562351226807,
"step": 665,
"valid_targets_mean": 8859.0,
"valid_targets_min": 3300
},
{
"epoch": 3.66120218579235,
"grad_norm": 0.22170142391966322,
"learning_rate": 2.196034280659122e-05,
"loss": 0.3104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09553318470716476,
"step": 670,
"valid_targets_mean": 8777.2,
"valid_targets_min": 2879
},
{
"epoch": 3.6885245901639343,
"grad_norm": 0.2142352891381397,
"learning_rate": 2.1688774448473863e-05,
"loss": 0.3109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10052451491355896,
"step": 675,
"valid_targets_mean": 8826.8,
"valid_targets_min": 4146
},
{
"epoch": 3.7158469945355193,
"grad_norm": 0.19977901434675086,
"learning_rate": 2.1416892111996685e-05,
"loss": 0.3029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11022960394620895,
"step": 680,
"valid_targets_mean": 9945.0,
"valid_targets_min": 3549
},
{
"epoch": 3.7431693989071038,
"grad_norm": 0.193299048595288,
"learning_rate": 2.114474634575138e-05,
"loss": 0.3104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10461431741714478,
"step": 685,
"valid_targets_mean": 8731.8,
"valid_targets_min": 2403
},
{
"epoch": 3.7704918032786887,
"grad_norm": 0.20778558224455732,
"learning_rate": 2.0872387747306725e-05,
"loss": 0.3151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10199467837810516,
"step": 690,
"valid_targets_mean": 9486.6,
"valid_targets_min": 3255
},
{
"epoch": 3.797814207650273,
"grad_norm": 0.23682061658623274,
"learning_rate": 2.0599866953801456e-05,
"loss": 0.3037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09984984993934631,
"step": 695,
"valid_targets_mean": 9708.8,
"valid_targets_min": 1938
},
{
"epoch": 3.8251366120218577,
"grad_norm": 0.235770071313055,
"learning_rate": 2.0327234632529738e-05,
"loss": 0.3125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11001993715763092,
"step": 700,
"valid_targets_mean": 9621.1,
"valid_targets_min": 4455
},
{
"epoch": 3.8524590163934427,
"grad_norm": 0.24380201762655782,
"learning_rate": 2.005454147152108e-05,
"loss": 0.3088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10252358019351959,
"step": 705,
"valid_targets_mean": 8781.7,
"valid_targets_min": 2964
},
{
"epoch": 3.879781420765027,
"grad_norm": 0.22538655705455618,
"learning_rate": 1.9781838170116357e-05,
"loss": 0.3014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09685234725475311,
"step": 710,
"valid_targets_mean": 8708.4,
"valid_targets_min": 3186
},
{
"epoch": 3.907103825136612,
"grad_norm": 0.2225369904461113,
"learning_rate": 1.950917542954176e-05,
"loss": 0.31,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1139606386423111,
"step": 715,
"valid_targets_mean": 9864.2,
"valid_targets_min": 3034
},
{
"epoch": 3.9344262295081966,
"grad_norm": 0.1900714759599998,
"learning_rate": 1.923660394348237e-05,
"loss": 0.3022,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09253083169460297,
"step": 720,
"valid_targets_mean": 8268.0,
"valid_targets_min": 2846
},
{
"epoch": 3.9617486338797816,
"grad_norm": 0.19618089341199454,
"learning_rate": 1.8964174388657167e-05,
"loss": 0.312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09711985290050507,
"step": 725,
"valid_targets_mean": 9442.0,
"valid_targets_min": 4287
},
{
"epoch": 3.989071038251366,
"grad_norm": 0.21451682228357283,
"learning_rate": 1.869193741539714e-05,
"loss": 0.306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11297638714313507,
"step": 730,
"valid_targets_mean": 9649.7,
"valid_targets_min": 2839
},
{
"epoch": 4.016393442622951,
"grad_norm": 0.2267764588190383,
"learning_rate": 1.8419943638228362e-05,
"loss": 0.3005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10513165593147278,
"step": 735,
"valid_targets_mean": 9610.8,
"valid_targets_min": 3783
},
{
"epoch": 4.043715846994536,
"grad_norm": 0.23188280374229425,
"learning_rate": 1.8148243626461693e-05,
"loss": 0.3077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10651808232069016,
"step": 740,
"valid_targets_mean": 9377.2,
"valid_targets_min": 2556
},
{
"epoch": 4.0710382513661205,
"grad_norm": 0.20234257345492324,
"learning_rate": 1.7876887894790856e-05,
"loss": 0.3051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10677113384008408,
"step": 745,
"valid_targets_mean": 9526.9,
"valid_targets_min": 1232
},
{
"epoch": 4.098360655737705,
"grad_norm": 0.2006664192357114,
"learning_rate": 1.7605926893900755e-05,
"loss": 0.3073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10224565118551254,
"step": 750,
"valid_targets_mean": 9564.0,
"valid_targets_min": 3879
},
{
"epoch": 4.1256830601092895,
"grad_norm": 0.21611930448024275,
"learning_rate": 1.7335411001087604e-05,
"loss": 0.3029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10021790862083435,
"step": 755,
"valid_targets_mean": 9495.2,
"valid_targets_min": 4249
},
{
"epoch": 4.1530054644808745,
"grad_norm": 0.20492543825613885,
"learning_rate": 1.7065390510892767e-05,
"loss": 0.3062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1090320274233818,
"step": 760,
"valid_targets_mean": 9487.8,
"valid_targets_min": 2368
},
{
"epoch": 4.180327868852459,
"grad_norm": 0.22303909905490066,
"learning_rate": 1.6795915625751916e-05,
"loss": 0.3033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09251723438501358,
"step": 765,
"valid_targets_mean": 7996.1,
"valid_targets_min": 2941
},
{
"epoch": 4.2076502732240435,
"grad_norm": 0.216240174021842,
"learning_rate": 1.6527036446661396e-05,
"loss": 0.2986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09408397972583771,
"step": 770,
"valid_targets_mean": 9731.0,
"valid_targets_min": 3576
},
{
"epoch": 4.2349726775956285,
"grad_norm": 0.21817938152151142,
"learning_rate": 1.625880296386336e-05,
"loss": 0.3002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09434144198894501,
"step": 775,
"valid_targets_mean": 9463.3,
"valid_targets_min": 4429
},
{
"epoch": 4.262295081967213,
"grad_norm": 0.2117746616975632,
"learning_rate": 1.599126504755159e-05,
"loss": 0.3031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09988536685705185,
"step": 780,
"valid_targets_mean": 8893.2,
"valid_targets_min": 1144
},
{
"epoch": 4.2896174863387975,
"grad_norm": 0.24386065924604794,
"learning_rate": 1.5724472438599554e-05,
"loss": 0.2994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09023170173168182,
"step": 785,
"valid_targets_mean": 8590.4,
"valid_targets_min": 2235
},
{
"epoch": 4.316939890710382,
"grad_norm": 0.21134859055315403,
"learning_rate": 1.545847473931254e-05,
"loss": 0.3035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10283560305833817,
"step": 790,
"valid_targets_mean": 10174.0,
"valid_targets_min": 3954
},
{
"epoch": 4.344262295081967,
"grad_norm": 0.20065533277407502,
"learning_rate": 1.5193321404205583e-05,
"loss": 0.3026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10268115997314453,
"step": 795,
"valid_targets_mean": 9324.8,
"valid_targets_min": 1283
},
{
"epoch": 4.371584699453552,
"grad_norm": 0.215674808813511,
"learning_rate": 1.4929061730808813e-05,
"loss": 0.3026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09143080562353134,
"step": 800,
"valid_targets_mean": 9008.8,
"valid_targets_min": 2476
},
{
"epoch": 4.398907103825136,
"grad_norm": 0.19754230475541815,
"learning_rate": 1.4665744850502035e-05,
"loss": 0.2991,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09778804332017899,
"step": 805,
"valid_targets_mean": 8793.2,
"valid_targets_min": 917
},
{
"epoch": 4.426229508196721,
"grad_norm": 0.21173287315948128,
"learning_rate": 1.4403419719380161e-05,
"loss": 0.3026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10597346723079681,
"step": 810,
"valid_targets_mean": 8464.4,
"valid_targets_min": 4000
},
{
"epoch": 4.453551912568306,
"grad_norm": 0.21557820949214415,
"learning_rate": 1.4142135109151273e-05,
"loss": 0.3018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10664163529872894,
"step": 815,
"valid_targets_mean": 8985.9,
"valid_targets_min": 3113
},
{
"epoch": 4.48087431693989,
"grad_norm": 0.2057707972831818,
"learning_rate": 1.388193959806893e-05,
"loss": 0.3029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.100834421813488,
"step": 820,
"valid_targets_mean": 9252.4,
"valid_targets_min": 3107
},
{
"epoch": 4.508196721311475,
"grad_norm": 0.2035704407094551,
"learning_rate": 1.3622881561900476e-05,
"loss": 0.3017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09915086627006531,
"step": 825,
"valid_targets_mean": 9511.0,
"valid_targets_min": 2668
},
{
"epoch": 4.53551912568306,
"grad_norm": 0.19575235441533922,
"learning_rate": 1.3365009164932964e-05,
"loss": 0.3091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10887756943702698,
"step": 830,
"valid_targets_mean": 9509.0,
"valid_targets_min": 3946
},
{
"epoch": 4.562841530054644,
"grad_norm": 0.19655396398761169,
"learning_rate": 1.3108370351018393e-05,
"loss": 0.2992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09594590961933136,
"step": 835,
"valid_targets_mean": 9079.8,
"valid_targets_min": 3052
},
{
"epoch": 4.590163934426229,
"grad_norm": 0.21253925854859518,
"learning_rate": 1.285301283466e-05,
"loss": 0.303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0905645489692688,
"step": 840,
"valid_targets_mean": 8182.4,
"valid_targets_min": 3218
},
{
"epoch": 4.617486338797814,
"grad_norm": 0.20072810587848328,
"learning_rate": 1.2598984092141083e-05,
"loss": 0.3033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10311625897884369,
"step": 845,
"valid_targets_mean": 9788.3,
"valid_targets_min": 3318
},
{
"epoch": 4.644808743169399,
"grad_norm": 0.18665016397140438,
"learning_rate": 1.2346331352698206e-05,
"loss": 0.2985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09972083568572998,
"step": 850,
"valid_targets_mean": 9216.6,
"valid_targets_min": 3940
},
{
"epoch": 4.672131147540983,
"grad_norm": 0.19938029887487937,
"learning_rate": 1.2095101589740291e-05,
"loss": 0.3015,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10425819456577301,
"step": 855,
"valid_targets_mean": 9457.9,
"valid_targets_min": 3127
},
{
"epoch": 4.699453551912568,
"grad_norm": 0.1909538138649328,
"learning_rate": 1.1845341512115267e-05,
"loss": 0.3034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09648244082927704,
"step": 860,
"valid_targets_mean": 9526.7,
"valid_targets_min": 1217
},
{
"epoch": 4.726775956284153,
"grad_norm": 0.20046945627414886,
"learning_rate": 1.1597097555425954e-05,
"loss": 0.3059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1106850728392601,
"step": 865,
"valid_targets_mean": 9296.9,
"valid_targets_min": 1941
},
{
"epoch": 4.754098360655737,
"grad_norm": 0.19632506560486584,
"learning_rate": 1.1350415873396673e-05,
"loss": 0.298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11077627539634705,
"step": 870,
"valid_targets_mean": 10079.2,
"valid_targets_min": 3742
},
{
"epoch": 4.781420765027322,
"grad_norm": 0.20226350645324653,
"learning_rate": 1.1105342329292368e-05,
"loss": 0.2986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09841288626194,
"step": 875,
"valid_targets_mean": 9524.3,
"valid_targets_min": 2640
},
{
"epoch": 4.808743169398907,
"grad_norm": 0.19833226467815507,
"learning_rate": 1.0861922487391588e-05,
"loss": 0.3036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11468029022216797,
"step": 880,
"valid_targets_mean": 9712.4,
"valid_targets_min": 2192
},
{
"epoch": 4.836065573770492,
"grad_norm": 0.2078497374363878,
"learning_rate": 1.0620201604515225e-05,
"loss": 0.2988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09936510026454926,
"step": 885,
"valid_targets_mean": 9141.4,
"valid_targets_min": 3946
},
{
"epoch": 4.863387978142076,
"grad_norm": 0.2088859014000221,
"learning_rate": 1.0380224621612252e-05,
"loss": 0.3092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09573344886302948,
"step": 890,
"valid_targets_mean": 8703.2,
"valid_targets_min": 3200
},
{
"epoch": 4.890710382513661,
"grad_norm": 0.20305305414716004,
"learning_rate": 1.0142036155404322e-05,
"loss": 0.2958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10266251862049103,
"step": 895,
"valid_targets_mean": 9156.1,
"valid_targets_min": 2608
},
{
"epoch": 4.918032786885246,
"grad_norm": 0.19630418224220209,
"learning_rate": 9.905680490090557e-06,
"loss": 0.3014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09524133801460266,
"step": 900,
"valid_targets_mean": 8565.4,
"valid_targets_min": 3327
},
{
"epoch": 4.945355191256831,
"grad_norm": 0.20168250280220565,
"learning_rate": 9.671201569114213e-06,
"loss": 0.3004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1022343784570694,
"step": 905,
"valid_targets_mean": 9478.6,
"valid_targets_min": 3999
},
{
"epoch": 4.972677595628415,
"grad_norm": 0.18721802046864403,
"learning_rate": 9.438642986992641e-06,
"loss": 0.3017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09467854350805283,
"step": 910,
"valid_targets_mean": 9398.7,
"valid_targets_min": 3953
},
{
"epoch": 5.0,
"grad_norm": 0.1849748779265062,
"learning_rate": 9.20804798121221e-06,
"loss": 0.3005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10355669260025024,
"step": 915,
"valid_targets_mean": 10328.0,
"valid_targets_min": 5001
},
{
"epoch": 5.027322404371585,
"grad_norm": 0.19962321315407866,
"learning_rate": 8.979459424189525e-06,
"loss": 0.3005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09472991526126862,
"step": 920,
"valid_targets_mean": 9011.2,
"valid_targets_min": 3075
},
{
"epoch": 5.054644808743169,
"grad_norm": 0.20809601692737512,
"learning_rate": 8.752919815300541e-06,
"loss": 0.3,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09876327216625214,
"step": 925,
"valid_targets_mean": 8703.9,
"valid_targets_min": 3837
},
{
"epoch": 5.081967213114754,
"grad_norm": 0.19153145876651814,
"learning_rate": 8.528471272979083e-06,
"loss": 0.3006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0934215560555458,
"step": 930,
"valid_targets_mean": 8727.2,
"valid_targets_min": 1647
},
{
"epoch": 5.109289617486339,
"grad_norm": 0.19078481475641737,
"learning_rate": 8.30615552688611e-06,
"loss": 0.3012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10409128665924072,
"step": 935,
"valid_targets_mean": 9045.9,
"valid_targets_min": 3195
},
{
"epoch": 5.136612021857924,
"grad_norm": 0.1771089600396969,
"learning_rate": 8.086013910151334e-06,
"loss": 0.2989,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09420415759086609,
"step": 940,
"valid_targets_mean": 9303.5,
"valid_targets_min": 1773
},
{
"epoch": 5.163934426229508,
"grad_norm": 0.2047670944434993,
"learning_rate": 7.868087351688508e-06,
"loss": 0.2981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10569000244140625,
"step": 945,
"valid_targets_mean": 9336.9,
"valid_targets_min": 3503
},
{
"epoch": 5.191256830601093,
"grad_norm": 0.19039028840148223,
"learning_rate": 7.652416368585904e-06,
"loss": 0.3008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09627662599086761,
"step": 950,
"valid_targets_mean": 8417.0,
"valid_targets_min": 4164
},
{
"epoch": 5.218579234972678,
"grad_norm": 0.18347981206285033,
"learning_rate": 7.4390410585733176e-06,
"loss": 0.3038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10283610969781876,
"step": 955,
"valid_targets_mean": 10466.1,
"valid_targets_min": 3068
},
{
"epoch": 5.245901639344262,
"grad_norm": 0.1879595722998192,
"learning_rate": 7.228001092567094e-06,
"loss": 0.2949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10533443093299866,
"step": 960,
"valid_targets_mean": 9295.6,
"valid_targets_min": 3928
},
{
"epoch": 5.273224043715847,
"grad_norm": 0.192338487969101,
"learning_rate": 7.01933570729447e-06,
"loss": 0.2972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10413585603237152,
"step": 965,
"valid_targets_mean": 9177.1,
"valid_targets_min": 2608
},
{
"epoch": 5.300546448087432,
"grad_norm": 0.18316019445161066,
"learning_rate": 6.8130836979986236e-06,
"loss": 0.3004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09926620125770569,
"step": 970,
"valid_targets_mean": 9234.4,
"valid_targets_min": 898
},
{
"epoch": 5.327868852459017,
"grad_norm": 0.1938556846923826,
"learning_rate": 6.609283411225873e-06,
"loss": 0.3033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1027819961309433,
"step": 975,
"valid_targets_mean": 10174.5,
"valid_targets_min": 5253
},
{
"epoch": 5.355191256830601,
"grad_norm": 0.2002549610033798,
"learning_rate": 6.407972737696211e-06,
"loss": 0.2984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10788215696811676,
"step": 980,
"valid_targets_mean": 9056.6,
"valid_targets_min": 1809
},
{
"epoch": 5.382513661202186,
"grad_norm": 0.23628923762267479,
"learning_rate": 6.209189105258661e-06,
"loss": 0.3033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10605168342590332,
"step": 985,
"valid_targets_mean": 9745.0,
"valid_targets_min": 335
},
{
"epoch": 5.409836065573771,
"grad_norm": 0.20266745514688106,
"learning_rate": 6.012969471932657e-06,
"loss": 0.2998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08208920806646347,
"step": 990,
"valid_targets_mean": 7854.0,
"valid_targets_min": 3258
},
{
"epoch": 5.437158469945355,
"grad_norm": 0.19564425846701008,
"learning_rate": 5.819350319036765e-06,
"loss": 0.2918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09307222813367844,
"step": 995,
"valid_targets_mean": 9187.5,
"valid_targets_min": 4142
},
{
"epoch": 5.46448087431694,
"grad_norm": 0.18755944297837682,
"learning_rate": 5.628367644406039e-06,
"loss": 0.2944,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10457966476678848,
"step": 1000,
"valid_targets_mean": 10211.1,
"valid_targets_min": 2593
},
{
"epoch": 5.491803278688525,
"grad_norm": 0.17901385962822888,
"learning_rate": 5.440056955699304e-06,
"loss": 0.2939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10055485367774963,
"step": 1005,
"valid_targets_mean": 9671.6,
"valid_targets_min": 3367
},
{
"epoch": 5.51912568306011,
"grad_norm": 0.1847063215491466,
"learning_rate": 5.254453263797521e-06,
"loss": 0.2983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10656660050153732,
"step": 1010,
"valid_targets_mean": 9140.8,
"valid_targets_min": 2622
},
{
"epoch": 5.546448087431694,
"grad_norm": 0.19112949612412253,
"learning_rate": 5.0715910762945245e-06,
"loss": 0.3021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09959493577480316,
"step": 1015,
"valid_targets_mean": 9179.6,
"valid_targets_min": 3364
},
{
"epoch": 5.573770491803279,
"grad_norm": 0.18311052180054005,
"learning_rate": 4.8915043910813745e-06,
"loss": 0.2891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08283189684152603,
"step": 1020,
"valid_targets_mean": 8305.5,
"valid_targets_min": 3385
},
{
"epoch": 5.601092896174864,
"grad_norm": 0.20148162135858644,
"learning_rate": 4.7142266900254006e-06,
"loss": 0.2968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09707757830619812,
"step": 1025,
"valid_targets_mean": 8936.8,
"valid_targets_min": 1156
},
{
"epoch": 5.628415300546449,
"grad_norm": 0.20074382989705658,
"learning_rate": 4.53979093274526e-06,
"loss": 0.2994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0964854508638382,
"step": 1030,
"valid_targets_mean": 8863.9,
"valid_targets_min": 2173
},
{
"epoch": 5.655737704918033,
"grad_norm": 0.1885114584658996,
"learning_rate": 4.3682295504830474e-06,
"loss": 0.3026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09963271021842957,
"step": 1035,
"valid_targets_mean": 9199.4,
"valid_targets_min": 1574
},
{
"epoch": 5.683060109289618,
"grad_norm": 0.18296985933463858,
"learning_rate": 4.199574440074623e-06,
"loss": 0.2976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10074016451835632,
"step": 1040,
"valid_targets_mean": 10138.1,
"valid_targets_min": 857
},
{
"epoch": 5.7103825136612025,
"grad_norm": 0.2863001675152787,
"learning_rate": 4.033856958019371e-06,
"loss": 0.2997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09942857921123505,
"step": 1045,
"valid_targets_mean": 9787.4,
"valid_targets_min": 3371
},
{
"epoch": 5.737704918032787,
"grad_norm": 0.1898147998476062,
"learning_rate": 3.8711079146503474e-06,
"loss": 0.303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10072211176156998,
"step": 1050,
"valid_targets_mean": 9318.2,
"valid_targets_min": 4205
},
{
"epoch": 5.7650273224043715,
"grad_norm": 0.1919997166958646,
"learning_rate": 3.7113575684060045e-06,
"loss": 0.2935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09942483901977539,
"step": 1055,
"valid_targets_mean": 8786.4,
"valid_targets_min": 3289
},
{
"epoch": 5.7923497267759565,
"grad_norm": 0.18796578742686923,
"learning_rate": 3.554635620204503e-06,
"loss": 0.298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10854838043451309,
"step": 1060,
"valid_targets_mean": 9333.5,
"valid_targets_min": 2235
},
{
"epoch": 5.8196721311475414,
"grad_norm": 0.19241542356969288,
"learning_rate": 3.400971207921706e-06,
"loss": 0.2952,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10749229788780212,
"step": 1065,
"valid_targets_mean": 9327.5,
"valid_targets_min": 3286
},
{
"epoch": 5.8469945355191255,
"grad_norm": 0.1767732898447703,
"learning_rate": 3.2503929009738443e-06,
"loss": 0.2973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10737217217683792,
"step": 1070,
"valid_targets_mean": 10028.6,
"valid_targets_min": 3630
},
{
"epoch": 5.8743169398907105,
"grad_norm": 0.18563482469756257,
"learning_rate": 3.102928695005858e-06,
"loss": 0.3014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10022571682929993,
"step": 1075,
"valid_targets_mean": 9828.6,
"valid_targets_min": 2734
},
{
"epoch": 5.901639344262295,
"grad_norm": 0.19161764885066354,
"learning_rate": 2.9586060066864286e-06,
"loss": 0.3016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09793832898139954,
"step": 1080,
"valid_targets_mean": 8281.3,
"valid_targets_min": 3716
},
{
"epoch": 5.9289617486338795,
"grad_norm": 0.18758187439096646,
"learning_rate": 2.8174516686106334e-06,
"loss": 0.2953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09064612537622452,
"step": 1085,
"valid_targets_mean": 8252.4,
"valid_targets_min": 3597
},
{
"epoch": 5.956284153005464,
"grad_norm": 0.17933861822945216,
"learning_rate": 2.679491924311226e-06,
"loss": 0.2939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08484348654747009,
"step": 1090,
"valid_targets_mean": 8447.0,
"valid_targets_min": 1606
},
{
"epoch": 5.983606557377049,
"grad_norm": 0.2184035657642095,
"learning_rate": 2.5447524233794154e-06,
"loss": 0.2952,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10270243883132935,
"step": 1095,
"valid_targets_mean": 9839.3,
"valid_targets_min": 2240
},
{
"epoch": 6.0109289617486334,
"grad_norm": 0.18016930588695532,
"learning_rate": 2.4132582166960594e-06,
"loss": 0.2993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09060963988304138,
"step": 1100,
"valid_targets_mean": 8204.4,
"valid_targets_min": 4318
},
{
"epoch": 6.038251366120218,
"grad_norm": 0.1807505264729319,
"learning_rate": 2.2850337517741926e-06,
"loss": 0.2953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11188945174217224,
"step": 1105,
"valid_targets_mean": 9868.6,
"valid_targets_min": 2986
},
{
"epoch": 6.065573770491803,
"grad_norm": 0.17725550842049373,
"learning_rate": 2.1601028682137184e-06,
"loss": 0.2982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10796324908733368,
"step": 1110,
"valid_targets_mean": 9903.1,
"valid_targets_min": 4404
},
{
"epoch": 6.092896174863388,
"grad_norm": 0.17843385797969258,
"learning_rate": 2.038488793269142e-06,
"loss": 0.2962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10193131864070892,
"step": 1115,
"valid_targets_mean": 9263.0,
"valid_targets_min": 2531
},
{
"epoch": 6.120218579234972,
"grad_norm": 0.18922618634146468,
"learning_rate": 1.9202141375311335e-06,
"loss": 0.2987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11177726089954376,
"step": 1120,
"valid_targets_mean": 10449.2,
"valid_targets_min": 1942
},
{
"epoch": 6.147540983606557,
"grad_norm": 0.18531454529279406,
"learning_rate": 1.8053008907227454e-06,
"loss": 0.2971,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09427063167095184,
"step": 1125,
"valid_targets_mean": 8988.2,
"valid_targets_min": 3419
},
{
"epoch": 6.174863387978142,
"grad_norm": 0.16935445242728722,
"learning_rate": 1.6937704176110582e-06,
"loss": 0.3047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10389615595340729,
"step": 1130,
"valid_targets_mean": 9609.2,
"valid_targets_min": 4159
},
{
"epoch": 6.202185792349727,
"grad_norm": 0.16871211622471122,
"learning_rate": 1.5856434540350462e-06,
"loss": 0.2888,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09071569889783859,
"step": 1135,
"valid_targets_mean": 9943.8,
"valid_targets_min": 1810
},
{
"epoch": 6.229508196721311,
"grad_norm": 0.17256264539708166,
"learning_rate": 1.4809401030503345e-06,
"loss": 0.2958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09398217499256134,
"step": 1140,
"valid_targets_mean": 9639.2,
"valid_targets_min": 1414
},
{
"epoch": 6.256830601092896,
"grad_norm": 0.18438155928726757,
"learning_rate": 1.3796798311916337e-06,
"loss": 0.2961,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09930949658155441,
"step": 1145,
"valid_targets_mean": 8487.8,
"valid_targets_min": 2400
},
{
"epoch": 6.284153005464481,
"grad_norm": 0.1997423930316252,
"learning_rate": 1.2818814648534895e-06,
"loss": 0.2962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1077326089143753,
"step": 1150,
"valid_targets_mean": 9475.9,
"valid_targets_min": 3065
},
{
"epoch": 6.311475409836065,
"grad_norm": 0.18503322678054349,
"learning_rate": 1.187563186790075e-06,
"loss": 0.2974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10482992976903915,
"step": 1155,
"valid_targets_mean": 9660.8,
"valid_targets_min": 785
},
{
"epoch": 6.33879781420765,
"grad_norm": 0.18384891461773237,
"learning_rate": 1.0967425327346447e-06,
"loss": 0.2923,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1072668582201004,
"step": 1160,
"valid_targets_mean": 10043.8,
"valid_targets_min": 2948
},
{
"epoch": 6.366120218579235,
"grad_norm": 0.17679068550790888,
"learning_rate": 1.0094363881392665e-06,
"loss": 0.2925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08984339237213135,
"step": 1165,
"valid_targets_mean": 9161.0,
"valid_targets_min": 2769
},
{
"epoch": 6.39344262295082,
"grad_norm": 0.1832802026343294,
"learning_rate": 9.256609850354636e-07,
"loss": 0.2969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08827763050794601,
"step": 1170,
"valid_targets_mean": 7520.1,
"valid_targets_min": 1938
},
{
"epoch": 6.420765027322404,
"grad_norm": 0.1866939536850409,
"learning_rate": 8.45431899016338e-07,
"loss": 0.2994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10817040503025055,
"step": 1175,
"valid_targets_mean": 9274.0,
"valid_targets_min": 418
},
{
"epoch": 6.448087431693989,
"grad_norm": 0.1744012616260319,
"learning_rate": 7.687640463407597e-07,
"loss": 0.2936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09306332468986511,
"step": 1180,
"valid_targets_mean": 8181.0,
"valid_targets_min": 2941
},
{
"epoch": 6.475409836065574,
"grad_norm": 0.17390086899257662,
"learning_rate": 6.956716811601106e-07,
"loss": 0.2957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09819996356964111,
"step": 1185,
"valid_targets_mean": 9636.3,
"valid_targets_min": 3687
},
{
"epoch": 6.502732240437158,
"grad_norm": 0.18082724032868075,
"learning_rate": 6.261683928681383e-07,
"loss": 0.2948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09933126717805862,
"step": 1190,
"valid_targets_mean": 9554.2,
"valid_targets_min": 3068
},
{
"epoch": 6.530054644808743,
"grad_norm": 0.1835801622956054,
"learning_rate": 5.602671035744123e-07,
"loss": 0.2963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0911635234951973,
"step": 1195,
"valid_targets_mean": 8480.4,
"valid_targets_min": 2558
},
{
"epoch": 6.557377049180328,
"grad_norm": 0.1824726825862776,
"learning_rate": 4.979800657018308e-07,
"loss": 0.2996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09349572658538818,
"step": 1200,
"valid_targets_mean": 9020.2,
"valid_targets_min": 2332
},
{
"epoch": 6.584699453551913,
"grad_norm": 0.17047555364265193,
"learning_rate": 4.393188597086395e-07,
"loss": 0.29,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09120497107505798,
"step": 1205,
"valid_targets_mean": 8665.4,
"valid_targets_min": 3232
},
{
"epoch": 6.612021857923497,
"grad_norm": 0.18297000199911517,
"learning_rate": 3.842943919353914e-07,
"loss": 0.2949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09089861810207367,
"step": 1210,
"valid_targets_mean": 8642.9,
"valid_targets_min": 3945
},
{
"epoch": 6.639344262295082,
"grad_norm": 0.17410787574033595,
"learning_rate": 3.3291689257721526e-07,
"loss": 0.3004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09799309074878693,
"step": 1215,
"valid_targets_mean": 10470.1,
"valid_targets_min": 2412
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.1760736064792558,
"learning_rate": 2.8519591378181944e-07,
"loss": 0.2948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09015963971614838,
"step": 1220,
"valid_targets_mean": 8567.0,
"valid_targets_min": 3465
},
{
"epoch": 6.693989071038251,
"grad_norm": 0.17863351381213097,
"learning_rate": 2.4114032787355246e-07,
"loss": 0.3015,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1061282753944397,
"step": 1225,
"valid_targets_mean": 8916.6,
"valid_targets_min": 816
},
{
"epoch": 6.721311475409836,
"grad_norm": 0.1734492997559578,
"learning_rate": 2.0075832570384257e-07,
"loss": 0.298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09828498959541321,
"step": 1230,
"valid_targets_mean": 9364.9,
"valid_targets_min": 3576
},
{
"epoch": 6.748633879781421,
"grad_norm": 0.1716812499454898,
"learning_rate": 1.6405741512835137e-07,
"loss": 0.2962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1010083481669426,
"step": 1235,
"valid_targets_mean": 9363.5,
"valid_targets_min": 2674
},
{
"epoch": 6.775956284153006,
"grad_norm": 0.17904530357170415,
"learning_rate": 1.310444196111127e-07,
"loss": 0.2995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10142147541046143,
"step": 1240,
"valid_targets_mean": 9184.9,
"valid_targets_min": 3464
},
{
"epoch": 6.80327868852459,
"grad_norm": 0.1737805584907467,
"learning_rate": 1.0172547695590062e-07,
"loss": 0.2992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10017992556095123,
"step": 1245,
"valid_targets_mean": 10094.7,
"valid_targets_min": 3627
},
{
"epoch": 6.830601092896175,
"grad_norm": 0.1845270392071439,
"learning_rate": 7.61060381650891e-08,
"loss": 0.2947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10975556075572968,
"step": 1250,
"valid_targets_mean": 10162.3,
"valid_targets_min": 3446
},
{
"epoch": 6.85792349726776,
"grad_norm": 0.175468702056559,
"learning_rate": 5.4190866426195866e-08,
"loss": 0.2945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11438390612602234,
"step": 1255,
"valid_targets_mean": 10338.3,
"valid_targets_min": 4181
},
{
"epoch": 6.885245901639344,
"grad_norm": 0.1695938568274096,
"learning_rate": 3.59840362263042e-08,
"loss": 0.2939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09429032355546951,
"step": 1260,
"valid_targets_mean": 8700.3,
"valid_targets_min": 2328
},
{
"epoch": 6.912568306010929,
"grad_norm": 0.18293122350836902,
"learning_rate": 2.148893259453111e-08,
"loss": 0.2965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09766453504562378,
"step": 1265,
"valid_targets_mean": 9036.6,
"valid_targets_min": 3738
},
{
"epoch": 6.939890710382514,
"grad_norm": 0.17737915360825743,
"learning_rate": 1.070825047268631e-08,
"loss": 0.2973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09531472623348236,
"step": 1270,
"valid_targets_mean": 9068.0,
"valid_targets_min": 3580
},
{
"epoch": 6.967213114754099,
"grad_norm": 0.18806311305133536,
"learning_rate": 3.6439942142196815e-09,
"loss": 0.3026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10511855781078339,
"step": 1275,
"valid_targets_mean": 9262.5,
"valid_targets_min": 2472
},
{
"epoch": 6.994535519125683,
"grad_norm": 0.18563493223557567,
"learning_rate": 2.974772115682534e-10,
"loss": 0.303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09767770022153854,
"step": 1280,
"valid_targets_mean": 9584.3,
"valid_targets_min": 3673
},
{
"epoch": 7.0,
"step": 1281,
"total_flos": 5.272385024627311e+18,
"train_loss": 0.0,
"train_runtime": 1.2338,
"train_samples_per_second": 99654.386,
"train_steps_per_second": 1038.244
}
],
"logging_steps": 5,
"max_steps": 1281,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.272385024627311e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}