Files
nemotron-terminal-adapters_…/trainer_state.json

3678 lines
102 KiB
JSON
Raw Permalink Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015151515151515152,
"grad_norm": 10.781819515475682,
"learning_rate": 9.696969696969698e-07,
"loss": 0.9617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3307165205478668,
"step": 5,
"valid_targets_mean": 17257.9,
"valid_targets_min": 6573
},
{
"epoch": 0.030303030303030304,
"grad_norm": 5.8514795468386165,
"learning_rate": 2.181818181818182e-06,
"loss": 0.9303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3343563675880432,
"step": 10,
"valid_targets_mean": 17250.3,
"valid_targets_min": 6149
},
{
"epoch": 0.045454545454545456,
"grad_norm": 2.4089715505491576,
"learning_rate": 3.3939393939393946e-06,
"loss": 0.8651,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30503979325294495,
"step": 15,
"valid_targets_mean": 16585.8,
"valid_targets_min": 4802
},
{
"epoch": 0.06060606060606061,
"grad_norm": 1.805436533710044,
"learning_rate": 4.606060606060606e-06,
"loss": 0.8108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29714435338974,
"step": 20,
"valid_targets_mean": 16405.9,
"valid_targets_min": 4141
},
{
"epoch": 0.07575757575757576,
"grad_norm": 1.287151579235644,
"learning_rate": 5.8181818181818185e-06,
"loss": 0.7799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23542305827140808,
"step": 25,
"valid_targets_mean": 14672.1,
"valid_targets_min": 5753
},
{
"epoch": 0.09090909090909091,
"grad_norm": 0.8990922328705272,
"learning_rate": 7.030303030303031e-06,
"loss": 0.7383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2207568883895874,
"step": 30,
"valid_targets_mean": 13728.5,
"valid_targets_min": 2603
},
{
"epoch": 0.10606060606060606,
"grad_norm": 0.6293271791502106,
"learning_rate": 8.242424242424243e-06,
"loss": 0.6976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2205752432346344,
"step": 35,
"valid_targets_mean": 14978.6,
"valid_targets_min": 6184
},
{
"epoch": 0.12121212121212122,
"grad_norm": 0.4829386836834071,
"learning_rate": 9.454545454545456e-06,
"loss": 0.6631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2082587331533432,
"step": 40,
"valid_targets_mean": 15860.1,
"valid_targets_min": 4007
},
{
"epoch": 0.13636363636363635,
"grad_norm": 0.3907933259500624,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.6355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2100229263305664,
"step": 45,
"valid_targets_mean": 15454.4,
"valid_targets_min": 3146
},
{
"epoch": 0.15151515151515152,
"grad_norm": 0.2966565614873845,
"learning_rate": 1.187878787878788e-05,
"loss": 0.6043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2182207554578781,
"step": 50,
"valid_targets_mean": 16907.9,
"valid_targets_min": 7771
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.30850551847505775,
"learning_rate": 1.3090909090909092e-05,
"loss": 0.5897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20089182257652283,
"step": 55,
"valid_targets_mean": 14385.9,
"valid_targets_min": 4756
},
{
"epoch": 0.18181818181818182,
"grad_norm": 0.24779819106568451,
"learning_rate": 1.4303030303030305e-05,
"loss": 0.5724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1999066174030304,
"step": 60,
"valid_targets_mean": 16691.1,
"valid_targets_min": 7386
},
{
"epoch": 0.19696969696969696,
"grad_norm": 0.22104440124868213,
"learning_rate": 1.5515151515151516e-05,
"loss": 0.5586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1938043236732483,
"step": 65,
"valid_targets_mean": 15472.4,
"valid_targets_min": 4913
},
{
"epoch": 0.21212121212121213,
"grad_norm": 0.24959933484773072,
"learning_rate": 1.672727272727273e-05,
"loss": 0.5451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17949575185775757,
"step": 70,
"valid_targets_mean": 14789.4,
"valid_targets_min": 3882
},
{
"epoch": 0.22727272727272727,
"grad_norm": 0.22005550174278024,
"learning_rate": 1.7939393939393942e-05,
"loss": 0.5295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17324337363243103,
"step": 75,
"valid_targets_mean": 15401.0,
"valid_targets_min": 3272
},
{
"epoch": 0.24242424242424243,
"grad_norm": 0.22802634959112467,
"learning_rate": 1.9151515151515152e-05,
"loss": 0.5232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17860084772109985,
"step": 80,
"valid_targets_mean": 15247.9,
"valid_targets_min": 3948
},
{
"epoch": 0.25757575757575757,
"grad_norm": 0.2624979989553396,
"learning_rate": 2.0363636363636365e-05,
"loss": 0.5194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16321659088134766,
"step": 85,
"valid_targets_mean": 13104.3,
"valid_targets_min": 6923
},
{
"epoch": 0.2727272727272727,
"grad_norm": 0.2436635264309524,
"learning_rate": 2.1575757575757578e-05,
"loss": 0.515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1677224040031433,
"step": 90,
"valid_targets_mean": 14824.3,
"valid_targets_min": 4583
},
{
"epoch": 0.2878787878787879,
"grad_norm": 0.29651493137861273,
"learning_rate": 2.278787878787879e-05,
"loss": 0.5038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1670367568731308,
"step": 95,
"valid_targets_mean": 15513.7,
"valid_targets_min": 1267
},
{
"epoch": 0.30303030303030304,
"grad_norm": 0.2831831041789719,
"learning_rate": 2.4e-05,
"loss": 0.5042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1713901311159134,
"step": 100,
"valid_targets_mean": 14562.0,
"valid_targets_min": 3932
},
{
"epoch": 0.3181818181818182,
"grad_norm": 0.23308416680278246,
"learning_rate": 2.5212121212121214e-05,
"loss": 0.5005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17232999205589294,
"step": 105,
"valid_targets_mean": 15750.6,
"valid_targets_min": 4312
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.2890264891487217,
"learning_rate": 2.6424242424242427e-05,
"loss": 0.4915,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1580529510974884,
"step": 110,
"valid_targets_mean": 14890.7,
"valid_targets_min": 6327
},
{
"epoch": 0.3484848484848485,
"grad_norm": 0.2904637604741024,
"learning_rate": 2.763636363636364e-05,
"loss": 0.4901,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17326104640960693,
"step": 115,
"valid_targets_mean": 15371.7,
"valid_targets_min": 5359
},
{
"epoch": 0.36363636363636365,
"grad_norm": 0.26161939834322806,
"learning_rate": 2.884848484848485e-05,
"loss": 0.4865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17404524981975555,
"step": 120,
"valid_targets_mean": 16654.5,
"valid_targets_min": 5580
},
{
"epoch": 0.3787878787878788,
"grad_norm": 0.33212371681558334,
"learning_rate": 3.0060606060606062e-05,
"loss": 0.4898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1589001715183258,
"step": 125,
"valid_targets_mean": 14516.2,
"valid_targets_min": 6215
},
{
"epoch": 0.3939393939393939,
"grad_norm": 0.35155860659846594,
"learning_rate": 3.127272727272728e-05,
"loss": 0.4813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15316787362098694,
"step": 130,
"valid_targets_mean": 15876.2,
"valid_targets_min": 4197
},
{
"epoch": 0.4090909090909091,
"grad_norm": 0.3186605453681469,
"learning_rate": 3.2484848484848485e-05,
"loss": 0.4827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14787974953651428,
"step": 135,
"valid_targets_mean": 15059.2,
"valid_targets_min": 5205
},
{
"epoch": 0.42424242424242425,
"grad_norm": 0.292885635887715,
"learning_rate": 3.36969696969697e-05,
"loss": 0.4809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17198166251182556,
"step": 140,
"valid_targets_mean": 16177.4,
"valid_targets_min": 4596
},
{
"epoch": 0.4393939393939394,
"grad_norm": 0.42141852592961904,
"learning_rate": 3.490909090909091e-05,
"loss": 0.4823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15170565247535706,
"step": 145,
"valid_targets_mean": 15029.5,
"valid_targets_min": 3573
},
{
"epoch": 0.45454545454545453,
"grad_norm": 0.30639003045513324,
"learning_rate": 3.6121212121212124e-05,
"loss": 0.4726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1611180603504181,
"step": 150,
"valid_targets_mean": 15518.5,
"valid_targets_min": 6323
},
{
"epoch": 0.4696969696969697,
"grad_norm": 0.3495360074509179,
"learning_rate": 3.733333333333334e-05,
"loss": 0.476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18164387345314026,
"step": 155,
"valid_targets_mean": 16635.5,
"valid_targets_min": 7029
},
{
"epoch": 0.48484848484848486,
"grad_norm": 0.3999563868846935,
"learning_rate": 3.854545454545455e-05,
"loss": 0.469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.155188649892807,
"step": 160,
"valid_targets_mean": 15272.1,
"valid_targets_min": 3593
},
{
"epoch": 0.5,
"grad_norm": 0.3449656470737079,
"learning_rate": 3.9757575757575757e-05,
"loss": 0.4689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15367814898490906,
"step": 165,
"valid_targets_mean": 14878.1,
"valid_targets_min": 2953
},
{
"epoch": 0.5151515151515151,
"grad_norm": 0.3665757358488547,
"learning_rate": 3.999928391557286e-05,
"loss": 0.4709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15468040108680725,
"step": 170,
"valid_targets_mean": 15413.5,
"valid_targets_min": 5168
},
{
"epoch": 0.5303030303030303,
"grad_norm": 0.2788745773590794,
"learning_rate": 3.999637491047052e-05,
"loss": 0.4693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13528694212436676,
"step": 175,
"valid_targets_mean": 13168.3,
"valid_targets_min": 2921
},
{
"epoch": 0.5454545454545454,
"grad_norm": 0.4240308998832283,
"learning_rate": 3.999122855464813e-05,
"loss": 0.4647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15115290880203247,
"step": 180,
"valid_targets_mean": 15130.8,
"valid_targets_min": 6243
},
{
"epoch": 0.5606060606060606,
"grad_norm": 0.318176983186771,
"learning_rate": 3.998384542392021e-05,
"loss": 0.4688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14690656960010529,
"step": 185,
"valid_targets_mean": 15081.4,
"valid_targets_min": 2460
},
{
"epoch": 0.5757575757575758,
"grad_norm": 0.3531303282866239,
"learning_rate": 3.9974226344369124e-05,
"loss": 0.4676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16395458579063416,
"step": 190,
"valid_targets_mean": 16651.0,
"valid_targets_min": 2044
},
{
"epoch": 0.5909090909090909,
"grad_norm": 0.3185804484156167,
"learning_rate": 3.996237239225268e-05,
"loss": 0.4641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14105798304080963,
"step": 195,
"valid_targets_mean": 15282.0,
"valid_targets_min": 3829
},
{
"epoch": 0.6060606060606061,
"grad_norm": 0.3306774162967017,
"learning_rate": 3.994828489388371e-05,
"loss": 0.4606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15033362805843353,
"step": 200,
"valid_targets_mean": 14899.4,
"valid_targets_min": 4148
},
{
"epoch": 0.6212121212121212,
"grad_norm": 0.2666534138366462,
"learning_rate": 3.993196542548162e-05,
"loss": 0.4659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1435295045375824,
"step": 205,
"valid_targets_mean": 13889.3,
"valid_targets_min": 1537
},
{
"epoch": 0.6363636363636364,
"grad_norm": 0.2619401719498242,
"learning_rate": 3.991341581299609e-05,
"loss": 0.4614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15735077857971191,
"step": 210,
"valid_targets_mean": 16223.8,
"valid_targets_min": 7738
},
{
"epoch": 0.6515151515151515,
"grad_norm": 0.31758013245846667,
"learning_rate": 3.9892638131902765e-05,
"loss": 0.4546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1687242090702057,
"step": 215,
"valid_targets_mean": 16545.2,
"valid_targets_min": 5321
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.28828120577659794,
"learning_rate": 3.9869634706971e-05,
"loss": 0.4597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15184861421585083,
"step": 220,
"valid_targets_mean": 15488.8,
"valid_targets_min": 4785
},
{
"epoch": 0.6818181818181818,
"grad_norm": 0.32648424933952014,
"learning_rate": 3.984440811200379e-05,
"loss": 0.4574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14916986227035522,
"step": 225,
"valid_targets_mean": 14360.5,
"valid_targets_min": 3511
},
{
"epoch": 0.696969696969697,
"grad_norm": 0.3724763305936334,
"learning_rate": 3.981696116954973e-05,
"loss": 0.4533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1421215534210205,
"step": 230,
"valid_targets_mean": 13722.8,
"valid_targets_min": 5643
},
{
"epoch": 0.7121212121212122,
"grad_norm": 0.42652803865674005,
"learning_rate": 3.978729695058729e-05,
"loss": 0.4534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14489291608333588,
"step": 235,
"valid_targets_mean": 14735.2,
"valid_targets_min": 3745
},
{
"epoch": 0.7272727272727273,
"grad_norm": 0.4770959081747357,
"learning_rate": 3.9755418774181146e-05,
"loss": 0.456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16189418733119965,
"step": 240,
"valid_targets_mean": 17361.2,
"valid_targets_min": 4950
},
{
"epoch": 0.7424242424242424,
"grad_norm": 0.493013737568908,
"learning_rate": 3.9721330207110835e-05,
"loss": 0.4509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15398374199867249,
"step": 245,
"valid_targets_mean": 15619.1,
"valid_targets_min": 6005
},
{
"epoch": 0.7575757575757576,
"grad_norm": 0.3225495200220724,
"learning_rate": 3.9685035063471675e-05,
"loss": 0.4494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15332111716270447,
"step": 250,
"valid_targets_mean": 15341.6,
"valid_targets_min": 5202
},
{
"epoch": 0.7727272727272727,
"grad_norm": 0.34133731195657563,
"learning_rate": 3.964653740424804e-05,
"loss": 0.449,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1565016657114029,
"step": 255,
"valid_targets_mean": 16527.7,
"valid_targets_min": 4140
},
{
"epoch": 0.7878787878787878,
"grad_norm": 0.4303037712575851,
"learning_rate": 3.960584153685895e-05,
"loss": 0.4535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1628049910068512,
"step": 260,
"valid_targets_mean": 15461.1,
"valid_targets_min": 3340
},
{
"epoch": 0.803030303030303,
"grad_norm": 0.3548169264545474,
"learning_rate": 3.9562952014676116e-05,
"loss": 0.4507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16039399802684784,
"step": 265,
"valid_targets_mean": 16171.8,
"valid_targets_min": 7126
},
{
"epoch": 0.8181818181818182,
"grad_norm": 0.25424846956268254,
"learning_rate": 3.9517873636514525e-05,
"loss": 0.4545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14358675479888916,
"step": 270,
"valid_targets_mean": 14434.2,
"valid_targets_min": 2628
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.2694812489208858,
"learning_rate": 3.947061144609546e-05,
"loss": 0.4421,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1384536325931549,
"step": 275,
"valid_targets_mean": 15667.6,
"valid_targets_min": 5195
},
{
"epoch": 0.8484848484848485,
"grad_norm": 0.3012250215938447,
"learning_rate": 3.942117073148221e-05,
"loss": 0.4457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15101709961891174,
"step": 280,
"valid_targets_mean": 13737.2,
"valid_targets_min": 3769
},
{
"epoch": 0.8636363636363636,
"grad_norm": 0.33412324364329166,
"learning_rate": 3.9369557024488345e-05,
"loss": 0.4494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13744959235191345,
"step": 285,
"valid_targets_mean": 14516.1,
"valid_targets_min": 4159
},
{
"epoch": 0.8787878787878788,
"grad_norm": 0.28861130651445016,
"learning_rate": 3.931577610005883e-05,
"loss": 0.4482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12138031423091888,
"step": 290,
"valid_targets_mean": 12381.9,
"valid_targets_min": 3785
},
{
"epoch": 0.8939393939393939,
"grad_norm": 0.33189139451222754,
"learning_rate": 3.925983397562385e-05,
"loss": 0.4483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14846676588058472,
"step": 295,
"valid_targets_mean": 15067.7,
"valid_targets_min": 2642
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.26725549323789616,
"learning_rate": 3.920173691042554e-05,
"loss": 0.4486,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15860885381698608,
"step": 300,
"valid_targets_mean": 17651.8,
"valid_targets_min": 6461
},
{
"epoch": 0.9242424242424242,
"grad_norm": 0.28840546967609465,
"learning_rate": 3.914149140481766e-05,
"loss": 0.4445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1289260983467102,
"step": 305,
"valid_targets_mean": 14103.6,
"valid_targets_min": 4383
},
{
"epoch": 0.9393939393939394,
"grad_norm": 0.27544900609307754,
"learning_rate": 3.9079104199538256e-05,
"loss": 0.4468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12725478410720825,
"step": 310,
"valid_targets_mean": 13964.7,
"valid_targets_min": 5266
},
{
"epoch": 0.9545454545454546,
"grad_norm": 0.3210299225048718,
"learning_rate": 3.901458227495549e-05,
"loss": 0.4455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14999495446681976,
"step": 315,
"valid_targets_mean": 14422.9,
"valid_targets_min": 4184
},
{
"epoch": 0.9696969696969697,
"grad_norm": 0.37741796391171567,
"learning_rate": 3.8947932850286585e-05,
"loss": 0.4453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1468706727027893,
"step": 320,
"valid_targets_mean": 15338.0,
"valid_targets_min": 4021
},
{
"epoch": 0.9848484848484849,
"grad_norm": 0.34149777743054666,
"learning_rate": 3.887916338279014e-05,
"loss": 0.44,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13318268954753876,
"step": 325,
"valid_targets_mean": 14689.9,
"valid_targets_min": 7070
},
{
"epoch": 1.0,
"grad_norm": 0.2870293910765331,
"learning_rate": 3.8808281566931675e-05,
"loss": 0.4371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15207594633102417,
"step": 330,
"valid_targets_mean": 15349.0,
"valid_targets_min": 4947
},
{
"epoch": 1.0151515151515151,
"grad_norm": 0.36309118319901884,
"learning_rate": 3.873529533352277e-05,
"loss": 0.4349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1315964013338089,
"step": 335,
"valid_targets_mean": 14656.5,
"valid_targets_min": 2950
},
{
"epoch": 1.0303030303030303,
"grad_norm": 0.276061831173288,
"learning_rate": 3.8660212848833705e-05,
"loss": 0.4208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1446404904127121,
"step": 340,
"valid_targets_mean": 15676.8,
"valid_targets_min": 1267
},
{
"epoch": 1.0454545454545454,
"grad_norm": 0.26011173133082927,
"learning_rate": 3.858304251367972e-05,
"loss": 0.4322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1397808939218521,
"step": 345,
"valid_targets_mean": 15134.7,
"valid_targets_min": 4839
},
{
"epoch": 1.0606060606060606,
"grad_norm": 0.402489225579918,
"learning_rate": 3.850379296248107e-05,
"loss": 0.4323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15310615301132202,
"step": 350,
"valid_targets_mean": 15064.1,
"valid_targets_min": 5758
},
{
"epoch": 1.0757575757575757,
"grad_norm": 0.4129599441197316,
"learning_rate": 3.8422473062297e-05,
"loss": 0.4289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13137058913707733,
"step": 355,
"valid_targets_mean": 14610.6,
"valid_targets_min": 4768
},
{
"epoch": 1.0909090909090908,
"grad_norm": 0.3280334521493676,
"learning_rate": 3.8339091911833545e-05,
"loss": 0.4337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15061558783054352,
"step": 360,
"valid_targets_mean": 14947.8,
"valid_targets_min": 4483
},
{
"epoch": 1.106060606060606,
"grad_norm": 0.2996280506022408,
"learning_rate": 3.825365884042553e-05,
"loss": 0.4375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15207991003990173,
"step": 365,
"valid_targets_mean": 15574.6,
"valid_targets_min": 3271
},
{
"epoch": 1.121212121212121,
"grad_norm": 0.33383024855953686,
"learning_rate": 3.8166183406992745e-05,
"loss": 0.4302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13507221639156342,
"step": 370,
"valid_targets_mean": 13192.1,
"valid_targets_min": 2792
},
{
"epoch": 1.1363636363636362,
"grad_norm": 0.31909323691142444,
"learning_rate": 3.807667539897041e-05,
"loss": 0.4305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13816970586776733,
"step": 375,
"valid_targets_mean": 15480.5,
"valid_targets_min": 3988
},
{
"epoch": 1.1515151515151516,
"grad_norm": 0.31075543582482734,
"learning_rate": 3.798514483121408e-05,
"loss": 0.4309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13919055461883545,
"step": 380,
"valid_targets_mean": 14907.4,
"valid_targets_min": 6132
},
{
"epoch": 1.1666666666666667,
"grad_norm": 0.2983371224518664,
"learning_rate": 3.789160194487908e-05,
"loss": 0.4273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13649028539657593,
"step": 385,
"valid_targets_mean": 15354.1,
"valid_targets_min": 6891
},
{
"epoch": 1.1818181818181819,
"grad_norm": 0.3905433335206102,
"learning_rate": 3.7796057206274686e-05,
"loss": 0.4272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14891022443771362,
"step": 390,
"valid_targets_mean": 16094.3,
"valid_targets_min": 5444
},
{
"epoch": 1.196969696969697,
"grad_norm": 0.33769374363353943,
"learning_rate": 3.769852130569304e-05,
"loss": 0.4341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15502581000328064,
"step": 395,
"valid_targets_mean": 15651.5,
"valid_targets_min": 6372
},
{
"epoch": 1.2121212121212122,
"grad_norm": 0.29012360041686464,
"learning_rate": 3.7599005156213066e-05,
"loss": 0.4299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13702401518821716,
"step": 400,
"valid_targets_mean": 14399.8,
"valid_targets_min": 3372
},
{
"epoch": 1.2272727272727273,
"grad_norm": 0.31947365816457735,
"learning_rate": 3.74975198924794e-05,
"loss": 0.4297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1433682143688202,
"step": 405,
"valid_targets_mean": 13850.6,
"valid_targets_min": 3607
},
{
"epoch": 1.2424242424242424,
"grad_norm": 0.31718143050221165,
"learning_rate": 3.739407686945658e-05,
"loss": 0.4277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14280961453914642,
"step": 410,
"valid_targets_mean": 16541.7,
"valid_targets_min": 4460
},
{
"epoch": 1.2575757575757576,
"grad_norm": 0.45231696742504074,
"learning_rate": 3.728868766115854e-05,
"loss": 0.4296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13743780553340912,
"step": 415,
"valid_targets_mean": 15450.7,
"valid_targets_min": 3921
},
{
"epoch": 1.2727272727272727,
"grad_norm": 0.28084769935009196,
"learning_rate": 3.718136405935365e-05,
"loss": 0.4278,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1414492428302765,
"step": 420,
"valid_targets_mean": 16063.2,
"valid_targets_min": 3552
},
{
"epoch": 1.2878787878787878,
"grad_norm": 0.3631765153651125,
"learning_rate": 3.707211807224534e-05,
"loss": 0.4276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13481499254703522,
"step": 425,
"valid_targets_mean": 15713.6,
"valid_targets_min": 5051
},
{
"epoch": 1.303030303030303,
"grad_norm": 0.3023685657164912,
"learning_rate": 3.696096192312852e-05,
"loss": 0.4309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14772221446037292,
"step": 430,
"valid_targets_mean": 16079.5,
"valid_targets_min": 6001
},
{
"epoch": 1.3181818181818181,
"grad_norm": 0.3557818222724191,
"learning_rate": 3.684790804902199e-05,
"loss": 0.4266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14126922190189362,
"step": 435,
"valid_targets_mean": 15307.2,
"valid_targets_min": 4925
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.28051924385178323,
"learning_rate": 3.673296909927682e-05,
"loss": 0.4253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1278437376022339,
"step": 440,
"valid_targets_mean": 14616.2,
"valid_targets_min": 4863
},
{
"epoch": 1.3484848484848486,
"grad_norm": 0.24904301125597772,
"learning_rate": 3.661615793416109e-05,
"loss": 0.4241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1396869719028473,
"step": 445,
"valid_targets_mean": 15681.2,
"valid_targets_min": 4148
},
{
"epoch": 1.3636363636363638,
"grad_norm": 0.23949545569806505,
"learning_rate": 3.649748762342098e-05,
"loss": 0.429,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14017486572265625,
"step": 450,
"valid_targets_mean": 15908.3,
"valid_targets_min": 4265
},
{
"epoch": 1.378787878787879,
"grad_norm": 0.3421703032120446,
"learning_rate": 3.637697144481839e-05,
"loss": 0.4246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14308439195156097,
"step": 455,
"valid_targets_mean": 15564.7,
"valid_targets_min": 5007
},
{
"epoch": 1.393939393939394,
"grad_norm": 0.35779594775223517,
"learning_rate": 3.625462288264536e-05,
"loss": 0.4174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14782720804214478,
"step": 460,
"valid_targets_mean": 15151.2,
"valid_targets_min": 3472
},
{
"epoch": 1.4090909090909092,
"grad_norm": 0.3544625075767238,
"learning_rate": 3.613045562621533e-05,
"loss": 0.4223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14591552317142487,
"step": 465,
"valid_targets_mean": 15464.0,
"valid_targets_min": 5397
},
{
"epoch": 1.4242424242424243,
"grad_norm": 0.2739256716466335,
"learning_rate": 3.600448356833146e-05,
"loss": 0.4242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1398126780986786,
"step": 470,
"valid_targets_mean": 15358.1,
"valid_targets_min": 4249
},
{
"epoch": 1.4393939393939394,
"grad_norm": 0.2670796299577025,
"learning_rate": 3.587672080373219e-05,
"loss": 0.4253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13911236822605133,
"step": 475,
"valid_targets_mean": 14657.8,
"valid_targets_min": 4850
},
{
"epoch": 1.4545454545454546,
"grad_norm": 0.2716034921366766,
"learning_rate": 3.574718162751426e-05,
"loss": 0.4245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1379561722278595,
"step": 480,
"valid_targets_mean": 16109.5,
"valid_targets_min": 3613
},
{
"epoch": 1.4696969696969697,
"grad_norm": 0.23873812035866543,
"learning_rate": 3.561588053353319e-05,
"loss": 0.4248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13202598690986633,
"step": 485,
"valid_targets_mean": 13867.1,
"valid_targets_min": 1931
},
{
"epoch": 1.4848484848484849,
"grad_norm": 0.23167780418432207,
"learning_rate": 3.5482832212781655e-05,
"loss": 0.4208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14886680245399475,
"step": 490,
"valid_targets_mean": 15954.8,
"valid_targets_min": 3850
},
{
"epoch": 1.5,
"grad_norm": 0.27255259888582417,
"learning_rate": 3.53480515517457e-05,
"loss": 0.4254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15491536259651184,
"step": 495,
"valid_targets_mean": 15829.9,
"valid_targets_min": 3933
},
{
"epoch": 1.5151515151515151,
"grad_norm": 0.25403824366371247,
"learning_rate": 3.5211553630739166e-05,
"loss": 0.4262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13875356316566467,
"step": 500,
"valid_targets_mean": 14701.8,
"valid_targets_min": 5768
},
{
"epoch": 1.5303030303030303,
"grad_norm": 0.26870819873349766,
"learning_rate": 3.5073353722216334e-05,
"loss": 0.4184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13507430255413055,
"step": 505,
"valid_targets_mean": 14002.1,
"valid_targets_min": 3568
},
{
"epoch": 1.5454545454545454,
"grad_norm": 0.3584406752763479,
"learning_rate": 3.4933467289063156e-05,
"loss": 0.4205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13867157697677612,
"step": 510,
"valid_targets_mean": 15053.2,
"valid_targets_min": 3150
},
{
"epoch": 1.5606060606060606,
"grad_norm": 0.26154597010466046,
"learning_rate": 3.4791909982867175e-05,
"loss": 0.4188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12970539927482605,
"step": 515,
"valid_targets_mean": 14832.3,
"valid_targets_min": 5373
},
{
"epoch": 1.5757575757575757,
"grad_norm": 0.3362188714691207,
"learning_rate": 3.464869764216622e-05,
"loss": 0.4238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12605808675289154,
"step": 520,
"valid_targets_mean": 14519.6,
"valid_targets_min": 4981
},
{
"epoch": 1.5909090909090908,
"grad_norm": 0.28016874775885214,
"learning_rate": 3.450384629067635e-05,
"loss": 0.4249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1421700119972229,
"step": 525,
"valid_targets_mean": 14849.1,
"valid_targets_min": 5271
},
{
"epoch": 1.606060606060606,
"grad_norm": 0.31364330936128626,
"learning_rate": 3.435737213549896e-05,
"loss": 0.4188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1353946477174759,
"step": 530,
"valid_targets_mean": 15212.0,
"valid_targets_min": 3960
},
{
"epoch": 1.621212121212121,
"grad_norm": 0.3130590718949519,
"learning_rate": 3.420929156530738e-05,
"loss": 0.4191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12870505452156067,
"step": 535,
"valid_targets_mean": 13804.9,
"valid_targets_min": 4409
},
{
"epoch": 1.6363636363636362,
"grad_norm": 0.32648439826942116,
"learning_rate": 3.405962114851324e-05,
"loss": 0.4188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13703852891921997,
"step": 540,
"valid_targets_mean": 14489.3,
"valid_targets_min": 6422
},
{
"epoch": 1.6515151515151514,
"grad_norm": 0.23558741635634878,
"learning_rate": 3.390837763141261e-05,
"loss": 0.425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1324833631515503,
"step": 545,
"valid_targets_mean": 15401.4,
"valid_targets_min": 4438
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.2562753721750844,
"learning_rate": 3.3755577936312344e-05,
"loss": 0.4184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14327028393745422,
"step": 550,
"valid_targets_mean": 15883.0,
"valid_targets_min": 3181
},
{
"epoch": 1.6818181818181817,
"grad_norm": 0.2573839797506366,
"learning_rate": 3.360123915963662e-05,
"loss": 0.4202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14354410767555237,
"step": 555,
"valid_targets_mean": 15241.0,
"valid_targets_min": 4218
},
{
"epoch": 1.696969696969697,
"grad_norm": 0.2393022935992319,
"learning_rate": 3.3445378570014125e-05,
"loss": 0.4188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14465731382369995,
"step": 560,
"valid_targets_mean": 16679.2,
"valid_targets_min": 6613
},
{
"epoch": 1.7121212121212122,
"grad_norm": 0.3130578183647946,
"learning_rate": 3.328801360634585e-05,
"loss": 0.4231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.131614089012146,
"step": 565,
"valid_targets_mean": 15283.2,
"valid_targets_min": 5069
},
{
"epoch": 1.7272727272727273,
"grad_norm": 0.22969999891245066,
"learning_rate": 3.312916187585392e-05,
"loss": 0.424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14087988436222076,
"step": 570,
"valid_targets_mean": 15229.3,
"valid_targets_min": 4248
},
{
"epoch": 1.7424242424242424,
"grad_norm": 0.25242347409882654,
"learning_rate": 3.296884115211157e-05,
"loss": 0.4202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1289135217666626,
"step": 575,
"valid_targets_mean": 13637.0,
"valid_targets_min": 1585
},
{
"epoch": 1.7575757575757576,
"grad_norm": 0.27395829853778514,
"learning_rate": 3.280706937305445e-05,
"loss": 0.4233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13507923483848572,
"step": 580,
"valid_targets_mean": 14127.3,
"valid_targets_min": 3212
},
{
"epoch": 1.7727272727272727,
"grad_norm": 0.25592535738698435,
"learning_rate": 3.2643864638973645e-05,
"loss": 0.4175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13018617033958435,
"step": 585,
"valid_targets_mean": 14483.5,
"valid_targets_min": 3185
},
{
"epoch": 1.7878787878787878,
"grad_norm": 0.24574039674480253,
"learning_rate": 3.2479245210490434e-05,
"loss": 0.4133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14168305695056915,
"step": 590,
"valid_targets_mean": 15052.1,
"valid_targets_min": 3733
},
{
"epoch": 1.803030303030303,
"grad_norm": 0.257946888158228,
"learning_rate": 3.2313229506513167e-05,
"loss": 0.4096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11595556139945984,
"step": 595,
"valid_targets_mean": 13657.6,
"valid_targets_min": 4073
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.2423108123895089,
"learning_rate": 3.2145836102176424e-05,
"loss": 0.4217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13753411173820496,
"step": 600,
"valid_targets_mean": 14381.9,
"valid_targets_min": 4035
},
{
"epoch": 1.8333333333333335,
"grad_norm": 0.3673066567758278,
"learning_rate": 3.197708372676265e-05,
"loss": 0.4177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14095187187194824,
"step": 605,
"valid_targets_mean": 15813.5,
"valid_targets_min": 7248
},
{
"epoch": 1.8484848484848486,
"grad_norm": 0.3410767287068366,
"learning_rate": 3.1806991261606604e-05,
"loss": 0.4236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13289013504981995,
"step": 610,
"valid_targets_mean": 15133.6,
"valid_targets_min": 5425
},
{
"epoch": 1.8636363636363638,
"grad_norm": 0.3928398693107199,
"learning_rate": 3.163557773798276e-05,
"loss": 0.4151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13538768887519836,
"step": 615,
"valid_targets_mean": 15416.2,
"valid_targets_min": 4358
},
{
"epoch": 1.878787878787879,
"grad_norm": 0.30018552288843936,
"learning_rate": 3.146286233497593e-05,
"loss": 0.4151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12879617512226105,
"step": 620,
"valid_targets_mean": 14728.0,
"valid_targets_min": 4779
},
{
"epoch": 1.893939393939394,
"grad_norm": 0.27871218248182866,
"learning_rate": 3.128886437733539e-05,
"loss": 0.4176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14107659459114075,
"step": 625,
"valid_targets_mean": 16196.8,
"valid_targets_min": 5645
},
{
"epoch": 1.9090909090909092,
"grad_norm": 0.31342858815473673,
"learning_rate": 3.111360333331263e-05,
"loss": 0.4189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1214226558804512,
"step": 630,
"valid_targets_mean": 13641.1,
"valid_targets_min": 3487
},
{
"epoch": 1.9242424242424243,
"grad_norm": 0.28759225199732685,
"learning_rate": 3.093709881248312e-05,
"loss": 0.4137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12949924170970917,
"step": 635,
"valid_targets_mean": 15259.6,
"valid_targets_min": 4815
},
{
"epoch": 1.9393939393939394,
"grad_norm": 0.4720940997340626,
"learning_rate": 3.075937056355225e-05,
"loss": 0.4187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1339273750782013,
"step": 640,
"valid_targets_mean": 14075.0,
"valid_targets_min": 4063
},
{
"epoch": 1.9545454545454546,
"grad_norm": 0.2820424534133633,
"learning_rate": 3.0580438472145665e-05,
"loss": 0.419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13889871537685394,
"step": 645,
"valid_targets_mean": 13273.1,
"valid_targets_min": 5062
},
{
"epoch": 1.9696969696969697,
"grad_norm": 0.2494556245622125,
"learning_rate": 3.0400322558584308e-05,
"loss": 0.4143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13408786058425903,
"step": 650,
"valid_targets_mean": 14133.0,
"valid_targets_min": 6186
},
{
"epoch": 1.9848484848484849,
"grad_norm": 0.2637423161671155,
"learning_rate": 3.0219042975644415e-05,
"loss": 0.4134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.144476979970932,
"step": 655,
"valid_targets_mean": 15395.8,
"valid_targets_min": 4386
},
{
"epoch": 2.0,
"grad_norm": 0.2301269495494442,
"learning_rate": 3.0036620006302624e-05,
"loss": 0.4168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1381838023662567,
"step": 660,
"valid_targets_mean": 15590.2,
"valid_targets_min": 3399
},
{
"epoch": 2.015151515151515,
"grad_norm": 0.28406665058518976,
"learning_rate": 2.9853074061466602e-05,
"loss": 0.4032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13899505138397217,
"step": 665,
"valid_targets_mean": 15553.7,
"valid_targets_min": 6431
},
{
"epoch": 2.0303030303030303,
"grad_norm": 0.21285203894708732,
"learning_rate": 2.9668425677691278e-05,
"loss": 0.4022,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12502285838127136,
"step": 670,
"valid_targets_mean": 15203.8,
"valid_targets_min": 5171
},
{
"epoch": 2.0454545454545454,
"grad_norm": 0.2445604222211269,
"learning_rate": 2.948269551488108e-05,
"loss": 0.403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13765007257461548,
"step": 675,
"valid_targets_mean": 14323.1,
"valid_targets_min": 3205
},
{
"epoch": 2.0606060606060606,
"grad_norm": 0.23550700899136703,
"learning_rate": 2.929590435397832e-05,
"loss": 0.4098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1422361582517624,
"step": 680,
"valid_targets_mean": 16510.7,
"valid_targets_min": 3186
},
{
"epoch": 2.0757575757575757,
"grad_norm": 0.26247099670181223,
"learning_rate": 2.9108073094638066e-05,
"loss": 0.399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12394432723522186,
"step": 685,
"valid_targets_mean": 15157.7,
"valid_targets_min": 4880
},
{
"epoch": 2.090909090909091,
"grad_norm": 0.27020259267918656,
"learning_rate": 2.8919222752889727e-05,
"loss": 0.4038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12068557739257812,
"step": 690,
"valid_targets_mean": 14247.4,
"valid_targets_min": 4780
},
{
"epoch": 2.106060606060606,
"grad_norm": 0.20481401345814657,
"learning_rate": 2.8729374458785647e-05,
"loss": 0.4027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11181310564279556,
"step": 695,
"valid_targets_mean": 13585.7,
"valid_targets_min": 6632
},
{
"epoch": 2.121212121212121,
"grad_norm": 0.24273965833842234,
"learning_rate": 2.8538549454036838e-05,
"loss": 0.4043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1391579806804657,
"step": 700,
"valid_targets_mean": 15396.5,
"valid_targets_min": 2746
},
{
"epoch": 2.1363636363636362,
"grad_norm": 0.23541344015418184,
"learning_rate": 2.834676908963636e-05,
"loss": 0.3985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13219976425170898,
"step": 705,
"valid_targets_mean": 17154.9,
"valid_targets_min": 6336
},
{
"epoch": 2.1515151515151514,
"grad_norm": 0.26258542741948365,
"learning_rate": 2.815405482347037e-05,
"loss": 0.4034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13979566097259521,
"step": 710,
"valid_targets_mean": 15050.4,
"valid_targets_min": 3770
},
{
"epoch": 2.1666666666666665,
"grad_norm": 0.2957549576693815,
"learning_rate": 2.796042821791725e-05,
"loss": 0.408,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.140570729970932,
"step": 715,
"valid_targets_mean": 16650.1,
"valid_targets_min": 7019
},
{
"epoch": 2.1818181818181817,
"grad_norm": 0.2674390870851971,
"learning_rate": 2.776591093743505e-05,
"loss": 0.4032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13701064884662628,
"step": 720,
"valid_targets_mean": 15406.2,
"valid_targets_min": 5127
},
{
"epoch": 2.196969696969697,
"grad_norm": 0.2751077668087683,
"learning_rate": 2.7570524746137485e-05,
"loss": 0.4031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.145437553524971,
"step": 725,
"valid_targets_mean": 17099.9,
"valid_targets_min": 6081
},
{
"epoch": 2.212121212121212,
"grad_norm": 0.2390588517173356,
"learning_rate": 2.7374291505358818e-05,
"loss": 0.4014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13979849219322205,
"step": 730,
"valid_targets_mean": 15471.2,
"valid_targets_min": 3270
},
{
"epoch": 2.227272727272727,
"grad_norm": 0.2582220951331803,
"learning_rate": 2.7177233171207817e-05,
"loss": 0.3995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14698559045791626,
"step": 735,
"valid_targets_mean": 15787.5,
"valid_targets_min": 2702
},
{
"epoch": 2.242424242424242,
"grad_norm": 0.23340513449605393,
"learning_rate": 2.6979371792111147e-05,
"loss": 0.3992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13736939430236816,
"step": 740,
"valid_targets_mean": 15320.6,
"valid_targets_min": 4022
},
{
"epoch": 2.257575757575758,
"grad_norm": 0.2536860517731456,
"learning_rate": 2.678072950634641e-05,
"loss": 0.4036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1464948058128357,
"step": 745,
"valid_targets_mean": 16852.1,
"valid_targets_min": 6696
},
{
"epoch": 2.2727272727272725,
"grad_norm": 0.2638340109418724,
"learning_rate": 2.6581328539565184e-05,
"loss": 0.3975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14072707295417786,
"step": 750,
"valid_targets_mean": 16809.4,
"valid_targets_min": 7765
},
{
"epoch": 2.287878787878788,
"grad_norm": 0.24108039938243508,
"learning_rate": 2.638119120230616e-05,
"loss": 0.4014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1345876157283783,
"step": 755,
"valid_targets_mean": 15781.1,
"valid_targets_min": 2401
},
{
"epoch": 2.303030303030303,
"grad_norm": 0.25499518648680075,
"learning_rate": 2.618033988749895e-05,
"loss": 0.4014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12714101374149323,
"step": 760,
"valid_targets_mean": 14556.9,
"valid_targets_min": 3789
},
{
"epoch": 2.3181818181818183,
"grad_norm": 0.2412901463663965,
"learning_rate": 2.5978797067958542e-05,
"loss": 0.4036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13545730710029602,
"step": 765,
"valid_targets_mean": 16024.8,
"valid_targets_min": 7272
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.23198272890585742,
"learning_rate": 2.5776585293870877e-05,
"loss": 0.407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1482217013835907,
"step": 770,
"valid_targets_mean": 17007.0,
"valid_targets_min": 6087
},
{
"epoch": 2.3484848484848486,
"grad_norm": 0.2227473278480235,
"learning_rate": 2.557372719026976e-05,
"loss": 0.4031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1505884975194931,
"step": 775,
"valid_targets_mean": 16449.6,
"valid_targets_min": 6102
},
{
"epoch": 2.3636363636363638,
"grad_norm": 0.2387955795302762,
"learning_rate": 2.537024545450539e-05,
"loss": 0.4047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13995446264743805,
"step": 780,
"valid_targets_mean": 14589.6,
"valid_targets_min": 3986
},
{
"epoch": 2.378787878787879,
"grad_norm": 0.22960591186951138,
"learning_rate": 2.5166162853704825e-05,
"loss": 0.4016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.132080078125,
"step": 785,
"valid_targets_mean": 14911.7,
"valid_targets_min": 3358
},
{
"epoch": 2.393939393939394,
"grad_norm": 0.2317325527335211,
"learning_rate": 2.496150222222458e-05,
"loss": 0.3987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11762173473834991,
"step": 790,
"valid_targets_mean": 14146.1,
"valid_targets_min": 4815
},
{
"epoch": 2.409090909090909,
"grad_norm": 0.23950789716559473,
"learning_rate": 2.475628645909576e-05,
"loss": 0.3967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14730790257453918,
"step": 795,
"valid_targets_mean": 16316.9,
"valid_targets_min": 4600
},
{
"epoch": 2.4242424242424243,
"grad_norm": 0.2194727783836063,
"learning_rate": 2.4550538525461963e-05,
"loss": 0.4029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15803992748260498,
"step": 800,
"valid_targets_mean": 16688.9,
"valid_targets_min": 6363
},
{
"epoch": 2.4393939393939394,
"grad_norm": 0.20726302913342787,
"learning_rate": 2.434428144201016e-05,
"loss": 0.3994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14212462306022644,
"step": 805,
"valid_targets_mean": 16110.2,
"valid_targets_min": 4638
},
{
"epoch": 2.4545454545454546,
"grad_norm": 0.23512860005080613,
"learning_rate": 2.4137538286394976e-05,
"loss": 0.4021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14163710176944733,
"step": 810,
"valid_targets_mean": 17403.3,
"valid_targets_min": 3852
},
{
"epoch": 2.4696969696969697,
"grad_norm": 0.2631327448975476,
"learning_rate": 2.3930332190656604e-05,
"loss": 0.4019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12924142181873322,
"step": 815,
"valid_targets_mean": 13943.7,
"valid_targets_min": 4675
},
{
"epoch": 2.484848484848485,
"grad_norm": 0.2794719573674816,
"learning_rate": 2.3722686338632602e-05,
"loss": 0.4023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1343647837638855,
"step": 820,
"valid_targets_mean": 14427.6,
"valid_targets_min": 3926
},
{
"epoch": 2.5,
"grad_norm": 0.25305791183693377,
"learning_rate": 2.3514623963363886e-05,
"loss": 0.3971,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12937594950199127,
"step": 825,
"valid_targets_mean": 15417.3,
"valid_targets_min": 3750
},
{
"epoch": 2.515151515151515,
"grad_norm": 0.22286330070889673,
"learning_rate": 2.330616834449525e-05,
"loss": 0.4024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14154498279094696,
"step": 830,
"valid_targets_mean": 15068.0,
"valid_targets_min": 5629
},
{
"epoch": 2.5303030303030303,
"grad_norm": 0.27527212660583167,
"learning_rate": 2.309734280567065e-05,
"loss": 0.397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1428302824497223,
"step": 835,
"valid_targets_mean": 15865.2,
"valid_targets_min": 3471
},
{
"epoch": 2.5454545454545454,
"grad_norm": 0.2013095273828789,
"learning_rate": 2.28881707119236e-05,
"loss": 0.4034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1361142098903656,
"step": 840,
"valid_targets_mean": 15358.7,
"valid_targets_min": 5102
},
{
"epoch": 2.5606060606060606,
"grad_norm": 0.20518416839823062,
"learning_rate": 2.267867546706287e-05,
"loss": 0.3978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12422050535678864,
"step": 845,
"valid_targets_mean": 14425.1,
"valid_targets_min": 6548
},
{
"epoch": 2.5757575757575757,
"grad_norm": 0.23015921617387777,
"learning_rate": 2.2468880511053896e-05,
"loss": 0.3996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13271577656269073,
"step": 850,
"valid_targets_mean": 14928.2,
"valid_targets_min": 3566
},
{
"epoch": 2.590909090909091,
"grad_norm": 0.22763155127997378,
"learning_rate": 2.2258809317396163e-05,
"loss": 0.4005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13653181493282318,
"step": 855,
"valid_targets_mean": 15972.4,
"valid_targets_min": 6524
},
{
"epoch": 2.606060606060606,
"grad_norm": 0.20780381709855494,
"learning_rate": 2.2048485390496757e-05,
"loss": 0.3993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1356944590806961,
"step": 860,
"valid_targets_mean": 16200.0,
"valid_targets_min": 4921
},
{
"epoch": 2.621212121212121,
"grad_norm": 0.21150841424868858,
"learning_rate": 2.1837932263040553e-05,
"loss": 0.4028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13849398493766785,
"step": 865,
"valid_targets_mean": 14841.7,
"valid_targets_min": 5522
},
{
"epoch": 2.6363636363636362,
"grad_norm": 0.19123760683599647,
"learning_rate": 2.1627173493357167e-05,
"loss": 0.3992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1281318962574005,
"step": 870,
"valid_targets_mean": 14613.7,
"valid_targets_min": 5498
},
{
"epoch": 2.6515151515151514,
"grad_norm": 0.20328337012713912,
"learning_rate": 2.1416232662785084e-05,
"loss": 0.4002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13393211364746094,
"step": 875,
"valid_targets_mean": 15199.2,
"valid_targets_min": 4813
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.2106198865750161,
"learning_rate": 2.1205133373033173e-05,
"loss": 0.3987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12791499495506287,
"step": 880,
"valid_targets_mean": 14866.7,
"valid_targets_min": 3781
},
{
"epoch": 2.6818181818181817,
"grad_norm": 0.20369389176189004,
"learning_rate": 2.0993899243539953e-05,
"loss": 0.3989,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13717877864837646,
"step": 885,
"valid_targets_mean": 14771.3,
"valid_targets_min": 5034
},
{
"epoch": 2.6969696969696972,
"grad_norm": 0.20309062401939268,
"learning_rate": 2.0782553908830887e-05,
"loss": 0.3975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1538437306880951,
"step": 890,
"valid_targets_mean": 16498.7,
"valid_targets_min": 3952
},
{
"epoch": 2.712121212121212,
"grad_norm": 0.20491160735105923,
"learning_rate": 2.0571121015873924e-05,
"loss": 0.3995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12048079073429108,
"step": 895,
"valid_targets_mean": 14294.9,
"valid_targets_min": 4430
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.19593978433356782,
"learning_rate": 2.0359624221433728e-05,
"loss": 0.3978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1178448349237442,
"step": 900,
"valid_targets_mean": 12869.5,
"valid_targets_min": 5145
},
{
"epoch": 2.742424242424242,
"grad_norm": 0.29488921317193234,
"learning_rate": 2.014808718942476e-05,
"loss": 0.3994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12907974421977997,
"step": 905,
"valid_targets_mean": 14723.8,
"valid_targets_min": 6297
},
{
"epoch": 2.757575757575758,
"grad_norm": 0.2459744310035717,
"learning_rate": 1.9936533588263557e-05,
"loss": 0.4003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13112419843673706,
"step": 910,
"valid_targets_mean": 14698.9,
"valid_targets_min": 3593
},
{
"epoch": 2.7727272727272725,
"grad_norm": 0.19945761974335,
"learning_rate": 1.9724987088220565e-05,
"loss": 0.4004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12379374355077744,
"step": 915,
"valid_targets_mean": 14036.3,
"valid_targets_min": 5990
},
{
"epoch": 2.787878787878788,
"grad_norm": 0.20489817601379762,
"learning_rate": 1.951347135877169e-05,
"loss": 0.3986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13167142868041992,
"step": 920,
"valid_targets_mean": 15465.9,
"valid_targets_min": 5550
},
{
"epoch": 2.8030303030303028,
"grad_norm": 0.20180242794079498,
"learning_rate": 1.930201006594999e-05,
"loss": 0.3983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12798179686069489,
"step": 925,
"valid_targets_mean": 14284.5,
"valid_targets_min": 4595
},
{
"epoch": 2.8181818181818183,
"grad_norm": 0.21789914003171218,
"learning_rate": 1.9090626869697714e-05,
"loss": 0.3976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1261674016714096,
"step": 930,
"valid_targets_mean": 14832.6,
"valid_targets_min": 3916
},
{
"epoch": 2.8333333333333335,
"grad_norm": 0.20986970672170555,
"learning_rate": 1.8879345421219063e-05,
"loss": 0.395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13479703664779663,
"step": 935,
"valid_targets_mean": 15754.2,
"valid_targets_min": 2824
},
{
"epoch": 2.8484848484848486,
"grad_norm": 0.2232787287586755,
"learning_rate": 1.8668189360333923e-05,
"loss": 0.3995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14347530901432037,
"step": 940,
"valid_targets_mean": 15561.6,
"valid_targets_min": 3982
},
{
"epoch": 2.8636363636363638,
"grad_norm": 0.22167749659610708,
"learning_rate": 1.845718231283281e-05,
"loss": 0.4025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12210417538881302,
"step": 945,
"valid_targets_mean": 13407.8,
"valid_targets_min": 4874
},
{
"epoch": 2.878787878787879,
"grad_norm": 0.20761225343562079,
"learning_rate": 1.8246347887833457e-05,
"loss": 0.3966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13165885210037231,
"step": 950,
"valid_targets_mean": 14655.1,
"valid_targets_min": 3867
},
{
"epoch": 2.893939393939394,
"grad_norm": 0.20391281242171244,
"learning_rate": 1.8035709675139258e-05,
"loss": 0.3966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14313405752182007,
"step": 955,
"valid_targets_mean": 15892.5,
"valid_targets_min": 3390
},
{
"epoch": 2.909090909090909,
"grad_norm": 0.20034367259133895,
"learning_rate": 1.7825291242599837e-05,
"loss": 0.4008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14369603991508484,
"step": 960,
"valid_targets_mean": 17404.2,
"valid_targets_min": 8916
},
{
"epoch": 2.9242424242424243,
"grad_norm": 0.18368085390931338,
"learning_rate": 1.7615116133474084e-05,
"loss": 0.4013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13607482612133026,
"step": 965,
"valid_targets_mean": 15710.0,
"valid_targets_min": 6646
},
{
"epoch": 2.9393939393939394,
"grad_norm": 0.19418731665454458,
"learning_rate": 1.7405207863795966e-05,
"loss": 0.397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1205495223402977,
"step": 970,
"valid_targets_mean": 13890.4,
"valid_targets_min": 4121
},
{
"epoch": 2.9545454545454546,
"grad_norm": 0.18400200321758117,
"learning_rate": 1.719558991974339e-05,
"loss": 0.3986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14622147381305695,
"step": 975,
"valid_targets_mean": 16142.8,
"valid_targets_min": 5173
},
{
"epoch": 2.9696969696969697,
"grad_norm": 0.18605958040871526,
"learning_rate": 1.698628575501034e-05,
"loss": 0.3955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15678343176841736,
"step": 980,
"valid_targets_mean": 17844.7,
"valid_targets_min": 5582
},
{
"epoch": 2.984848484848485,
"grad_norm": 0.18691576935828805,
"learning_rate": 1.6777318788182723e-05,
"loss": 0.4034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1485750377178192,
"step": 985,
"valid_targets_mean": 16792.2,
"valid_targets_min": 5099
},
{
"epoch": 3.0,
"grad_norm": 0.20524703713347764,
"learning_rate": 1.6568712400118102e-05,
"loss": 0.392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12552806735038757,
"step": 990,
"valid_targets_mean": 14872.3,
"valid_targets_min": 2839
},
{
"epoch": 3.015151515151515,
"grad_norm": 0.20201499679456203,
"learning_rate": 1.636048993132969e-05,
"loss": 0.386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12483127415180206,
"step": 995,
"valid_targets_mean": 14800.0,
"valid_targets_min": 6232
},
{
"epoch": 3.0303030303030303,
"grad_norm": 0.2139699492207323,
"learning_rate": 1.615267467937479e-05,
"loss": 0.3863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13610896468162537,
"step": 1000,
"valid_targets_mean": 15750.8,
"valid_targets_min": 2792
},
{
"epoch": 3.0454545454545454,
"grad_norm": 0.18244176766072634,
"learning_rate": 1.59452898962481e-05,
"loss": 0.3856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14060482382774353,
"step": 1005,
"valid_targets_mean": 16487.1,
"valid_targets_min": 5947
},
{
"epoch": 3.0606060606060606,
"grad_norm": 0.23920001444657255,
"learning_rate": 1.573835878578013e-05,
"loss": 0.3942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12809117138385773,
"step": 1010,
"valid_targets_mean": 15580.5,
"valid_targets_min": 3920
},
{
"epoch": 3.0757575757575757,
"grad_norm": 0.239805468147421,
"learning_rate": 1.5531904501040917e-05,
"loss": 0.3869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13520100712776184,
"step": 1015,
"valid_targets_mean": 15625.3,
"valid_targets_min": 3340
},
{
"epoch": 3.090909090909091,
"grad_norm": 0.19295973970998992,
"learning_rate": 1.5325950141749522e-05,
"loss": 0.3839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14272280037403107,
"step": 1020,
"valid_targets_mean": 16487.4,
"valid_targets_min": 7824
},
{
"epoch": 3.106060606060606,
"grad_norm": 0.20762988883475636,
"learning_rate": 1.5120518751689438e-05,
"loss": 0.3886,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12633001804351807,
"step": 1025,
"valid_targets_mean": 15774.8,
"valid_targets_min": 3584
},
{
"epoch": 3.121212121212121,
"grad_norm": 0.21762044442781273,
"learning_rate": 1.4915633316130267e-05,
"loss": 0.3815,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13853636384010315,
"step": 1030,
"valid_targets_mean": 16518.2,
"valid_targets_min": 5029
},
{
"epoch": 3.1363636363636362,
"grad_norm": 0.22228023340753078,
"learning_rate": 1.4711316759255963e-05,
"loss": 0.3843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1272241175174713,
"step": 1035,
"valid_targets_mean": 13752.9,
"valid_targets_min": 3877
},
{
"epoch": 3.1515151515151514,
"grad_norm": 0.18759116521143898,
"learning_rate": 1.450759194159987e-05,
"loss": 0.3878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1292319893836975,
"step": 1040,
"valid_targets_mean": 15550.9,
"valid_targets_min": 3552
},
{
"epoch": 3.1666666666666665,
"grad_norm": 0.22202339372765256,
"learning_rate": 1.4304481657486955e-05,
"loss": 0.3874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13702820241451263,
"step": 1045,
"valid_targets_mean": 16725.6,
"valid_targets_min": 7579
},
{
"epoch": 3.1818181818181817,
"grad_norm": 0.20389491383532993,
"learning_rate": 1.4102008632483344e-05,
"loss": 0.383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12035155296325684,
"step": 1050,
"valid_targets_mean": 13422.4,
"valid_targets_min": 3566
},
{
"epoch": 3.196969696969697,
"grad_norm": 0.1994171138487457,
"learning_rate": 1.3900195520853628e-05,
"loss": 0.3835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1312486231327057,
"step": 1055,
"valid_targets_mean": 14952.1,
"valid_targets_min": 4675
},
{
"epoch": 3.212121212121212,
"grad_norm": 0.1797074361183551,
"learning_rate": 1.3699064903026149e-05,
"loss": 0.3847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12870073318481445,
"step": 1060,
"valid_targets_mean": 15102.5,
"valid_targets_min": 5087
},
{
"epoch": 3.227272727272727,
"grad_norm": 0.18502274249189762,
"learning_rate": 1.34986392830665e-05,
"loss": 0.3854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1210193783044815,
"step": 1065,
"valid_targets_mean": 13492.0,
"valid_targets_min": 4595
},
{
"epoch": 3.242424242424242,
"grad_norm": 0.1838392123173521,
"learning_rate": 1.3298941086159598e-05,
"loss": 0.3861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12977665662765503,
"step": 1070,
"valid_targets_mean": 14951.2,
"valid_targets_min": 3850
},
{
"epoch": 3.257575757575758,
"grad_norm": 0.1860250488205784,
"learning_rate": 1.3099992656100592e-05,
"loss": 0.381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1121118813753128,
"step": 1075,
"valid_targets_mean": 13794.9,
"valid_targets_min": 4445
},
{
"epoch": 3.2727272727272725,
"grad_norm": 0.1906435044849517,
"learning_rate": 1.2901816252794848e-05,
"loss": 0.3837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12230115383863449,
"step": 1080,
"valid_targets_mean": 14962.3,
"valid_targets_min": 6351
},
{
"epoch": 3.287878787878788,
"grad_norm": 0.19806332929237902,
"learning_rate": 1.2704434049767356e-05,
"loss": 0.387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1422664374113083,
"step": 1085,
"valid_targets_mean": 17608.7,
"valid_targets_min": 6032
},
{
"epoch": 3.303030303030303,
"grad_norm": 0.1835996076247742,
"learning_rate": 1.250786813168176e-05,
"loss": 0.3853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13839831948280334,
"step": 1090,
"valid_targets_mean": 15443.4,
"valid_targets_min": 5234
},
{
"epoch": 3.3181818181818183,
"grad_norm": 0.21384278054170996,
"learning_rate": 1.2312140491869369e-05,
"loss": 0.3835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1129048764705658,
"step": 1095,
"valid_targets_mean": 13407.2,
"valid_targets_min": 2746
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.1818098211773201,
"learning_rate": 1.2117273029868362e-05,
"loss": 0.3855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12763696908950806,
"step": 1100,
"valid_targets_mean": 14486.6,
"valid_targets_min": 6386
},
{
"epoch": 3.3484848484848486,
"grad_norm": 0.20710370150037746,
"learning_rate": 1.1923287548973508e-05,
"loss": 0.3845,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11949425935745239,
"step": 1105,
"valid_targets_mean": 15547.2,
"valid_targets_min": 6096
},
{
"epoch": 3.3636363636363638,
"grad_norm": 0.2216599163029605,
"learning_rate": 1.1730205753796631e-05,
"loss": 0.388,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13013041019439697,
"step": 1110,
"valid_targets_mean": 16238.2,
"valid_targets_min": 2701
},
{
"epoch": 3.378787878787879,
"grad_norm": 0.16276094809715302,
"learning_rate": 1.1538049247838128e-05,
"loss": 0.3865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1291944533586502,
"step": 1115,
"valid_targets_mean": 16817.2,
"valid_targets_min": 5225
},
{
"epoch": 3.393939393939394,
"grad_norm": 0.1826167977343321,
"learning_rate": 1.134683953106983e-05,
"loss": 0.3855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11070194095373154,
"step": 1120,
"valid_targets_mean": 13068.9,
"valid_targets_min": 3355
},
{
"epoch": 3.409090909090909,
"grad_norm": 0.18012880538402126,
"learning_rate": 1.115659799752938e-05,
"loss": 0.3824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13216200470924377,
"step": 1125,
"valid_targets_mean": 15118.6,
"valid_targets_min": 6631
},
{
"epoch": 3.4242424242424243,
"grad_norm": 0.18366888272284204,
"learning_rate": 1.096734593292649e-05,
"loss": 0.3817,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.132125124335289,
"step": 1130,
"valid_targets_mean": 16672.8,
"valid_targets_min": 6299
},
{
"epoch": 3.4393939393939394,
"grad_norm": 0.20226724300959517,
"learning_rate": 1.077910451226138e-05,
"loss": 0.388,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13653744757175446,
"step": 1135,
"valid_targets_mean": 15401.3,
"valid_targets_min": 2991
},
{
"epoch": 3.4545454545454546,
"grad_norm": 0.2069362208879119,
"learning_rate": 1.0591894797455526e-05,
"loss": 0.3895,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12484444677829742,
"step": 1140,
"valid_targets_mean": 14162.0,
"valid_targets_min": 5335
},
{
"epoch": 3.4696969696969697,
"grad_norm": 0.1830668026563319,
"learning_rate": 1.0405737734995083e-05,
"loss": 0.3889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12676018476486206,
"step": 1145,
"valid_targets_mean": 14280.0,
"valid_targets_min": 3472
},
{
"epoch": 3.484848484848485,
"grad_norm": 0.19111910951479347,
"learning_rate": 1.0220654153587225e-05,
"loss": 0.3868,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12387235462665558,
"step": 1150,
"valid_targets_mean": 14188.9,
"valid_targets_min": 3640
},
{
"epoch": 3.5,
"grad_norm": 0.18368088740310942,
"learning_rate": 1.00366647618297e-05,
"loss": 0.3827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10632310807704926,
"step": 1155,
"valid_targets_mean": 13244.9,
"valid_targets_min": 4697
},
{
"epoch": 3.515151515151515,
"grad_norm": 0.17788062044386901,
"learning_rate": 9.853790145893742e-06,
"loss": 0.39,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12414000928401947,
"step": 1160,
"valid_targets_mean": 14982.0,
"valid_targets_min": 3896
},
{
"epoch": 3.5303030303030303,
"grad_norm": 0.1784902396662646,
"learning_rate": 9.672050767220765e-06,
"loss": 0.3851,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1390392780303955,
"step": 1165,
"valid_targets_mean": 16485.2,
"valid_targets_min": 6081
},
{
"epoch": 3.5454545454545454,
"grad_norm": 0.1946533441284041,
"learning_rate": 9.491466960232955e-06,
"loss": 0.3851,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12741807103157043,
"step": 1170,
"valid_targets_mean": 15234.1,
"valid_targets_min": 1570
},
{
"epoch": 3.5606060606060606,
"grad_norm": 0.1927912363049172,
"learning_rate": 9.312058930058114e-06,
"loss": 0.3908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13081388175487518,
"step": 1175,
"valid_targets_mean": 15956.8,
"valid_targets_min": 3704
},
{
"epoch": 3.5757575757575757,
"grad_norm": 0.1784659315169825,
"learning_rate": 9.133846750268945e-06,
"loss": 0.3863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12337636202573776,
"step": 1180,
"valid_targets_mean": 14114.9,
"valid_targets_min": 4155
},
{
"epoch": 3.590909090909091,
"grad_norm": 0.17640977231862776,
"learning_rate": 8.956850360637046e-06,
"loss": 0.3845,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14645136892795563,
"step": 1185,
"valid_targets_mean": 16319.7,
"valid_targets_min": 4803
},
{
"epoch": 3.606060606060606,
"grad_norm": 0.16321215876127312,
"learning_rate": 8.78108956490194e-06,
"loss": 0.3872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1280919909477234,
"step": 1190,
"valid_targets_mean": 15500.8,
"valid_targets_min": 4314
},
{
"epoch": 3.621212121212121,
"grad_norm": 0.1680205179173797,
"learning_rate": 8.606584028555225e-06,
"loss": 0.384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12240514159202576,
"step": 1195,
"valid_targets_mean": 14351.5,
"valid_targets_min": 3453
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.19907785765048092,
"learning_rate": 8.43335327664027e-06,
"loss": 0.3913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11995619535446167,
"step": 1200,
"valid_targets_mean": 15079.6,
"valid_targets_min": 4701
},
{
"epoch": 3.6515151515151514,
"grad_norm": 0.21371967500820704,
"learning_rate": 8.261416691567601e-06,
"loss": 0.3753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13895614445209503,
"step": 1205,
"valid_targets_mean": 16545.2,
"valid_targets_min": 5321
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.19506572014903165,
"learning_rate": 8.090793510946242e-06,
"loss": 0.3803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12509584426879883,
"step": 1210,
"valid_targets_mean": 15488.8,
"valid_targets_min": 4785
},
{
"epoch": 3.6818181818181817,
"grad_norm": 0.19064347792276345,
"learning_rate": 7.921502825431258e-06,
"loss": 0.3787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12347602099180222,
"step": 1215,
"valid_targets_mean": 14360.5,
"valid_targets_min": 3511
},
{
"epoch": 3.6969696969696972,
"grad_norm": 0.182287532948687,
"learning_rate": 7.753563576587753e-06,
"loss": 0.3766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11818146705627441,
"step": 1220,
"valid_targets_mean": 13722.8,
"valid_targets_min": 5643
},
{
"epoch": 3.712121212121212,
"grad_norm": 0.17895536092055953,
"learning_rate": 7.5869945547715275e-06,
"loss": 0.3769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12111832946538925,
"step": 1225,
"valid_targets_mean": 14735.2,
"valid_targets_min": 3745
},
{
"epoch": 3.7272727272727275,
"grad_norm": 0.16815427567438399,
"learning_rate": 7.421814397026674e-06,
"loss": 0.3802,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1355920135974884,
"step": 1230,
"valid_targets_mean": 17361.2,
"valid_targets_min": 4950
},
{
"epoch": 3.742424242424242,
"grad_norm": 0.1771976885465761,
"learning_rate": 7.258041585000317e-06,
"loss": 0.3769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1293790638446808,
"step": 1235,
"valid_targets_mean": 15619.1,
"valid_targets_min": 6005
},
{
"epoch": 3.757575757575758,
"grad_norm": 0.17415525773658588,
"learning_rate": 7.095694442874743e-06,
"loss": 0.3756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1277199685573578,
"step": 1240,
"valid_targets_mean": 15341.6,
"valid_targets_min": 5202
},
{
"epoch": 3.7727272727272725,
"grad_norm": 0.15865554989600855,
"learning_rate": 6.934791135317147e-06,
"loss": 0.376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1316549926996231,
"step": 1245,
"valid_targets_mean": 16527.7,
"valid_targets_min": 4140
},
{
"epoch": 3.787878787878788,
"grad_norm": 0.16688937849086424,
"learning_rate": 6.775349665447222e-06,
"loss": 0.3802,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13643357157707214,
"step": 1250,
"valid_targets_mean": 15461.1,
"valid_targets_min": 3340
},
{
"epoch": 3.8030303030303028,
"grad_norm": 0.16416148418612428,
"learning_rate": 6.617387872822842e-06,
"loss": 0.3785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13461175560951233,
"step": 1255,
"valid_targets_mean": 16171.8,
"valid_targets_min": 7126
},
{
"epoch": 3.8181818181818183,
"grad_norm": 0.16313507588756762,
"learning_rate": 6.460923431444015e-06,
"loss": 0.3816,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12053517997264862,
"step": 1260,
"valid_targets_mean": 14434.2,
"valid_targets_min": 2628
},
{
"epoch": 3.8333333333333335,
"grad_norm": 0.16325464900607975,
"learning_rate": 6.305973847775406e-06,
"loss": 0.3717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1159006804227829,
"step": 1265,
"valid_targets_mean": 15667.6,
"valid_targets_min": 5195
},
{
"epoch": 3.8484848484848486,
"grad_norm": 0.18254604746396977,
"learning_rate": 6.152556458787546e-06,
"loss": 0.3753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12738105654716492,
"step": 1270,
"valid_targets_mean": 13737.2,
"valid_targets_min": 3769
},
{
"epoch": 3.8636363636363638,
"grad_norm": 0.16908110201722565,
"learning_rate": 6.000688430017048e-06,
"loss": 0.3785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11613453924655914,
"step": 1275,
"valid_targets_mean": 14516.1,
"valid_targets_min": 4159
},
{
"epoch": 3.878787878787879,
"grad_norm": 0.20048809597109035,
"learning_rate": 5.850386753645998e-06,
"loss": 0.3774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10215884447097778,
"step": 1280,
"valid_targets_mean": 12381.9,
"valid_targets_min": 3785
},
{
"epoch": 3.893939393939394,
"grad_norm": 0.16139526738714188,
"learning_rate": 5.701668246600731e-06,
"loss": 0.3781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1251106560230255,
"step": 1285,
"valid_targets_mean": 15067.7,
"valid_targets_min": 2642
},
{
"epoch": 3.909090909090909,
"grad_norm": 0.157151377694226,
"learning_rate": 5.554549548670227e-06,
"loss": 0.3786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13455966114997864,
"step": 1290,
"valid_targets_mean": 17651.8,
"valid_targets_min": 6461
},
{
"epoch": 3.9242424242424243,
"grad_norm": 0.15702018588442043,
"learning_rate": 5.409047120644307e-06,
"loss": 0.3759,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10969670116901398,
"step": 1295,
"valid_targets_mean": 14103.6,
"valid_targets_min": 4383
},
{
"epoch": 3.9393939393939394,
"grad_norm": 0.18062443113496127,
"learning_rate": 5.265177242471899e-06,
"loss": 0.3782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10825863480567932,
"step": 1300,
"valid_targets_mean": 13964.7,
"valid_targets_min": 5266
},
{
"epoch": 3.9545454545454546,
"grad_norm": 0.1726265649492319,
"learning_rate": 5.122956011439486e-06,
"loss": 0.3782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1273096799850464,
"step": 1305,
"valid_targets_mean": 14422.9,
"valid_targets_min": 4184
},
{
"epoch": 3.9696969696969697,
"grad_norm": 0.1638570708647358,
"learning_rate": 4.982399340370017e-06,
"loss": 0.3788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12474849075078964,
"step": 1310,
"valid_targets_mean": 15338.0,
"valid_targets_min": 4021
},
{
"epoch": 3.984848484848485,
"grad_norm": 0.15821455878356205,
"learning_rate": 4.843522955842464e-06,
"loss": 0.3745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11333755403757095,
"step": 1315,
"valid_targets_mean": 14689.9,
"valid_targets_min": 7070
},
{
"epoch": 4.0,
"grad_norm": 0.18144836630716482,
"learning_rate": 4.706342396432213e-06,
"loss": 0.3722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12877650558948517,
"step": 1320,
"valid_targets_mean": 15349.0,
"valid_targets_min": 4947
},
{
"epoch": 4.015151515151516,
"grad_norm": 0.1665315993678895,
"learning_rate": 4.570873010972477e-06,
"loss": 0.3799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11058478057384491,
"step": 1325,
"valid_targets_mean": 13188.7,
"valid_targets_min": 4378
},
{
"epoch": 4.03030303030303,
"grad_norm": 0.16355510134396425,
"learning_rate": 4.43712995683695e-06,
"loss": 0.3776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13307559490203857,
"step": 1330,
"valid_targets_mean": 14841.8,
"valid_targets_min": 2461
},
{
"epoch": 4.045454545454546,
"grad_norm": 0.16568197826559009,
"learning_rate": 4.305128198243888e-06,
"loss": 0.3816,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13005071878433228,
"step": 1335,
"valid_targets_mean": 16517.5,
"valid_targets_min": 5013
},
{
"epoch": 4.0606060606060606,
"grad_norm": 0.1515725227529857,
"learning_rate": 4.174882504581794e-06,
"loss": 0.3758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12472750246524811,
"step": 1340,
"valid_targets_mean": 16017.4,
"valid_targets_min": 5848
},
{
"epoch": 4.075757575757576,
"grad_norm": 0.1560043296087169,
"learning_rate": 4.046407448756895e-06,
"loss": 0.3721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1273220330476761,
"step": 1345,
"valid_targets_mean": 15190.7,
"valid_targets_min": 4258
},
{
"epoch": 4.090909090909091,
"grad_norm": 0.16171551990487193,
"learning_rate": 3.91971740556262e-06,
"loss": 0.3765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11427200585603714,
"step": 1350,
"valid_targets_mean": 13788.2,
"valid_targets_min": 2628
},
{
"epoch": 4.106060606060606,
"grad_norm": 0.1621886915892531,
"learning_rate": 3.7948265500712313e-06,
"loss": 0.3787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1313973218202591,
"step": 1355,
"valid_targets_mean": 15764.5,
"valid_targets_min": 6736
},
{
"epoch": 4.121212121212121,
"grad_norm": 0.15216405342287734,
"learning_rate": 3.6717488560478096e-06,
"loss": 0.3784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13022267818450928,
"step": 1360,
"valid_targets_mean": 16802.9,
"valid_targets_min": 6235
},
{
"epoch": 4.136363636363637,
"grad_norm": 0.16451681783578678,
"learning_rate": 3.5504980943867538e-06,
"loss": 0.3751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13295704126358032,
"step": 1365,
"valid_targets_mean": 16706.9,
"valid_targets_min": 7398
},
{
"epoch": 4.151515151515151,
"grad_norm": 0.15362560588681468,
"learning_rate": 3.4310878315710074e-06,
"loss": 0.3785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1346280872821808,
"step": 1370,
"valid_targets_mean": 17013.3,
"valid_targets_min": 4415
},
{
"epoch": 4.166666666666667,
"grad_norm": 0.1637770046947873,
"learning_rate": 3.3135314281540954e-06,
"loss": 0.3743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13173696398735046,
"step": 1375,
"valid_targets_mean": 15431.9,
"valid_targets_min": 5145
},
{
"epoch": 4.181818181818182,
"grad_norm": 0.15262780010059376,
"learning_rate": 3.1978420372652776e-06,
"loss": 0.375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13413605093955994,
"step": 1380,
"valid_targets_mean": 15826.1,
"valid_targets_min": 6007
},
{
"epoch": 4.196969696969697,
"grad_norm": 0.15750588369933005,
"learning_rate": 3.084032603137852e-06,
"loss": 0.3805,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1388297826051712,
"step": 1385,
"valid_targets_mean": 16882.4,
"valid_targets_min": 2141
},
{
"epoch": 4.212121212121212,
"grad_norm": 0.15951360984080548,
"learning_rate": 2.9721158596608622e-06,
"loss": 0.3769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12477941066026688,
"step": 1390,
"valid_targets_mean": 14877.5,
"valid_targets_min": 4983
},
{
"epoch": 4.2272727272727275,
"grad_norm": 0.15989308204374056,
"learning_rate": 2.8621043289543314e-06,
"loss": 0.3784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12433609366416931,
"step": 1395,
"valid_targets_mean": 14682.3,
"valid_targets_min": 4653
},
{
"epoch": 4.242424242424242,
"grad_norm": 0.15862256305188707,
"learning_rate": 2.754010319968181e-06,
"loss": 0.3794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1250750571489334,
"step": 1400,
"valid_targets_mean": 15762.9,
"valid_targets_min": 5095
},
{
"epoch": 4.257575757575758,
"grad_norm": 0.144033968194764,
"learning_rate": 2.647845927105015e-06,
"loss": 0.378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13507190346717834,
"step": 1405,
"valid_targets_mean": 17072.8,
"valid_targets_min": 3713
},
{
"epoch": 4.2727272727272725,
"grad_norm": 0.15293749452569375,
"learning_rate": 2.543623028866915e-06,
"loss": 0.3811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12047475576400757,
"step": 1410,
"valid_targets_mean": 14718.1,
"valid_targets_min": 3986
},
{
"epoch": 4.287878787878788,
"grad_norm": 0.1519293936485469,
"learning_rate": 2.4413532865263533e-06,
"loss": 0.3816,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13895002007484436,
"step": 1415,
"valid_targets_mean": 15430.9,
"valid_targets_min": 7704
},
{
"epoch": 4.303030303030303,
"grad_norm": 0.15934597894119953,
"learning_rate": 2.3410481428214602e-06,
"loss": 0.3744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11791396141052246,
"step": 1420,
"valid_targets_mean": 15282.0,
"valid_targets_min": 5252
},
{
"epoch": 4.318181818181818,
"grad_norm": 0.1565540137275141,
"learning_rate": 2.242718820675718e-06,
"loss": 0.3774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13728006184101105,
"step": 1425,
"valid_targets_mean": 16404.4,
"valid_targets_min": 5168
},
{
"epoch": 4.333333333333333,
"grad_norm": 0.15300286050496542,
"learning_rate": 2.1463763219422495e-06,
"loss": 0.3766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13043278455734253,
"step": 1430,
"valid_targets_mean": 15364.1,
"valid_targets_min": 4904
},
{
"epoch": 4.348484848484849,
"grad_norm": 0.15256071826231843,
"learning_rate": 2.0520314261728357e-06,
"loss": 0.3761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11972136795520782,
"step": 1435,
"valid_targets_mean": 13876.5,
"valid_targets_min": 4299
},
{
"epoch": 4.363636363636363,
"grad_norm": 0.1498901275439139,
"learning_rate": 1.9596946894118306e-06,
"loss": 0.3738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12425568699836731,
"step": 1440,
"valid_targets_mean": 15735.1,
"valid_targets_min": 6018
},
{
"epoch": 4.378787878787879,
"grad_norm": 0.15525357240670695,
"learning_rate": 1.8693764430150696e-06,
"loss": 0.3743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11837613582611084,
"step": 1445,
"valid_targets_mean": 14592.8,
"valid_targets_min": 3275
},
{
"epoch": 4.393939393939394,
"grad_norm": 0.14241103294840823,
"learning_rate": 1.7810867924938978e-06,
"loss": 0.3752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11680356413125992,
"step": 1450,
"valid_targets_mean": 15398.9,
"valid_targets_min": 6343
},
{
"epoch": 4.409090909090909,
"grad_norm": 0.1421325675854043,
"learning_rate": 1.6948356163845048e-06,
"loss": 0.3808,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1380445659160614,
"step": 1455,
"valid_targets_mean": 17377.7,
"valid_targets_min": 6781
},
{
"epoch": 4.424242424242424,
"grad_norm": 0.15004001547168774,
"learning_rate": 1.610632565142627e-06,
"loss": 0.3819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12191175669431686,
"step": 1460,
"valid_targets_mean": 14812.8,
"valid_targets_min": 5848
},
{
"epoch": 4.4393939393939394,
"grad_norm": 0.1444461782333066,
"learning_rate": 1.5284870600637813e-06,
"loss": 0.3773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12871713936328888,
"step": 1465,
"valid_targets_mean": 15949.8,
"valid_targets_min": 7108
},
{
"epoch": 4.454545454545454,
"grad_norm": 0.1489245036934525,
"learning_rate": 1.4484082922291376e-06,
"loss": 0.3777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12368868291378021,
"step": 1470,
"valid_targets_mean": 16389.0,
"valid_targets_min": 5677
},
{
"epoch": 4.46969696969697,
"grad_norm": 0.1472974113093515,
"learning_rate": 1.3704052214771513e-06,
"loss": 0.3758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11158767342567444,
"step": 1475,
"valid_targets_mean": 13701.0,
"valid_targets_min": 4705
},
{
"epoch": 4.484848484848484,
"grad_norm": 0.15064209648897592,
"learning_rate": 1.2944865754010682e-06,
"loss": 0.3732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11993849277496338,
"step": 1480,
"valid_targets_mean": 14086.5,
"valid_targets_min": 5697
},
{
"epoch": 4.5,
"grad_norm": 0.1439829400641997,
"learning_rate": 1.2206608483724013e-06,
"loss": 0.3755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1392856389284134,
"step": 1485,
"valid_targets_mean": 16722.5,
"valid_targets_min": 4973
},
{
"epoch": 4.515151515151516,
"grad_norm": 0.14654997539211856,
"learning_rate": 1.1489363005905241e-06,
"loss": 0.3746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1073300838470459,
"step": 1490,
"valid_targets_mean": 13875.4,
"valid_targets_min": 3778
},
{
"epoch": 4.53030303030303,
"grad_norm": 0.1450634174975392,
"learning_rate": 1.0793209571584562e-06,
"loss": 0.3847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13987794518470764,
"step": 1495,
"valid_targets_mean": 16870.2,
"valid_targets_min": 5447
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.1466804723012743,
"learning_rate": 1.0118226071849424e-06,
"loss": 0.3761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12252548336982727,
"step": 1500,
"valid_targets_mean": 14106.8,
"valid_targets_min": 4578
},
{
"epoch": 4.5606060606060606,
"grad_norm": 0.16197123748968928,
"learning_rate": 9.464488029129581e-07,
"loss": 0.3792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14925982058048248,
"step": 1505,
"valid_targets_mean": 18311.2,
"valid_targets_min": 8146
},
{
"epoch": 4.575757575757576,
"grad_norm": 0.14327511237462578,
"learning_rate": 8.832068588746945e-07,
"loss": 0.3725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1239233985543251,
"step": 1510,
"valid_targets_mean": 16076.7,
"valid_targets_min": 3317
},
{
"epoch": 4.590909090909091,
"grad_norm": 0.14823288821280842,
"learning_rate": 8.221038510731704e-07,
"loss": 0.3746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1282537877559662,
"step": 1515,
"valid_targets_mean": 14536.7,
"valid_targets_min": 3646
},
{
"epoch": 4.606060606060606,
"grad_norm": 0.14105179064235956,
"learning_rate": 7.631466161904821e-07,
"loss": 0.3749,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12889966368675232,
"step": 1520,
"valid_targets_mean": 16155.5,
"valid_targets_min": 6424
},
{
"epoch": 4.621212121212121,
"grad_norm": 0.14818970968513234,
"learning_rate": 7.063417508228876e-07,
"loss": 0.3768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1282745897769928,
"step": 1525,
"valid_targets_mean": 15349.0,
"valid_targets_min": 6302
},
{
"epoch": 4.636363636363637,
"grad_norm": 0.14449126106516016,
"learning_rate": 6.516956107427241e-07,
"loss": 0.3734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12247820198535919,
"step": 1530,
"valid_targets_mean": 15549.9,
"valid_targets_min": 4505
},
{
"epoch": 4.651515151515151,
"grad_norm": 0.15005279315436212,
"learning_rate": 5.992143101872638e-07,
"loss": 0.3839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11950863152742386,
"step": 1535,
"valid_targets_mean": 13700.6,
"valid_targets_min": 3590
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.14661165136036405,
"learning_rate": 5.489037211746184e-07,
"loss": 0.3804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11654527485370636,
"step": 1540,
"valid_targets_mean": 14126.2,
"valid_targets_min": 5009
},
{
"epoch": 4.681818181818182,
"grad_norm": 0.14432957174327468,
"learning_rate": 5.007694728467228e-07,
"loss": 0.377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12720376253128052,
"step": 1545,
"valid_targets_mean": 14868.3,
"valid_targets_min": 4415
},
{
"epoch": 4.696969696969697,
"grad_norm": 0.13809526163891875,
"learning_rate": 4.548169508395028e-07,
"loss": 0.3775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12525103986263275,
"step": 1550,
"valid_targets_mean": 16000.2,
"valid_targets_min": 4921
},
{
"epoch": 4.712121212121212,
"grad_norm": 0.14070611024208945,
"learning_rate": 4.1105129668029595e-07,
"loss": 0.3809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1342814564704895,
"step": 1555,
"valid_targets_mean": 17020.2,
"valid_targets_min": 7375
},
{
"epoch": 4.7272727272727275,
"grad_norm": 0.1472076939859629,
"learning_rate": 3.6947740721257066e-07,
"loss": 0.3781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12940089404582977,
"step": 1560,
"valid_targets_mean": 15519.5,
"valid_targets_min": 4962
},
{
"epoch": 4.742424242424242,
"grad_norm": 0.1370972068518527,
"learning_rate": 3.3009993404802486e-07,
"loss": 0.3787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12228408455848694,
"step": 1565,
"valid_targets_mean": 14750.1,
"valid_targets_min": 3171
},
{
"epoch": 4.757575757575758,
"grad_norm": 0.13755006199041775,
"learning_rate": 2.929232830461404e-07,
"loss": 0.376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1286831796169281,
"step": 1570,
"valid_targets_mean": 15723.0,
"valid_targets_min": 5089
},
{
"epoch": 4.7727272727272725,
"grad_norm": 0.14560076346202475,
"learning_rate": 2.579516138212101e-07,
"loss": 0.3793,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13214346766471863,
"step": 1575,
"valid_targets_mean": 15091.0,
"valid_targets_min": 2044
},
{
"epoch": 4.787878787878788,
"grad_norm": 0.14612474449230148,
"learning_rate": 2.2518883927692857e-07,
"loss": 0.3773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1365845948457718,
"step": 1580,
"valid_targets_mean": 15611.6,
"valid_targets_min": 4544
},
{
"epoch": 4.803030303030303,
"grad_norm": 0.14192509401084216,
"learning_rate": 1.9463862516859277e-07,
"loss": 0.3766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11990797519683838,
"step": 1585,
"valid_targets_mean": 14435.0,
"valid_targets_min": 2745
},
{
"epoch": 4.818181818181818,
"grad_norm": 0.14022031514520483,
"learning_rate": 1.6630438969294615e-07,
"loss": 0.3785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14073169231414795,
"step": 1590,
"valid_targets_mean": 18670.8,
"valid_targets_min": 6489
},
{
"epoch": 4.833333333333333,
"grad_norm": 0.14572269457660778,
"learning_rate": 1.4018930310571553e-07,
"loss": 0.3781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1340230405330658,
"step": 1595,
"valid_targets_mean": 15346.7,
"valid_targets_min": 7399
},
{
"epoch": 4.848484848484849,
"grad_norm": 0.14668101679572776,
"learning_rate": 1.1629628736690824e-07,
"loss": 0.3727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11541043221950531,
"step": 1600,
"valid_targets_mean": 13274.7,
"valid_targets_min": 3833
},
{
"epoch": 4.863636363636363,
"grad_norm": 0.14319080604843668,
"learning_rate": 9.46280158138757e-08,
"loss": 0.3751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13603129982948303,
"step": 1605,
"valid_targets_mean": 15658.5,
"valid_targets_min": 7066
},
{
"epoch": 4.878787878787879,
"grad_norm": 0.14301657158586537,
"learning_rate": 7.518691286220625e-08,
"loss": 0.3834,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13067832589149475,
"step": 1610,
"valid_targets_mean": 14509.7,
"valid_targets_min": 3622
},
{
"epoch": 4.893939393939394,
"grad_norm": 0.14024618123182978,
"learning_rate": 5.797515373445084e-08,
"loss": 0.375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11804800480604172,
"step": 1615,
"valid_targets_mean": 14769.3,
"valid_targets_min": 3072
},
{
"epoch": 4.909090909090909,
"grad_norm": 0.1569062465599826,
"learning_rate": 4.299466421675113e-08,
"loss": 0.374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12300266325473785,
"step": 1620,
"valid_targets_mean": 14378.3,
"valid_targets_min": 1594
},
{
"epoch": 4.924242424242424,
"grad_norm": 0.14063309925412415,
"learning_rate": 3.0247120443362976e-08,
"loss": 0.3778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12379644811153412,
"step": 1625,
"valid_targets_mean": 16364.5,
"valid_targets_min": 5420
},
{
"epoch": 4.9393939393939394,
"grad_norm": 0.13865249610748842,
"learning_rate": 1.973394870912193e-08,
"loss": 0.3823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11910425871610641,
"step": 1630,
"valid_targets_mean": 14783.9,
"valid_targets_min": 4717
},
{
"epoch": 4.954545454545455,
"grad_norm": 0.16786783713364176,
"learning_rate": 1.145632530985541e-08,
"loss": 0.3814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12280435860157013,
"step": 1635,
"valid_targets_mean": 15160.9,
"valid_targets_min": 2570
},
{
"epoch": 4.96969696969697,
"grad_norm": 0.1446243387874453,
"learning_rate": 5.415176410765721e-09,
"loss": 0.3775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13177569210529327,
"step": 1640,
"valid_targets_mean": 14764.5,
"valid_targets_min": 4690
},
{
"epoch": 4.984848484848484,
"grad_norm": 0.13388250594773463,
"learning_rate": 1.611177942812958e-09,
"loss": 0.3779,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11846122145652771,
"step": 1645,
"valid_targets_mean": 15252.2,
"valid_targets_min": 6161
},
{
"epoch": 5.0,
"grad_norm": 0.1337804023582898,
"learning_rate": 4.475552707772224e-11,
"loss": 0.3752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12205812335014343,
"step": 1650,
"valid_targets_mean": 15604.5,
"valid_targets_min": 4811
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12205812335014343,
"step": 1650,
"total_flos": 9.560237722870743e+18,
"train_loss": 0.10292447277993867,
"train_runtime": 13793.2861,
"train_samples_per_second": 11.477,
"train_steps_per_second": 0.12,
"valid_targets_mean": 15604.5,
"valid_targets_min": 4811
}
],
"logging_steps": 5,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.560237722870743e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}