9651 lines
268 KiB
JSON
9651 lines
268 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4368,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00801924619085806,
|
|
"grad_norm": 25.93893709425859,
|
|
"learning_rate": 3.661327231121282e-07,
|
|
"loss": 0.8949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44168591499328613,
|
|
"step": 5,
|
|
"valid_targets_mean": 4533.4,
|
|
"valid_targets_min": 3755
|
|
},
|
|
{
|
|
"epoch": 0.01603849238171612,
|
|
"grad_norm": 25.718698095236213,
|
|
"learning_rate": 8.237986270022884e-07,
|
|
"loss": 0.8763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42450276017189026,
|
|
"step": 10,
|
|
"valid_targets_mean": 4418.2,
|
|
"valid_targets_min": 3704
|
|
},
|
|
{
|
|
"epoch": 0.024057738572574178,
|
|
"grad_norm": 23.47097415796801,
|
|
"learning_rate": 1.2814645308924487e-06,
|
|
"loss": 0.8519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46040844917297363,
|
|
"step": 15,
|
|
"valid_targets_mean": 4440.6,
|
|
"valid_targets_min": 3496
|
|
},
|
|
{
|
|
"epoch": 0.03207698476343224,
|
|
"grad_norm": 15.95704975059495,
|
|
"learning_rate": 1.7391304347826088e-06,
|
|
"loss": 0.7995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3881476819515228,
|
|
"step": 20,
|
|
"valid_targets_mean": 4266.5,
|
|
"valid_targets_min": 3498
|
|
},
|
|
{
|
|
"epoch": 0.040096230954290296,
|
|
"grad_norm": 8.24928516974332,
|
|
"learning_rate": 2.196796338672769e-06,
|
|
"loss": 0.7075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3142762780189514,
|
|
"step": 25,
|
|
"valid_targets_mean": 4273.2,
|
|
"valid_targets_min": 3370
|
|
},
|
|
{
|
|
"epoch": 0.048115477145148355,
|
|
"grad_norm": 4.822695848233011,
|
|
"learning_rate": 2.654462242562929e-06,
|
|
"loss": 0.6662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31888872385025024,
|
|
"step": 30,
|
|
"valid_targets_mean": 4276.8,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 0.056134723336006415,
|
|
"grad_norm": 2.6631334530001625,
|
|
"learning_rate": 3.1121281464530894e-06,
|
|
"loss": 0.6118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037104308605194,
|
|
"step": 35,
|
|
"valid_targets_mean": 4417.9,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 0.06415396952686447,
|
|
"grad_norm": 1.8825692737354132,
|
|
"learning_rate": 3.56979405034325e-06,
|
|
"loss": 0.5785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887840270996094,
|
|
"step": 40,
|
|
"valid_targets_mean": 4329.6,
|
|
"valid_targets_min": 3842
|
|
},
|
|
{
|
|
"epoch": 0.07217321571772253,
|
|
"grad_norm": 1.5160486309892962,
|
|
"learning_rate": 4.0274599542334094e-06,
|
|
"loss": 0.5647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271957665681839,
|
|
"step": 45,
|
|
"valid_targets_mean": 4685.4,
|
|
"valid_targets_min": 4147
|
|
},
|
|
{
|
|
"epoch": 0.08019246190858059,
|
|
"grad_norm": 1.3330617549187238,
|
|
"learning_rate": 4.48512585812357e-06,
|
|
"loss": 0.5602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3069368004798889,
|
|
"step": 50,
|
|
"valid_targets_mean": 4644.8,
|
|
"valid_targets_min": 3828
|
|
},
|
|
{
|
|
"epoch": 0.08821170809943865,
|
|
"grad_norm": 1.0453758337335766,
|
|
"learning_rate": 4.94279176201373e-06,
|
|
"loss": 0.5395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378009706735611,
|
|
"step": 55,
|
|
"valid_targets_mean": 4015.2,
|
|
"valid_targets_min": 3083
|
|
},
|
|
{
|
|
"epoch": 0.09623095429029671,
|
|
"grad_norm": 0.8198296766691122,
|
|
"learning_rate": 5.400457665903891e-06,
|
|
"loss": 0.5019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23031440377235413,
|
|
"step": 60,
|
|
"valid_targets_mean": 4888.8,
|
|
"valid_targets_min": 4012
|
|
},
|
|
{
|
|
"epoch": 0.10425020048115477,
|
|
"grad_norm": 0.845932476168274,
|
|
"learning_rate": 5.858123569794051e-06,
|
|
"loss": 0.4972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2365550547838211,
|
|
"step": 65,
|
|
"valid_targets_mean": 3945.9,
|
|
"valid_targets_min": 2168
|
|
},
|
|
{
|
|
"epoch": 0.11226944667201283,
|
|
"grad_norm": 0.7627974664470367,
|
|
"learning_rate": 6.31578947368421e-06,
|
|
"loss": 0.475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22639679908752441,
|
|
"step": 70,
|
|
"valid_targets_mean": 4471.0,
|
|
"valid_targets_min": 3549
|
|
},
|
|
{
|
|
"epoch": 0.12028869286287089,
|
|
"grad_norm": 0.6884623638889261,
|
|
"learning_rate": 6.773455377574372e-06,
|
|
"loss": 0.4733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25498753786087036,
|
|
"step": 75,
|
|
"valid_targets_mean": 4519.4,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 0.12830793905372895,
|
|
"grad_norm": 0.6613354515330631,
|
|
"learning_rate": 7.231121281464531e-06,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2024919092655182,
|
|
"step": 80,
|
|
"valid_targets_mean": 4282.4,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 0.136327185244587,
|
|
"grad_norm": 0.6641922188764587,
|
|
"learning_rate": 7.688787185354691e-06,
|
|
"loss": 0.4416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24795110523700714,
|
|
"step": 85,
|
|
"valid_targets_mean": 4464.6,
|
|
"valid_targets_min": 2553
|
|
},
|
|
{
|
|
"epoch": 0.14434643143544507,
|
|
"grad_norm": 0.628697459423609,
|
|
"learning_rate": 8.146453089244852e-06,
|
|
"loss": 0.4363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23787499964237213,
|
|
"step": 90,
|
|
"valid_targets_mean": 4557.0,
|
|
"valid_targets_min": 2805
|
|
},
|
|
{
|
|
"epoch": 0.15236567762630313,
|
|
"grad_norm": 0.6370068982331477,
|
|
"learning_rate": 8.604118993135013e-06,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2016415148973465,
|
|
"step": 95,
|
|
"valid_targets_mean": 4477.1,
|
|
"valid_targets_min": 4041
|
|
},
|
|
{
|
|
"epoch": 0.16038492381716118,
|
|
"grad_norm": 0.6280792265309377,
|
|
"learning_rate": 9.061784897025172e-06,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19570821523666382,
|
|
"step": 100,
|
|
"valid_targets_mean": 4459.1,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 0.16840417000801924,
|
|
"grad_norm": 0.5849957822848729,
|
|
"learning_rate": 9.519450800915333e-06,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21672260761260986,
|
|
"step": 105,
|
|
"valid_targets_mean": 4764.8,
|
|
"valid_targets_min": 3607
|
|
},
|
|
{
|
|
"epoch": 0.1764234161988773,
|
|
"grad_norm": 0.6046937662896614,
|
|
"learning_rate": 9.977116704805492e-06,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20225200057029724,
|
|
"step": 110,
|
|
"valid_targets_mean": 4600.9,
|
|
"valid_targets_min": 4050
|
|
},
|
|
{
|
|
"epoch": 0.18444266238973536,
|
|
"grad_norm": 0.5845649899691997,
|
|
"learning_rate": 1.0434782608695653e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19251415133476257,
|
|
"step": 115,
|
|
"valid_targets_mean": 4493.6,
|
|
"valid_targets_min": 3348
|
|
},
|
|
{
|
|
"epoch": 0.19246190858059342,
|
|
"grad_norm": 0.5721188755104001,
|
|
"learning_rate": 1.0892448512585814e-05,
|
|
"loss": 0.3923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17891809344291687,
|
|
"step": 120,
|
|
"valid_targets_mean": 4480.1,
|
|
"valid_targets_min": 3590
|
|
},
|
|
{
|
|
"epoch": 0.20048115477145148,
|
|
"grad_norm": 0.6214227515410161,
|
|
"learning_rate": 1.1350114416475973e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21907271444797516,
|
|
"step": 125,
|
|
"valid_targets_mean": 4788.8,
|
|
"valid_targets_min": 3541
|
|
},
|
|
{
|
|
"epoch": 0.20850040096230954,
|
|
"grad_norm": 0.5890661882125992,
|
|
"learning_rate": 1.1807780320366134e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19938123226165771,
|
|
"step": 130,
|
|
"valid_targets_mean": 4667.6,
|
|
"valid_targets_min": 4108
|
|
},
|
|
{
|
|
"epoch": 0.2165196471531676,
|
|
"grad_norm": 0.6124683017118417,
|
|
"learning_rate": 1.2265446224256295e-05,
|
|
"loss": 0.3739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20271918177604675,
|
|
"step": 135,
|
|
"valid_targets_mean": 4578.2,
|
|
"valid_targets_min": 3825
|
|
},
|
|
{
|
|
"epoch": 0.22453889334402566,
|
|
"grad_norm": 0.5860572596484608,
|
|
"learning_rate": 1.2723112128146454e-05,
|
|
"loss": 0.3665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18677105009555817,
|
|
"step": 140,
|
|
"valid_targets_mean": 4766.1,
|
|
"valid_targets_min": 3987
|
|
},
|
|
{
|
|
"epoch": 0.23255813953488372,
|
|
"grad_norm": 0.599560405051918,
|
|
"learning_rate": 1.3180778032036615e-05,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20332522690296173,
|
|
"step": 145,
|
|
"valid_targets_mean": 4647.2,
|
|
"valid_targets_min": 4189
|
|
},
|
|
{
|
|
"epoch": 0.24057738572574178,
|
|
"grad_norm": 0.6067020292858505,
|
|
"learning_rate": 1.3638443935926776e-05,
|
|
"loss": 0.351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16541624069213867,
|
|
"step": 150,
|
|
"valid_targets_mean": 4432.4,
|
|
"valid_targets_min": 3700
|
|
},
|
|
{
|
|
"epoch": 0.24859663191659984,
|
|
"grad_norm": 0.5796049971335495,
|
|
"learning_rate": 1.4096109839816933e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15345534682273865,
|
|
"step": 155,
|
|
"valid_targets_mean": 4438.0,
|
|
"valid_targets_min": 4018
|
|
},
|
|
{
|
|
"epoch": 0.2566158781074579,
|
|
"grad_norm": 0.5493657265375891,
|
|
"learning_rate": 1.4553775743707096e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1659364104270935,
|
|
"step": 160,
|
|
"valid_targets_mean": 4752.1,
|
|
"valid_targets_min": 4460
|
|
},
|
|
{
|
|
"epoch": 0.264635124298316,
|
|
"grad_norm": 0.6087060237640283,
|
|
"learning_rate": 1.5011441647597256e-05,
|
|
"loss": 0.358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1554056704044342,
|
|
"step": 165,
|
|
"valid_targets_mean": 4277.9,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 0.272654370489174,
|
|
"grad_norm": 0.5624823177707295,
|
|
"learning_rate": 1.5469107551487414e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16040264070034027,
|
|
"step": 170,
|
|
"valid_targets_mean": 4405.2,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 0.2806736166800321,
|
|
"grad_norm": 0.5984432570477501,
|
|
"learning_rate": 1.5926773455377575e-05,
|
|
"loss": 0.3537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14715197682380676,
|
|
"step": 175,
|
|
"valid_targets_mean": 4093.1,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 0.28869286287089013,
|
|
"grad_norm": 0.5697909818749695,
|
|
"learning_rate": 1.6384439359267736e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709306240081787,
|
|
"step": 180,
|
|
"valid_targets_mean": 4883.2,
|
|
"valid_targets_min": 4300
|
|
},
|
|
{
|
|
"epoch": 0.2967121090617482,
|
|
"grad_norm": 0.5638629999487501,
|
|
"learning_rate": 1.6842105263157896e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16787275671958923,
|
|
"step": 185,
|
|
"valid_targets_mean": 4753.9,
|
|
"valid_targets_min": 4441
|
|
},
|
|
{
|
|
"epoch": 0.30473135525260625,
|
|
"grad_norm": 0.6325002665986126,
|
|
"learning_rate": 1.7299771167048057e-05,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16780641674995422,
|
|
"step": 190,
|
|
"valid_targets_mean": 4809.8,
|
|
"valid_targets_min": 3864
|
|
},
|
|
{
|
|
"epoch": 0.31275060144346434,
|
|
"grad_norm": 0.6358796316761818,
|
|
"learning_rate": 1.7757437070938218e-05,
|
|
"loss": 0.3476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1687561273574829,
|
|
"step": 195,
|
|
"valid_targets_mean": 4546.9,
|
|
"valid_targets_min": 3626
|
|
},
|
|
{
|
|
"epoch": 0.32076984763432237,
|
|
"grad_norm": 1.0421748402923832,
|
|
"learning_rate": 1.8215102974828376e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17210674285888672,
|
|
"step": 200,
|
|
"valid_targets_mean": 4559.0,
|
|
"valid_targets_min": 3782
|
|
},
|
|
{
|
|
"epoch": 0.32878909382518046,
|
|
"grad_norm": 0.6014143772179982,
|
|
"learning_rate": 1.8672768878718537e-05,
|
|
"loss": 0.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19059178233146667,
|
|
"step": 205,
|
|
"valid_targets_mean": 4662.4,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 0.3368083400160385,
|
|
"grad_norm": 0.6660148366578167,
|
|
"learning_rate": 1.9130434782608697e-05,
|
|
"loss": 0.3321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15581390261650085,
|
|
"step": 210,
|
|
"valid_targets_mean": 4340.6,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 0.3448275862068966,
|
|
"grad_norm": 0.5377629092701176,
|
|
"learning_rate": 1.9588100686498858e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13782639801502228,
|
|
"step": 215,
|
|
"valid_targets_mean": 4399.9,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 0.3528468323977546,
|
|
"grad_norm": 0.598075567627835,
|
|
"learning_rate": 2.004576659038902e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17541082203388214,
|
|
"step": 220,
|
|
"valid_targets_mean": 4597.4,
|
|
"valid_targets_min": 4031
|
|
},
|
|
{
|
|
"epoch": 0.3608660785886127,
|
|
"grad_norm": 0.6850692726033889,
|
|
"learning_rate": 2.050343249427918e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1495487093925476,
|
|
"step": 225,
|
|
"valid_targets_mean": 4273.9,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 0.3688853247794707,
|
|
"grad_norm": 0.575458350999104,
|
|
"learning_rate": 2.0961098398169337e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16065005958080292,
|
|
"step": 230,
|
|
"valid_targets_mean": 4433.1,
|
|
"valid_targets_min": 3288
|
|
},
|
|
{
|
|
"epoch": 0.3769045709703288,
|
|
"grad_norm": 0.6478385101009763,
|
|
"learning_rate": 2.14187643020595e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14961275458335876,
|
|
"step": 235,
|
|
"valid_targets_mean": 4266.0,
|
|
"valid_targets_min": 2942
|
|
},
|
|
{
|
|
"epoch": 0.38492381716118684,
|
|
"grad_norm": 0.610877732370752,
|
|
"learning_rate": 2.187643020594966e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1404804289340973,
|
|
"step": 240,
|
|
"valid_targets_mean": 4499.8,
|
|
"valid_targets_min": 3576
|
|
},
|
|
{
|
|
"epoch": 0.39294306335204493,
|
|
"grad_norm": 0.9029036576631587,
|
|
"learning_rate": 2.2334096109839817e-05,
|
|
"loss": 0.3339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16802749037742615,
|
|
"step": 245,
|
|
"valid_targets_mean": 4724.5,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 0.40096230954290296,
|
|
"grad_norm": 0.5878736054933581,
|
|
"learning_rate": 2.279176201372998e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1772792935371399,
|
|
"step": 250,
|
|
"valid_targets_mean": 4733.0,
|
|
"valid_targets_min": 3796
|
|
},
|
|
{
|
|
"epoch": 0.40898155573376105,
|
|
"grad_norm": 0.6152125003240627,
|
|
"learning_rate": 2.3249427917620138e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14559751749038696,
|
|
"step": 255,
|
|
"valid_targets_mean": 4158.2,
|
|
"valid_targets_min": 3240
|
|
},
|
|
{
|
|
"epoch": 0.4170008019246191,
|
|
"grad_norm": 0.566623809156889,
|
|
"learning_rate": 2.37070938215103e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16707779467105865,
|
|
"step": 260,
|
|
"valid_targets_mean": 4677.6,
|
|
"valid_targets_min": 3978
|
|
},
|
|
{
|
|
"epoch": 0.42502004811547717,
|
|
"grad_norm": 0.604668538312701,
|
|
"learning_rate": 2.4164759725400463e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13855025172233582,
|
|
"step": 265,
|
|
"valid_targets_mean": 4116.8,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 0.4330392943063352,
|
|
"grad_norm": 0.6205737604994523,
|
|
"learning_rate": 2.462242562929062e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14771416783332825,
|
|
"step": 270,
|
|
"valid_targets_mean": 4581.4,
|
|
"valid_targets_min": 3717
|
|
},
|
|
{
|
|
"epoch": 0.4410585404971933,
|
|
"grad_norm": 0.5996813085688731,
|
|
"learning_rate": 2.508009153318078e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15043914318084717,
|
|
"step": 275,
|
|
"valid_targets_mean": 4340.0,
|
|
"valid_targets_min": 3202
|
|
},
|
|
{
|
|
"epoch": 0.4490777866880513,
|
|
"grad_norm": 0.6358509904492083,
|
|
"learning_rate": 2.5537757437070943e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14119401574134827,
|
|
"step": 280,
|
|
"valid_targets_mean": 4143.2,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 0.4570970328789094,
|
|
"grad_norm": 0.5742849042142552,
|
|
"learning_rate": 2.59954233409611e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1485038846731186,
|
|
"step": 285,
|
|
"valid_targets_mean": 4349.6,
|
|
"valid_targets_min": 2709
|
|
},
|
|
{
|
|
"epoch": 0.46511627906976744,
|
|
"grad_norm": 0.5985162333894835,
|
|
"learning_rate": 2.645308924485126e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16479596495628357,
|
|
"step": 290,
|
|
"valid_targets_mean": 4764.4,
|
|
"valid_targets_min": 3992
|
|
},
|
|
{
|
|
"epoch": 0.4731355252606255,
|
|
"grad_norm": 0.5645064863068383,
|
|
"learning_rate": 2.6910755148741422e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15917937457561493,
|
|
"step": 295,
|
|
"valid_targets_mean": 4673.4,
|
|
"valid_targets_min": 3751
|
|
},
|
|
{
|
|
"epoch": 0.48115477145148355,
|
|
"grad_norm": 0.5646048732213123,
|
|
"learning_rate": 2.7368421052631583e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17208397388458252,
|
|
"step": 300,
|
|
"valid_targets_mean": 4654.1,
|
|
"valid_targets_min": 3754
|
|
},
|
|
{
|
|
"epoch": 0.48917401764234164,
|
|
"grad_norm": 0.6081949222879935,
|
|
"learning_rate": 2.782608695652174e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14883257448673248,
|
|
"step": 305,
|
|
"valid_targets_mean": 4522.8,
|
|
"valid_targets_min": 3740
|
|
},
|
|
{
|
|
"epoch": 0.4971932638331997,
|
|
"grad_norm": 0.6142440880089384,
|
|
"learning_rate": 2.8283752860411904e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14264944195747375,
|
|
"step": 310,
|
|
"valid_targets_mean": 4579.0,
|
|
"valid_targets_min": 4272
|
|
},
|
|
{
|
|
"epoch": 0.5052125100240578,
|
|
"grad_norm": 0.9752812739921565,
|
|
"learning_rate": 2.8741418764302062e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1498848795890808,
|
|
"step": 315,
|
|
"valid_targets_mean": 4651.8,
|
|
"valid_targets_min": 3760
|
|
},
|
|
{
|
|
"epoch": 0.5132317562149158,
|
|
"grad_norm": 0.6331708410136568,
|
|
"learning_rate": 2.9199084668192223e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14827972650527954,
|
|
"step": 320,
|
|
"valid_targets_mean": 4352.1,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 0.5212510024057738,
|
|
"grad_norm": 0.609165475277606,
|
|
"learning_rate": 2.9656750572082384e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13555066287517548,
|
|
"step": 325,
|
|
"valid_targets_mean": 4264.2,
|
|
"valid_targets_min": 3307
|
|
},
|
|
{
|
|
"epoch": 0.529270248596632,
|
|
"grad_norm": 0.6051457640372894,
|
|
"learning_rate": 3.0114416475972544e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14938730001449585,
|
|
"step": 330,
|
|
"valid_targets_mean": 4613.2,
|
|
"valid_targets_min": 4178
|
|
},
|
|
{
|
|
"epoch": 0.53728949478749,
|
|
"grad_norm": 0.6163806809990549,
|
|
"learning_rate": 3.05720823798627e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401451677083969,
|
|
"step": 335,
|
|
"valid_targets_mean": 4492.5,
|
|
"valid_targets_min": 3571
|
|
},
|
|
{
|
|
"epoch": 0.545308740978348,
|
|
"grad_norm": 0.5552930926085583,
|
|
"learning_rate": 3.102974828375286e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13711142539978027,
|
|
"step": 340,
|
|
"valid_targets_mean": 4672.6,
|
|
"valid_targets_min": 3911
|
|
},
|
|
{
|
|
"epoch": 0.5533279871692061,
|
|
"grad_norm": 0.6194566606069442,
|
|
"learning_rate": 3.1487414187643024e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14930705726146698,
|
|
"step": 345,
|
|
"valid_targets_mean": 4439.6,
|
|
"valid_targets_min": 3374
|
|
},
|
|
{
|
|
"epoch": 0.5613472333600642,
|
|
"grad_norm": 0.6202027712263462,
|
|
"learning_rate": 3.1945080091533184e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1559915393590927,
|
|
"step": 350,
|
|
"valid_targets_mean": 4606.4,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 0.5693664795509222,
|
|
"grad_norm": 0.5781069048374654,
|
|
"learning_rate": 3.240274599542334e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1775219440460205,
|
|
"step": 355,
|
|
"valid_targets_mean": 4725.4,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 0.5773857257417803,
|
|
"grad_norm": 0.6421379145530671,
|
|
"learning_rate": 3.2860411899313506e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16551250219345093,
|
|
"step": 360,
|
|
"valid_targets_mean": 4605.9,
|
|
"valid_targets_min": 3256
|
|
},
|
|
{
|
|
"epoch": 0.5854049719326383,
|
|
"grad_norm": 0.5799549018196022,
|
|
"learning_rate": 3.331807780320366e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14630819857120514,
|
|
"step": 365,
|
|
"valid_targets_mean": 4414.2,
|
|
"valid_targets_min": 3666
|
|
},
|
|
{
|
|
"epoch": 0.5934242181234964,
|
|
"grad_norm": 0.5747689501708185,
|
|
"learning_rate": 3.377574370709382e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15600350499153137,
|
|
"step": 370,
|
|
"valid_targets_mean": 4742.4,
|
|
"valid_targets_min": 4012
|
|
},
|
|
{
|
|
"epoch": 0.6014434643143545,
|
|
"grad_norm": 0.5662240566239958,
|
|
"learning_rate": 3.423340961098399e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1513952612876892,
|
|
"step": 375,
|
|
"valid_targets_mean": 4661.5,
|
|
"valid_targets_min": 3865
|
|
},
|
|
{
|
|
"epoch": 0.6094627105052125,
|
|
"grad_norm": 0.6800789097555724,
|
|
"learning_rate": 3.469107551487414e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1483420729637146,
|
|
"step": 380,
|
|
"valid_targets_mean": 4598.8,
|
|
"valid_targets_min": 4113
|
|
},
|
|
{
|
|
"epoch": 0.6174819566960705,
|
|
"grad_norm": 0.557563116624825,
|
|
"learning_rate": 3.5148741418764304e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14878618717193604,
|
|
"step": 385,
|
|
"valid_targets_mean": 5068.9,
|
|
"valid_targets_min": 4116
|
|
},
|
|
{
|
|
"epoch": 0.6255012028869287,
|
|
"grad_norm": 0.576046265981817,
|
|
"learning_rate": 3.5606407322654464e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143460214138031,
|
|
"step": 390,
|
|
"valid_targets_mean": 4421.2,
|
|
"valid_targets_min": 2872
|
|
},
|
|
{
|
|
"epoch": 0.6335204490777867,
|
|
"grad_norm": 0.5843540062651988,
|
|
"learning_rate": 3.6064073226544625e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505216807126999,
|
|
"step": 395,
|
|
"valid_targets_mean": 4390.6,
|
|
"valid_targets_min": 3656
|
|
},
|
|
{
|
|
"epoch": 0.6415396952686447,
|
|
"grad_norm": 0.5591427098714248,
|
|
"learning_rate": 3.6521739130434786e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.155622199177742,
|
|
"step": 400,
|
|
"valid_targets_mean": 4932.4,
|
|
"valid_targets_min": 3737
|
|
},
|
|
{
|
|
"epoch": 0.6495589414595028,
|
|
"grad_norm": 0.5741974021514135,
|
|
"learning_rate": 3.697940503432495e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1400773525238037,
|
|
"step": 405,
|
|
"valid_targets_mean": 4671.6,
|
|
"valid_targets_min": 4163
|
|
},
|
|
{
|
|
"epoch": 0.6575781876503609,
|
|
"grad_norm": 0.6606145247637403,
|
|
"learning_rate": 3.743707093821511e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15526138246059418,
|
|
"step": 410,
|
|
"valid_targets_mean": 4999.5,
|
|
"valid_targets_min": 4210
|
|
},
|
|
{
|
|
"epoch": 0.6655974338412189,
|
|
"grad_norm": 0.5711338088362218,
|
|
"learning_rate": 3.789473684210526e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15937569737434387,
|
|
"step": 415,
|
|
"valid_targets_mean": 4468.0,
|
|
"valid_targets_min": 3227
|
|
},
|
|
{
|
|
"epoch": 0.673616680032077,
|
|
"grad_norm": 0.5559250137662823,
|
|
"learning_rate": 3.835240274599543e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1304740160703659,
|
|
"step": 420,
|
|
"valid_targets_mean": 4309.5,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 0.681635926222935,
|
|
"grad_norm": 0.592425604596401,
|
|
"learning_rate": 3.8810068649885584e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1449316442012787,
|
|
"step": 425,
|
|
"valid_targets_mean": 4364.6,
|
|
"valid_targets_min": 3682
|
|
},
|
|
{
|
|
"epoch": 0.6896551724137931,
|
|
"grad_norm": 0.5709008607782596,
|
|
"learning_rate": 3.9267734553775745e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1241619810461998,
|
|
"step": 430,
|
|
"valid_targets_mean": 4013.4,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 0.6976744186046512,
|
|
"grad_norm": 0.5456450873082244,
|
|
"learning_rate": 3.9725400457665905e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15716767311096191,
|
|
"step": 435,
|
|
"valid_targets_mean": 4646.8,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 0.7056936647955092,
|
|
"grad_norm": 0.5481396179437047,
|
|
"learning_rate": 3.999997445219712e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532319188117981,
|
|
"step": 440,
|
|
"valid_targets_mean": 4735.2,
|
|
"valid_targets_min": 4203
|
|
},
|
|
{
|
|
"epoch": 0.7137129109863672,
|
|
"grad_norm": 0.5697181577041776,
|
|
"learning_rate": 3.999968704016428e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15176308155059814,
|
|
"step": 445,
|
|
"valid_targets_mean": 4572.2,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 0.7217321571772254,
|
|
"grad_norm": 0.5624254746825745,
|
|
"learning_rate": 3.9999080285949514e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311446726322174,
|
|
"step": 450,
|
|
"valid_targets_mean": 4172.8,
|
|
"valid_targets_min": 3528
|
|
},
|
|
{
|
|
"epoch": 0.7297514033680834,
|
|
"grad_norm": 0.5411827901476868,
|
|
"learning_rate": 3.999815419924108e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16425827145576477,
|
|
"step": 455,
|
|
"valid_targets_mean": 4748.2,
|
|
"valid_targets_min": 3523
|
|
},
|
|
{
|
|
"epoch": 0.7377706495589414,
|
|
"grad_norm": 0.5753881977271454,
|
|
"learning_rate": 3.999690879482614e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15344473719596863,
|
|
"step": 460,
|
|
"valid_targets_mean": 4673.5,
|
|
"valid_targets_min": 4099
|
|
},
|
|
{
|
|
"epoch": 0.7457898957497995,
|
|
"grad_norm": 0.5491417042874043,
|
|
"learning_rate": 3.9995344092590506e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13455438613891602,
|
|
"step": 465,
|
|
"valid_targets_mean": 4442.8,
|
|
"valid_targets_min": 3760
|
|
},
|
|
{
|
|
"epoch": 0.7538091419406576,
|
|
"grad_norm": 0.5476010418758165,
|
|
"learning_rate": 3.999346011751835e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15775763988494873,
|
|
"step": 470,
|
|
"valid_targets_mean": 4624.8,
|
|
"valid_targets_min": 3591
|
|
},
|
|
{
|
|
"epoch": 0.7618283881315157,
|
|
"grad_norm": 0.5895723172552697,
|
|
"learning_rate": 3.999125689969176e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14919300377368927,
|
|
"step": 475,
|
|
"valid_targets_mean": 4280.5,
|
|
"valid_targets_min": 3216
|
|
},
|
|
{
|
|
"epoch": 0.7698476343223737,
|
|
"grad_norm": 0.8031061148558112,
|
|
"learning_rate": 3.9988734474290324e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13357609510421753,
|
|
"step": 480,
|
|
"valid_targets_mean": 4590.2,
|
|
"valid_targets_min": 3505
|
|
},
|
|
{
|
|
"epoch": 0.7778668805132317,
|
|
"grad_norm": 0.5028444897147156,
|
|
"learning_rate": 3.9985892881590513e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13696768879890442,
|
|
"step": 485,
|
|
"valid_targets_mean": 4476.8,
|
|
"valid_targets_min": 3096
|
|
},
|
|
{
|
|
"epoch": 0.7858861267040899,
|
|
"grad_norm": 0.4946582379386875,
|
|
"learning_rate": 3.9982732166965054e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260722279548645,
|
|
"step": 490,
|
|
"valid_targets_mean": 4814.1,
|
|
"valid_targets_min": 4302
|
|
},
|
|
{
|
|
"epoch": 0.7939053728949479,
|
|
"grad_norm": 0.5382233433962449,
|
|
"learning_rate": 3.997925238088221e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13558533787727356,
|
|
"step": 495,
|
|
"valid_targets_mean": 4484.9,
|
|
"valid_targets_min": 4155
|
|
},
|
|
{
|
|
"epoch": 0.8019246190858059,
|
|
"grad_norm": 0.578411635262446,
|
|
"learning_rate": 3.9975453578904975e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12712955474853516,
|
|
"step": 500,
|
|
"valid_targets_mean": 4359.4,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 0.809943865276664,
|
|
"grad_norm": 0.5584823407340346,
|
|
"learning_rate": 3.997133582169018e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15413126349449158,
|
|
"step": 505,
|
|
"valid_targets_mean": 4680.0,
|
|
"valid_targets_min": 4023
|
|
},
|
|
{
|
|
"epoch": 0.8179631114675221,
|
|
"grad_norm": 0.5698107323006046,
|
|
"learning_rate": 3.996689917498754e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.128324493765831,
|
|
"step": 510,
|
|
"valid_targets_mean": 4253.2,
|
|
"valid_targets_min": 3762
|
|
},
|
|
{
|
|
"epoch": 0.8259823576583801,
|
|
"grad_norm": 0.5676352989821747,
|
|
"learning_rate": 3.9962143709638585e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14801359176635742,
|
|
"step": 515,
|
|
"valid_targets_mean": 4673.0,
|
|
"valid_targets_min": 3566
|
|
},
|
|
{
|
|
"epoch": 0.8340016038492382,
|
|
"grad_norm": 0.5240845567033441,
|
|
"learning_rate": 3.995706950157554e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805700659751892,
|
|
"step": 520,
|
|
"valid_targets_mean": 5876.4,
|
|
"valid_targets_min": 3711
|
|
},
|
|
{
|
|
"epoch": 0.8420208500400962,
|
|
"grad_norm": 0.5002705945989283,
|
|
"learning_rate": 3.995167663182008e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13804933428764343,
|
|
"step": 525,
|
|
"valid_targets_mean": 4360.1,
|
|
"valid_targets_min": 3193
|
|
},
|
|
{
|
|
"epoch": 0.8500400962309543,
|
|
"grad_norm": 0.5219478356948187,
|
|
"learning_rate": 3.994596518648214e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13879656791687012,
|
|
"step": 530,
|
|
"valid_targets_mean": 4250.8,
|
|
"valid_targets_min": 3454
|
|
},
|
|
{
|
|
"epoch": 0.8580593424218124,
|
|
"grad_norm": 0.5361743024688889,
|
|
"learning_rate": 3.993993525675838e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19499969482421875,
|
|
"step": 535,
|
|
"valid_targets_mean": 5412.9,
|
|
"valid_targets_min": 3853
|
|
},
|
|
{
|
|
"epoch": 0.8660785886126704,
|
|
"grad_norm": 0.5060153401815819,
|
|
"learning_rate": 3.993358693893086e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14376498758792877,
|
|
"step": 540,
|
|
"valid_targets_mean": 4866.4,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 0.8740978348035284,
|
|
"grad_norm": 0.542055617340528,
|
|
"learning_rate": 3.9926920334365457e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13623861968517303,
|
|
"step": 545,
|
|
"valid_targets_mean": 4351.4,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 0.8821170809943866,
|
|
"grad_norm": 0.5254446872102225,
|
|
"learning_rate": 3.991993554951023e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522407829761505,
|
|
"step": 550,
|
|
"valid_targets_mean": 4532.6,
|
|
"valid_targets_min": 3343
|
|
},
|
|
{
|
|
"epoch": 0.8901363271852446,
|
|
"grad_norm": 0.49534381153594353,
|
|
"learning_rate": 3.991263269589376e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14857111871242523,
|
|
"step": 555,
|
|
"valid_targets_mean": 4463.9,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 0.8981555733761026,
|
|
"grad_norm": 0.4981887867628107,
|
|
"learning_rate": 3.990501189012332e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1421220898628235,
|
|
"step": 560,
|
|
"valid_targets_mean": 4591.1,
|
|
"valid_targets_min": 3793
|
|
},
|
|
{
|
|
"epoch": 0.9061748195669607,
|
|
"grad_norm": 0.5099892287999711,
|
|
"learning_rate": 3.989707325388305e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13454025983810425,
|
|
"step": 565,
|
|
"valid_targets_mean": 4170.0,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 0.9141940657578188,
|
|
"grad_norm": 0.49925882618205747,
|
|
"learning_rate": 3.9888816913932016e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298651546239853,
|
|
"step": 570,
|
|
"valid_targets_mean": 4787.0,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 0.9222133119486768,
|
|
"grad_norm": 0.47587103879847314,
|
|
"learning_rate": 3.988024300210215e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14305976033210754,
|
|
"step": 575,
|
|
"valid_targets_mean": 4647.4,
|
|
"valid_targets_min": 3109
|
|
},
|
|
{
|
|
"epoch": 0.9302325581395349,
|
|
"grad_norm": 0.4755689705093128,
|
|
"learning_rate": 3.987135165529618e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14349956810474396,
|
|
"step": 580,
|
|
"valid_targets_mean": 4912.1,
|
|
"valid_targets_min": 3959
|
|
},
|
|
{
|
|
"epoch": 0.9382518043303929,
|
|
"grad_norm": 0.523216241926599,
|
|
"learning_rate": 3.9862143015485446e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294865906238556,
|
|
"step": 585,
|
|
"valid_targets_mean": 4589.0,
|
|
"valid_targets_min": 4125
|
|
},
|
|
{
|
|
"epoch": 0.946271050521251,
|
|
"grad_norm": 0.5293730143488768,
|
|
"learning_rate": 3.985261722970759e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13711950182914734,
|
|
"step": 590,
|
|
"valid_targets_mean": 4533.4,
|
|
"valid_targets_min": 3767
|
|
},
|
|
{
|
|
"epoch": 0.9542902967121091,
|
|
"grad_norm": 0.485235738093815,
|
|
"learning_rate": 3.984277445006426e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15331220626831055,
|
|
"step": 595,
|
|
"valid_targets_mean": 4663.4,
|
|
"valid_targets_min": 4290
|
|
},
|
|
{
|
|
"epoch": 0.9623095429029671,
|
|
"grad_norm": 0.49428144095597415,
|
|
"learning_rate": 3.9832614833718654e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14586973190307617,
|
|
"step": 600,
|
|
"valid_targets_mean": 4711.4,
|
|
"valid_targets_min": 3903
|
|
},
|
|
{
|
|
"epoch": 0.9703287890938251,
|
|
"grad_norm": 0.5409809942905914,
|
|
"learning_rate": 3.9822138542893005e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15789353847503662,
|
|
"step": 605,
|
|
"valid_targets_mean": 4643.1,
|
|
"valid_targets_min": 3570
|
|
},
|
|
{
|
|
"epoch": 0.9783480352846833,
|
|
"grad_norm": 0.5534906296852654,
|
|
"learning_rate": 3.9811345744866014e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1503206193447113,
|
|
"step": 610,
|
|
"valid_targets_mean": 4324.6,
|
|
"valid_targets_min": 3441
|
|
},
|
|
{
|
|
"epoch": 0.9863672814755413,
|
|
"grad_norm": 0.5633051624329332,
|
|
"learning_rate": 3.980023661197016e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12011225521564484,
|
|
"step": 615,
|
|
"valid_targets_mean": 4225.4,
|
|
"valid_targets_min": 3537
|
|
},
|
|
{
|
|
"epoch": 0.9943865276663993,
|
|
"grad_norm": 0.5040900294144279,
|
|
"learning_rate": 3.978881132158896e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12355516105890274,
|
|
"step": 620,
|
|
"valid_targets_mean": 4300.6,
|
|
"valid_targets_min": 3563
|
|
},
|
|
{
|
|
"epoch": 1.0016038492381716,
|
|
"grad_norm": 0.5370817797783234,
|
|
"learning_rate": 3.9777070056154124e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1492697149515152,
|
|
"step": 625,
|
|
"valid_targets_mean": 4748.9,
|
|
"valid_targets_min": 3909
|
|
},
|
|
{
|
|
"epoch": 1.0096230954290297,
|
|
"grad_norm": 0.4951524022911903,
|
|
"learning_rate": 3.976501300314264e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12941189110279083,
|
|
"step": 630,
|
|
"valid_targets_mean": 4441.1,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 1.0176423416198876,
|
|
"grad_norm": 0.492539373450662,
|
|
"learning_rate": 3.9752640355073825e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14492177963256836,
|
|
"step": 635,
|
|
"valid_targets_mean": 4296.6,
|
|
"valid_targets_min": 3206
|
|
},
|
|
{
|
|
"epoch": 1.0256615878107458,
|
|
"grad_norm": 0.5215625672382057,
|
|
"learning_rate": 3.9739952309506175e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14619646966457367,
|
|
"step": 640,
|
|
"valid_targets_mean": 4576.0,
|
|
"valid_targets_min": 3238
|
|
},
|
|
{
|
|
"epoch": 1.033680834001604,
|
|
"grad_norm": 0.47893168570108885,
|
|
"learning_rate": 3.972694906903427e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13103602826595306,
|
|
"step": 645,
|
|
"valid_targets_mean": 4565.1,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 1.0417000801924619,
|
|
"grad_norm": 0.5597038490100954,
|
|
"learning_rate": 3.971363084128552e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13752716779708862,
|
|
"step": 650,
|
|
"valid_targets_mean": 4595.2,
|
|
"valid_targets_min": 3805
|
|
},
|
|
{
|
|
"epoch": 1.04971932638332,
|
|
"grad_norm": 0.5326770753660939,
|
|
"learning_rate": 3.969999783891685e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12854057550430298,
|
|
"step": 655,
|
|
"valid_targets_mean": 4284.6,
|
|
"valid_targets_min": 3390
|
|
},
|
|
{
|
|
"epoch": 1.057738572574178,
|
|
"grad_norm": 0.49794332081324766,
|
|
"learning_rate": 3.96860502796113e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1288987547159195,
|
|
"step": 660,
|
|
"valid_targets_mean": 4167.9,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 1.065757818765036,
|
|
"grad_norm": 0.5974645166542571,
|
|
"learning_rate": 3.967178838607456e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13639718294143677,
|
|
"step": 665,
|
|
"valid_targets_mean": 4356.2,
|
|
"valid_targets_min": 3373
|
|
},
|
|
{
|
|
"epoch": 1.0737770649558942,
|
|
"grad_norm": 0.5099366817039852,
|
|
"learning_rate": 3.965721238603139e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12481896579265594,
|
|
"step": 670,
|
|
"valid_targets_mean": 4289.0,
|
|
"valid_targets_min": 3379
|
|
},
|
|
{
|
|
"epoch": 1.0817963111467521,
|
|
"grad_norm": 0.47904506880373543,
|
|
"learning_rate": 3.964232251222203e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12590855360031128,
|
|
"step": 675,
|
|
"valid_targets_mean": 4731.8,
|
|
"valid_targets_min": 3069
|
|
},
|
|
{
|
|
"epoch": 1.0898155573376103,
|
|
"grad_norm": 0.6917807522197288,
|
|
"learning_rate": 3.962711900239844e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12959305942058563,
|
|
"step": 680,
|
|
"valid_targets_mean": 4266.8,
|
|
"valid_targets_min": 3526
|
|
},
|
|
{
|
|
"epoch": 1.0978348035284684,
|
|
"grad_norm": 0.484929433947134,
|
|
"learning_rate": 3.961160209932051e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1439363956451416,
|
|
"step": 685,
|
|
"valid_targets_mean": 4446.9,
|
|
"valid_targets_min": 3659
|
|
},
|
|
{
|
|
"epoch": 1.1058540497193263,
|
|
"grad_norm": 0.5258094605744518,
|
|
"learning_rate": 3.95957720507522e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1172778457403183,
|
|
"step": 690,
|
|
"valid_targets_mean": 4397.9,
|
|
"valid_targets_min": 3672
|
|
},
|
|
{
|
|
"epoch": 1.1138732959101845,
|
|
"grad_norm": 0.5411845135122854,
|
|
"learning_rate": 3.957962910945759e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13531604409217834,
|
|
"step": 695,
|
|
"valid_targets_mean": 4707.4,
|
|
"valid_targets_min": 3616
|
|
},
|
|
{
|
|
"epoch": 1.1218925421010426,
|
|
"grad_norm": 0.4896743456345858,
|
|
"learning_rate": 3.9563173533196805e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13159900903701782,
|
|
"step": 700,
|
|
"valid_targets_mean": 4461.2,
|
|
"valid_targets_min": 3940
|
|
},
|
|
{
|
|
"epoch": 1.1299117882919005,
|
|
"grad_norm": 0.5066441842471634,
|
|
"learning_rate": 3.954640558472195e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12832167744636536,
|
|
"step": 705,
|
|
"valid_targets_mean": 4522.0,
|
|
"valid_targets_min": 3696
|
|
},
|
|
{
|
|
"epoch": 1.1379310344827587,
|
|
"grad_norm": 0.5241120075590404,
|
|
"learning_rate": 3.952932553177287e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13883212208747864,
|
|
"step": 710,
|
|
"valid_targets_mean": 4549.4,
|
|
"valid_targets_min": 3967
|
|
},
|
|
{
|
|
"epoch": 1.1459502806736166,
|
|
"grad_norm": 0.5149265797788679,
|
|
"learning_rate": 3.95119336470729e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14655649662017822,
|
|
"step": 715,
|
|
"valid_targets_mean": 4650.1,
|
|
"valid_targets_min": 3900
|
|
},
|
|
{
|
|
"epoch": 1.1539695268644747,
|
|
"grad_norm": 0.4980699259010925,
|
|
"learning_rate": 3.949423020832451e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12673446536064148,
|
|
"step": 720,
|
|
"valid_targets_mean": 4231.0,
|
|
"valid_targets_min": 3255
|
|
},
|
|
{
|
|
"epoch": 1.1619887730553329,
|
|
"grad_norm": 0.534314312644579,
|
|
"learning_rate": 3.947621549820485e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396927833557129,
|
|
"step": 725,
|
|
"valid_targets_mean": 4484.2,
|
|
"valid_targets_min": 3898
|
|
},
|
|
{
|
|
"epoch": 1.1700080192461908,
|
|
"grad_norm": 0.48361221718429703,
|
|
"learning_rate": 3.945788980436129e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377546191215515,
|
|
"step": 730,
|
|
"valid_targets_mean": 4703.8,
|
|
"valid_targets_min": 3506
|
|
},
|
|
{
|
|
"epoch": 1.178027265437049,
|
|
"grad_norm": 0.5313701440679391,
|
|
"learning_rate": 3.943925341940673e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14473578333854675,
|
|
"step": 735,
|
|
"valid_targets_mean": 4351.2,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 1.1860465116279069,
|
|
"grad_norm": 0.4691329955427814,
|
|
"learning_rate": 3.942030664091503e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365983784198761,
|
|
"step": 740,
|
|
"valid_targets_mean": 4625.6,
|
|
"valid_targets_min": 3818
|
|
},
|
|
{
|
|
"epoch": 1.194065757818765,
|
|
"grad_norm": 0.5167491268088202,
|
|
"learning_rate": 3.9401049771416214e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12021355330944061,
|
|
"step": 745,
|
|
"valid_targets_mean": 4418.8,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 1.2020850040096231,
|
|
"grad_norm": 0.5041493447141461,
|
|
"learning_rate": 3.938148311839162e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15571609139442444,
|
|
"step": 750,
|
|
"valid_targets_mean": 5170.9,
|
|
"valid_targets_min": 3759
|
|
},
|
|
{
|
|
"epoch": 1.210104250200481,
|
|
"grad_norm": 0.5765165202434457,
|
|
"learning_rate": 3.9361606994269014e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377735435962677,
|
|
"step": 755,
|
|
"valid_targets_mean": 4537.4,
|
|
"valid_targets_min": 4111
|
|
},
|
|
{
|
|
"epoch": 1.2181234963913392,
|
|
"grad_norm": 0.49647201085598824,
|
|
"learning_rate": 3.934142171641763e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430518627166748,
|
|
"step": 760,
|
|
"valid_targets_mean": 4589.2,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 1.2261427425821974,
|
|
"grad_norm": 0.4595277012034728,
|
|
"learning_rate": 3.9320927607143003e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11631577461957932,
|
|
"step": 765,
|
|
"valid_targets_mean": 4293.6,
|
|
"valid_targets_min": 4095
|
|
},
|
|
{
|
|
"epoch": 1.2341619887730553,
|
|
"grad_norm": 0.4735167129227716,
|
|
"learning_rate": 3.9300124993681976e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12255986034870148,
|
|
"step": 770,
|
|
"valid_targets_mean": 4581.0,
|
|
"valid_targets_min": 3876
|
|
},
|
|
{
|
|
"epoch": 1.2421812349639134,
|
|
"grad_norm": 0.49375684376228146,
|
|
"learning_rate": 3.9279014208197317e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1495576947927475,
|
|
"step": 775,
|
|
"valid_targets_mean": 4789.1,
|
|
"valid_targets_min": 3139
|
|
},
|
|
{
|
|
"epoch": 1.2502004811547716,
|
|
"grad_norm": 0.48900005968471216,
|
|
"learning_rate": 3.925759558777252e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14832660555839539,
|
|
"step": 780,
|
|
"valid_targets_mean": 4618.6,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 1.2582197273456295,
|
|
"grad_norm": 0.541426982929122,
|
|
"learning_rate": 3.923586947440639e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12705577909946442,
|
|
"step": 785,
|
|
"valid_targets_mean": 4160.4,
|
|
"valid_targets_min": 3352
|
|
},
|
|
{
|
|
"epoch": 1.2662389735364876,
|
|
"grad_norm": 0.46773079436726994,
|
|
"learning_rate": 3.921383621500758e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1561151146888733,
|
|
"step": 790,
|
|
"valid_targets_mean": 5730.2,
|
|
"valid_targets_min": 4198
|
|
},
|
|
{
|
|
"epoch": 1.2742582197273458,
|
|
"grad_norm": 0.5448770092848273,
|
|
"learning_rate": 3.919149616138906e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14182139933109283,
|
|
"step": 795,
|
|
"valid_targets_mean": 4507.5,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 1.2822774659182037,
|
|
"grad_norm": 0.5017648018365521,
|
|
"learning_rate": 3.916884967026246e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15019017457962036,
|
|
"step": 800,
|
|
"valid_targets_mean": 4734.0,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 1.2902967121090618,
|
|
"grad_norm": 2.0277899438591334,
|
|
"learning_rate": 3.914589710323245e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1354740560054779,
|
|
"step": 805,
|
|
"valid_targets_mean": 5037.5,
|
|
"valid_targets_min": 3966
|
|
},
|
|
{
|
|
"epoch": 1.2983159582999197,
|
|
"grad_norm": 0.4648018542722673,
|
|
"learning_rate": 3.912263882679091e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12118496000766754,
|
|
"step": 810,
|
|
"valid_targets_mean": 4691.5,
|
|
"valid_targets_min": 4114
|
|
},
|
|
{
|
|
"epoch": 1.306335204490778,
|
|
"grad_norm": 0.47463261943249935,
|
|
"learning_rate": 3.9099075212311076e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12332876771688461,
|
|
"step": 815,
|
|
"valid_targets_mean": 4730.0,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 1.3143544506816358,
|
|
"grad_norm": 0.5381308966934151,
|
|
"learning_rate": 3.9075206636041646e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13695132732391357,
|
|
"step": 820,
|
|
"valid_targets_mean": 4682.0,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 1.322373696872494,
|
|
"grad_norm": 0.4748043851995162,
|
|
"learning_rate": 3.905103347910075e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11875195056200027,
|
|
"step": 825,
|
|
"valid_targets_mean": 4253.2,
|
|
"valid_targets_min": 3876
|
|
},
|
|
{
|
|
"epoch": 1.330392943063352,
|
|
"grad_norm": 0.49021175318079513,
|
|
"learning_rate": 3.902655612746985e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13804768025875092,
|
|
"step": 830,
|
|
"valid_targets_mean": 4718.9,
|
|
"valid_targets_min": 3995
|
|
},
|
|
{
|
|
"epoch": 1.33841218925421,
|
|
"grad_norm": 0.46201715262181653,
|
|
"learning_rate": 3.900177497198761e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14349615573883057,
|
|
"step": 835,
|
|
"valid_targets_mean": 5238.6,
|
|
"valid_targets_min": 3868
|
|
},
|
|
{
|
|
"epoch": 1.3464314354450682,
|
|
"grad_norm": 0.5068048322732528,
|
|
"learning_rate": 3.8976690408343635e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12301329523324966,
|
|
"step": 840,
|
|
"valid_targets_mean": 4248.9,
|
|
"valid_targets_min": 2869
|
|
},
|
|
{
|
|
"epoch": 1.3544506816359263,
|
|
"grad_norm": 0.4949879429124787,
|
|
"learning_rate": 3.8951302837072165e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12412386387586594,
|
|
"step": 845,
|
|
"valid_targets_mean": 4591.1,
|
|
"valid_targets_min": 3842
|
|
},
|
|
{
|
|
"epoch": 1.3624699278267842,
|
|
"grad_norm": 0.48131740681596347,
|
|
"learning_rate": 3.892561266354566e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296183168888092,
|
|
"step": 850,
|
|
"valid_targets_mean": 4425.5,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 1.3704891740176424,
|
|
"grad_norm": 0.46802045495896655,
|
|
"learning_rate": 3.889962029796833e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13692401349544525,
|
|
"step": 855,
|
|
"valid_targets_mean": 4375.9,
|
|
"valid_targets_min": 3781
|
|
},
|
|
{
|
|
"epoch": 1.3785084202085005,
|
|
"grad_norm": 0.4646462990077193,
|
|
"learning_rate": 3.887332615536962e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406814306974411,
|
|
"step": 860,
|
|
"valid_targets_mean": 4461.2,
|
|
"valid_targets_min": 3221
|
|
},
|
|
{
|
|
"epoch": 1.3865276663993584,
|
|
"grad_norm": 0.5493045246503131,
|
|
"learning_rate": 3.8846730655597535e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13343387842178345,
|
|
"step": 865,
|
|
"valid_targets_mean": 4627.2,
|
|
"valid_targets_min": 3998
|
|
},
|
|
{
|
|
"epoch": 1.3945469125902166,
|
|
"grad_norm": 0.5564518802404607,
|
|
"learning_rate": 3.881983422331198e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14045974612236023,
|
|
"step": 870,
|
|
"valid_targets_mean": 4727.4,
|
|
"valid_targets_min": 4177
|
|
},
|
|
{
|
|
"epoch": 1.4025661587810747,
|
|
"grad_norm": 0.4671663150399495,
|
|
"learning_rate": 3.879263728797792e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14378923177719116,
|
|
"step": 875,
|
|
"valid_targets_mean": 4674.1,
|
|
"valid_targets_min": 2553
|
|
},
|
|
{
|
|
"epoch": 1.4105854049719326,
|
|
"grad_norm": 0.46058373589240403,
|
|
"learning_rate": 3.876514028385861e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13059532642364502,
|
|
"step": 880,
|
|
"valid_targets_mean": 4665.2,
|
|
"valid_targets_min": 4206
|
|
},
|
|
{
|
|
"epoch": 1.4186046511627908,
|
|
"grad_norm": 0.47799115418234694,
|
|
"learning_rate": 3.873734365000857e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12087743729352951,
|
|
"step": 885,
|
|
"valid_targets_mean": 4338.4,
|
|
"valid_targets_min": 3539
|
|
},
|
|
{
|
|
"epoch": 1.4266238973536487,
|
|
"grad_norm": 0.8750203657723895,
|
|
"learning_rate": 3.870924783026663e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1534992754459381,
|
|
"step": 890,
|
|
"valid_targets_mean": 4708.1,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 1.4346431435445068,
|
|
"grad_norm": 0.4726166010036256,
|
|
"learning_rate": 3.8680853273248826e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1279052495956421,
|
|
"step": 895,
|
|
"valid_targets_mean": 4367.1,
|
|
"valid_targets_min": 3914
|
|
},
|
|
{
|
|
"epoch": 1.4426623897353648,
|
|
"grad_norm": 0.45729425880182195,
|
|
"learning_rate": 3.865216043234126e-05,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14801020920276642,
|
|
"step": 900,
|
|
"valid_targets_mean": 4898.8,
|
|
"valid_targets_min": 4237
|
|
},
|
|
{
|
|
"epoch": 1.450681635926223,
|
|
"grad_norm": 0.5405513064955468,
|
|
"learning_rate": 3.862316976569281e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1536647081375122,
|
|
"step": 905,
|
|
"valid_targets_mean": 4715.8,
|
|
"valid_targets_min": 4425
|
|
},
|
|
{
|
|
"epoch": 1.458700882117081,
|
|
"grad_norm": 0.4640741603586292,
|
|
"learning_rate": 3.859388173620785e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13359621167182922,
|
|
"step": 910,
|
|
"valid_targets_mean": 4597.6,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 1.466720128307939,
|
|
"grad_norm": 0.5007259606245539,
|
|
"learning_rate": 3.8564296811538874e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13288173079490662,
|
|
"step": 915,
|
|
"valid_targets_mean": 4851.5,
|
|
"valid_targets_min": 3969
|
|
},
|
|
{
|
|
"epoch": 1.474739374498797,
|
|
"grad_norm": 0.4677432163396161,
|
|
"learning_rate": 3.853441546407898e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12283094227313995,
|
|
"step": 920,
|
|
"valid_targets_mean": 4164.2,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 1.4827586206896552,
|
|
"grad_norm": 0.47509319661095817,
|
|
"learning_rate": 3.850423817095438e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12043136358261108,
|
|
"step": 925,
|
|
"valid_targets_mean": 4229.5,
|
|
"valid_targets_min": 3516
|
|
},
|
|
{
|
|
"epoch": 1.4907778668805132,
|
|
"grad_norm": 0.48595343267766494,
|
|
"learning_rate": 3.847376541401674e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13850140571594238,
|
|
"step": 930,
|
|
"valid_targets_mean": 4813.9,
|
|
"valid_targets_min": 3819
|
|
},
|
|
{
|
|
"epoch": 1.4987971130713713,
|
|
"grad_norm": 0.4580970029172859,
|
|
"learning_rate": 3.844299767983551e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12935158610343933,
|
|
"step": 935,
|
|
"valid_targets_mean": 4705.8,
|
|
"valid_targets_min": 3855
|
|
},
|
|
{
|
|
"epoch": 1.5068163592622295,
|
|
"grad_norm": 0.48282484351231114,
|
|
"learning_rate": 3.841193545969015e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11401095986366272,
|
|
"step": 940,
|
|
"valid_targets_mean": 4433.9,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 1.5148356054530874,
|
|
"grad_norm": 0.46378074123826,
|
|
"learning_rate": 3.8380579249562265e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444937288761139,
|
|
"step": 945,
|
|
"valid_targets_mean": 4821.4,
|
|
"valid_targets_min": 3975
|
|
},
|
|
{
|
|
"epoch": 1.5228548516439455,
|
|
"grad_norm": 0.4359552633065669,
|
|
"learning_rate": 3.8348929550127734e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12056364119052887,
|
|
"step": 950,
|
|
"valid_targets_mean": 4766.0,
|
|
"valid_targets_min": 3674
|
|
},
|
|
{
|
|
"epoch": 1.5308740978348037,
|
|
"grad_norm": 0.4607239911433649,
|
|
"learning_rate": 3.831698686674866e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12632989883422852,
|
|
"step": 955,
|
|
"valid_targets_mean": 4359.5,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 1.5388933440256616,
|
|
"grad_norm": 0.44379097303144055,
|
|
"learning_rate": 3.828475170946534e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12961451709270477,
|
|
"step": 960,
|
|
"valid_targets_mean": 4562.8,
|
|
"valid_targets_min": 3419
|
|
},
|
|
{
|
|
"epoch": 1.5469125902165195,
|
|
"grad_norm": 0.6902357178811657,
|
|
"learning_rate": 3.8252224592988087e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12546996772289276,
|
|
"step": 965,
|
|
"valid_targets_mean": 4229.5,
|
|
"valid_targets_min": 3275
|
|
},
|
|
{
|
|
"epoch": 1.5549318364073779,
|
|
"grad_norm": 0.47499637134011247,
|
|
"learning_rate": 3.821940603668906e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14275625348091125,
|
|
"step": 970,
|
|
"valid_targets_mean": 4718.2,
|
|
"valid_targets_min": 3919
|
|
},
|
|
{
|
|
"epoch": 1.5629510825982358,
|
|
"grad_norm": 0.47039215861834127,
|
|
"learning_rate": 3.8186296564593924e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13292115926742554,
|
|
"step": 975,
|
|
"valid_targets_mean": 4497.9,
|
|
"valid_targets_min": 3700
|
|
},
|
|
{
|
|
"epoch": 1.5709703287890937,
|
|
"grad_norm": 0.4179971662603633,
|
|
"learning_rate": 3.815289670537351e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11743569374084473,
|
|
"step": 980,
|
|
"valid_targets_mean": 4450.8,
|
|
"valid_targets_min": 3949
|
|
},
|
|
{
|
|
"epoch": 1.5789895749799518,
|
|
"grad_norm": 0.5685501797589411,
|
|
"learning_rate": 3.811920699233535e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13927525281906128,
|
|
"step": 985,
|
|
"valid_targets_mean": 4626.5,
|
|
"valid_targets_min": 3001
|
|
},
|
|
{
|
|
"epoch": 1.58700882117081,
|
|
"grad_norm": 0.4615433121693005,
|
|
"learning_rate": 3.8085227963415186e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13052240014076233,
|
|
"step": 990,
|
|
"valid_targets_mean": 4752.9,
|
|
"valid_targets_min": 4222
|
|
},
|
|
{
|
|
"epoch": 1.595028067361668,
|
|
"grad_norm": 0.4991252127104581,
|
|
"learning_rate": 3.805096016116838e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14128005504608154,
|
|
"step": 995,
|
|
"valid_targets_mean": 4485.0,
|
|
"valid_targets_min": 3134
|
|
},
|
|
{
|
|
"epoch": 1.603047313552526,
|
|
"grad_norm": 0.4774603897333772,
|
|
"learning_rate": 3.801640413276121e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1333894431591034,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4698.8,
|
|
"valid_targets_min": 3807
|
|
},
|
|
{
|
|
"epoch": 1.6110665597433842,
|
|
"grad_norm": 0.4378538451777227,
|
|
"learning_rate": 3.7981560429962204e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13325348496437073,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4704.4,
|
|
"valid_targets_min": 4120
|
|
},
|
|
{
|
|
"epoch": 1.6190858059342421,
|
|
"grad_norm": 0.4162063871643,
|
|
"learning_rate": 3.7946429609133274e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11331897228956223,
|
|
"step": 1010,
|
|
"valid_targets_mean": 4516.1,
|
|
"valid_targets_min": 3555
|
|
},
|
|
{
|
|
"epoch": 1.6271050521251003,
|
|
"grad_norm": 0.45937672459981327,
|
|
"learning_rate": 3.791101223122084e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13020652532577515,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4498.6,
|
|
"valid_targets_min": 3447
|
|
},
|
|
{
|
|
"epoch": 1.6351242983159584,
|
|
"grad_norm": 0.4576607903308816,
|
|
"learning_rate": 3.787530886174688e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12539979815483093,
|
|
"step": 1020,
|
|
"valid_targets_mean": 4618.9,
|
|
"valid_targets_min": 3217
|
|
},
|
|
{
|
|
"epoch": 1.6431435445068163,
|
|
"grad_norm": 0.446728406621397,
|
|
"learning_rate": 3.783932007079992e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11956267803907394,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4341.5,
|
|
"valid_targets_min": 3684
|
|
},
|
|
{
|
|
"epoch": 1.6511627906976745,
|
|
"grad_norm": 0.42656299619687466,
|
|
"learning_rate": 3.7803046433025905e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12953732907772064,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4779.0,
|
|
"valid_targets_min": 3443
|
|
},
|
|
{
|
|
"epoch": 1.6591820368885326,
|
|
"grad_norm": 0.43864078545246554,
|
|
"learning_rate": 3.7766488527619024e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12472393363714218,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4507.0,
|
|
"valid_targets_min": 4178
|
|
},
|
|
{
|
|
"epoch": 1.6672012830793905,
|
|
"grad_norm": 0.4598120833086436,
|
|
"learning_rate": 3.772964693831247e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440608650445938,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4902.9,
|
|
"valid_targets_min": 3786
|
|
},
|
|
{
|
|
"epoch": 1.6752205292702484,
|
|
"grad_norm": 0.4771531565947753,
|
|
"learning_rate": 3.7692522253369136e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13366088271141052,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4538.4,
|
|
"valid_targets_min": 3486
|
|
},
|
|
{
|
|
"epoch": 1.6832397754611068,
|
|
"grad_norm": 0.47836961512349896,
|
|
"learning_rate": 3.7655115065572194e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13370642066001892,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4222.4,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 1.6912590216519647,
|
|
"grad_norm": 0.46022069301573393,
|
|
"learning_rate": 3.7617425972215626e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11553645879030228,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4272.8,
|
|
"valid_targets_min": 3471
|
|
},
|
|
{
|
|
"epoch": 1.6992782678428227,
|
|
"grad_norm": 0.46443723650600205,
|
|
"learning_rate": 3.757945557509472e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1413208693265915,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4829.1,
|
|
"valid_targets_min": 3801
|
|
},
|
|
{
|
|
"epoch": 1.7072975140336808,
|
|
"grad_norm": 0.486193684611564,
|
|
"learning_rate": 3.7541204480496444e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11970577389001846,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4180.6,
|
|
"valid_targets_min": 3123
|
|
},
|
|
{
|
|
"epoch": 1.715316760224539,
|
|
"grad_norm": 0.49524608577763146,
|
|
"learning_rate": 3.7502673299189745e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15878495573997498,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4968.6,
|
|
"valid_targets_min": 4572
|
|
},
|
|
{
|
|
"epoch": 1.7233360064153969,
|
|
"grad_norm": 0.4742814203633953,
|
|
"learning_rate": 3.746386264641583e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1343926638364792,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4714.2,
|
|
"valid_targets_min": 4334
|
|
},
|
|
{
|
|
"epoch": 1.731355252606255,
|
|
"grad_norm": 0.4351258318983007,
|
|
"learning_rate": 3.7424773141878324e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14155763387680054,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4613.2,
|
|
"valid_targets_min": 4135
|
|
},
|
|
{
|
|
"epoch": 1.7393744987971131,
|
|
"grad_norm": 0.4549391371883608,
|
|
"learning_rate": 3.738540540973338e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515810489654541,
|
|
"step": 1085,
|
|
"valid_targets_mean": 5037.2,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 1.747393744987971,
|
|
"grad_norm": 0.4565454333090086,
|
|
"learning_rate": 3.7345760078579695e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12990587949752808,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4393.5,
|
|
"valid_targets_min": 3553
|
|
},
|
|
{
|
|
"epoch": 1.7554129911788292,
|
|
"grad_norm": 0.446088203218931,
|
|
"learning_rate": 3.730583778144852e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1316516250371933,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4772.4,
|
|
"valid_targets_min": 4165
|
|
},
|
|
{
|
|
"epoch": 1.7634322373696873,
|
|
"grad_norm": 0.4549966817760552,
|
|
"learning_rate": 3.7265639155793494e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1281229853630066,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4386.9,
|
|
"valid_targets_min": 3857
|
|
},
|
|
{
|
|
"epoch": 1.7714514835605453,
|
|
"grad_norm": 0.4652211672502556,
|
|
"learning_rate": 3.7225164843480503e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1119842380285263,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4174.8,
|
|
"valid_targets_min": 3523
|
|
},
|
|
{
|
|
"epoch": 1.7794707297514034,
|
|
"grad_norm": 0.4747681741860802,
|
|
"learning_rate": 3.7184415490777426e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13028305768966675,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4692.9,
|
|
"valid_targets_min": 4209
|
|
},
|
|
{
|
|
"epoch": 1.7874899759422616,
|
|
"grad_norm": 0.4484285524692438,
|
|
"learning_rate": 3.714339174834379e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924941837787628,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4381.9,
|
|
"valid_targets_min": 3122
|
|
},
|
|
{
|
|
"epoch": 1.7955092221331195,
|
|
"grad_norm": 0.46947300047989654,
|
|
"learning_rate": 3.710209427122044e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563485562801361,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5019.5,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 1.8035284683239774,
|
|
"grad_norm": 0.469193917638861,
|
|
"learning_rate": 3.7060523718819e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13799817860126495,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4704.5,
|
|
"valid_targets_min": 4103
|
|
},
|
|
{
|
|
"epoch": 1.8115477145148358,
|
|
"grad_norm": 0.9288863576025495,
|
|
"learning_rate": 3.701868075491139e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1120939701795578,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4424.5,
|
|
"valid_targets_min": 4099
|
|
},
|
|
{
|
|
"epoch": 1.8195669607056937,
|
|
"grad_norm": 0.45006286476906426,
|
|
"learning_rate": 3.697656604761926e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323501467704773,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4450.8,
|
|
"valid_targets_min": 2918
|
|
},
|
|
{
|
|
"epoch": 1.8275862068965516,
|
|
"grad_norm": 0.4508015955371608,
|
|
"learning_rate": 3.693418026940325e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13123148679733276,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4556.8,
|
|
"valid_targets_min": 3683
|
|
},
|
|
{
|
|
"epoch": 1.8356054530874097,
|
|
"grad_norm": 0.45478237745416666,
|
|
"learning_rate": 3.689152409705229e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11863987147808075,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4358.1,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 1.8436246992782679,
|
|
"grad_norm": 0.4454535399784579,
|
|
"learning_rate": 3.6848598211672794e-05,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14110371470451355,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4584.1,
|
|
"valid_targets_min": 2919
|
|
},
|
|
{
|
|
"epoch": 1.8516439454691258,
|
|
"grad_norm": 0.4466718734353,
|
|
"learning_rate": 3.6805403298677797e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1321553885936737,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4380.5,
|
|
"valid_targets_min": 3488
|
|
},
|
|
{
|
|
"epoch": 1.859663191659984,
|
|
"grad_norm": 0.43222510337084774,
|
|
"learning_rate": 3.6761940047775966e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924001574516296,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4507.2,
|
|
"valid_targets_min": 3953
|
|
},
|
|
{
|
|
"epoch": 1.867682437850842,
|
|
"grad_norm": 0.45393693253709644,
|
|
"learning_rate": 3.671820915296063e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11272458732128143,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4438.6,
|
|
"valid_targets_min": 3791
|
|
},
|
|
{
|
|
"epoch": 1.8757016840417,
|
|
"grad_norm": 0.4629405900829253,
|
|
"learning_rate": 3.667421131249869e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13634073734283447,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4376.9,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 1.8837209302325582,
|
|
"grad_norm": 0.4640974234390392,
|
|
"learning_rate": 3.662994722891946e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12292435765266418,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4375.5,
|
|
"valid_targets_min": 3498
|
|
},
|
|
{
|
|
"epoch": 1.8917401764234163,
|
|
"grad_norm": 0.4568366204417289,
|
|
"learning_rate": 3.658541760900344e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13310210406780243,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5022.5,
|
|
"valid_targets_min": 3608
|
|
},
|
|
{
|
|
"epoch": 1.8997594226142742,
|
|
"grad_norm": 0.45138961371281894,
|
|
"learning_rate": 3.654062316377106e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13871417939662933,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4629.0,
|
|
"valid_targets_min": 3767
|
|
},
|
|
{
|
|
"epoch": 1.9077786688051324,
|
|
"grad_norm": 0.42626473423666417,
|
|
"learning_rate": 3.649556460847131e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15712636709213257,
|
|
"step": 1190,
|
|
"valid_targets_mean": 5115.9,
|
|
"valid_targets_min": 4649
|
|
},
|
|
{
|
|
"epoch": 1.9157979149959905,
|
|
"grad_norm": 0.4385619719234738,
|
|
"learning_rate": 3.6450242662570314e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14854022860527039,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4824.0,
|
|
"valid_targets_min": 2832
|
|
},
|
|
{
|
|
"epoch": 1.9238171611868484,
|
|
"grad_norm": 0.46947928753787654,
|
|
"learning_rate": 3.6404658049739854e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14080548286437988,
|
|
"step": 1200,
|
|
"valid_targets_mean": 4682.6,
|
|
"valid_targets_min": 3675
|
|
},
|
|
{
|
|
"epoch": 1.9318364073777063,
|
|
"grad_norm": 0.49021372515787504,
|
|
"learning_rate": 3.63588114978458e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.128452330827713,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4667.0,
|
|
"valid_targets_min": 3823
|
|
},
|
|
{
|
|
"epoch": 1.9398556535685647,
|
|
"grad_norm": 0.4496112101343852,
|
|
"learning_rate": 3.6312703738936504e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13371461629867554,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4747.9,
|
|
"valid_targets_min": 4288
|
|
},
|
|
{
|
|
"epoch": 1.9478748997594226,
|
|
"grad_norm": 0.46553657039396606,
|
|
"learning_rate": 3.626633550923111e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263556033372879,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4350.1,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 1.9558941459502805,
|
|
"grad_norm": 0.45435097799939045,
|
|
"learning_rate": 3.621970754910778e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14147964119911194,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4779.9,
|
|
"valid_targets_min": 3380
|
|
},
|
|
{
|
|
"epoch": 1.9639133921411387,
|
|
"grad_norm": 0.4822687569139011,
|
|
"learning_rate": 3.6172820603091885e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444627344608307,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4867.9,
|
|
"valid_targets_min": 3944
|
|
},
|
|
{
|
|
"epoch": 1.9719326383319968,
|
|
"grad_norm": 0.4801168672123505,
|
|
"learning_rate": 3.612567541984413e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330825388431549,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4416.5,
|
|
"valid_targets_min": 3365
|
|
},
|
|
{
|
|
"epoch": 1.9799518845228548,
|
|
"grad_norm": 0.5112451146896348,
|
|
"learning_rate": 3.6078272752148574e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13198651373386383,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4533.5,
|
|
"valid_targets_min": 3403
|
|
},
|
|
{
|
|
"epoch": 1.987971130713713,
|
|
"grad_norm": 0.4643271743141443,
|
|
"learning_rate": 3.6030613356900635e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11557991802692413,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4204.1,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 1.995990376904571,
|
|
"grad_norm": 0.49542866407295805,
|
|
"learning_rate": 3.598269799509498e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13507486879825592,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4443.2,
|
|
"valid_targets_min": 3444
|
|
},
|
|
{
|
|
"epoch": 2.003207698476343,
|
|
"grad_norm": 0.4484189572336587,
|
|
"learning_rate": 3.5934527431813385e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1106710135936737,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4103.6,
|
|
"valid_targets_min": 3007
|
|
},
|
|
{
|
|
"epoch": 2.011226944667201,
|
|
"grad_norm": 0.44787999524454697,
|
|
"learning_rate": 3.5886102436212536e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.127839133143425,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4630.5,
|
|
"valid_targets_min": 4094
|
|
},
|
|
{
|
|
"epoch": 2.0192461908580595,
|
|
"grad_norm": 0.47756011449925073,
|
|
"learning_rate": 3.583742378151171e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11221938580274582,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4119.1,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 2.0272654370489174,
|
|
"grad_norm": 0.4348692969045766,
|
|
"learning_rate": 3.5788492244980464e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11949443817138672,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4687.4,
|
|
"valid_targets_min": 3987
|
|
},
|
|
{
|
|
"epoch": 2.0352846832397753,
|
|
"grad_norm": 0.4405892509823894,
|
|
"learning_rate": 3.573930860792621e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1255490481853485,
|
|
"step": 1270,
|
|
"valid_targets_mean": 5069.9,
|
|
"valid_targets_min": 3974
|
|
},
|
|
{
|
|
"epoch": 2.0433039294306337,
|
|
"grad_norm": 0.4493625859412955,
|
|
"learning_rate": 3.568987365568173e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12091197073459625,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4486.6,
|
|
"valid_targets_min": 3734
|
|
},
|
|
{
|
|
"epoch": 2.0513231756214916,
|
|
"grad_norm": 0.43718579406573327,
|
|
"learning_rate": 3.564018817759266e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1234881654381752,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4410.6,
|
|
"valid_targets_min": 3158
|
|
},
|
|
{
|
|
"epoch": 2.0593424218123495,
|
|
"grad_norm": 0.4445618562378576,
|
|
"learning_rate": 3.559025296700484e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11287961900234222,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4642.0,
|
|
"valid_targets_min": 3865
|
|
},
|
|
{
|
|
"epoch": 2.067361668003208,
|
|
"grad_norm": 0.43303207902047886,
|
|
"learning_rate": 3.554006882125173e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12652646005153656,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4345.9,
|
|
"valid_targets_min": 3632
|
|
},
|
|
{
|
|
"epoch": 2.075380914194066,
|
|
"grad_norm": 0.4721676194442314,
|
|
"learning_rate": 3.5489636541641586e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12730160355567932,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4501.5,
|
|
"valid_targets_min": 3520
|
|
},
|
|
{
|
|
"epoch": 2.0834001603849237,
|
|
"grad_norm": 0.4688834894783075,
|
|
"learning_rate": 3.543895693344472e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10443314164876938,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4235.2,
|
|
"valid_targets_min": 3321
|
|
},
|
|
{
|
|
"epoch": 2.091419406575782,
|
|
"grad_norm": 0.4704712251662637,
|
|
"learning_rate": 3.538803080588063e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12140490114688873,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4464.2,
|
|
"valid_targets_min": 3939
|
|
},
|
|
{
|
|
"epoch": 2.09943865276664,
|
|
"grad_norm": 0.46929141858908846,
|
|
"learning_rate": 3.5336858972105076e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12943628430366516,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4879.8,
|
|
"valid_targets_min": 4100
|
|
},
|
|
{
|
|
"epoch": 2.107457898957498,
|
|
"grad_norm": 0.481437792417147,
|
|
"learning_rate": 3.528544224919708e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12798956036567688,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4566.2,
|
|
"valid_targets_min": 3819
|
|
},
|
|
{
|
|
"epoch": 2.115477145148356,
|
|
"grad_norm": 0.4768162936658602,
|
|
"learning_rate": 3.5233781458145934e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12486916780471802,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4832.2,
|
|
"valid_targets_min": 4092
|
|
},
|
|
{
|
|
"epoch": 2.123496391339214,
|
|
"grad_norm": 0.46304914344675097,
|
|
"learning_rate": 3.5181877423838034e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09448068588972092,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4269.4,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 2.131515637530072,
|
|
"grad_norm": 0.47121329444745363,
|
|
"learning_rate": 3.512973097504371e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13567031919956207,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4822.1,
|
|
"valid_targets_min": 3773
|
|
},
|
|
{
|
|
"epoch": 2.13953488372093,
|
|
"grad_norm": 0.4692696841396045,
|
|
"learning_rate": 3.507734294440403e-05,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11287178099155426,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4447.5,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 2.1475541299117884,
|
|
"grad_norm": 0.4620412005087607,
|
|
"learning_rate": 3.50247141684175e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11445814371109009,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4398.9,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 2.1555733761026463,
|
|
"grad_norm": 0.4618383572915151,
|
|
"learning_rate": 3.497184548742667e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1137077808380127,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4430.0,
|
|
"valid_targets_min": 3444
|
|
},
|
|
{
|
|
"epoch": 2.1635926222935042,
|
|
"grad_norm": 0.4587949455249212,
|
|
"learning_rate": 3.491873774560473e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1305333375930786,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4532.9,
|
|
"valid_targets_min": 4037
|
|
},
|
|
{
|
|
"epoch": 2.1716118684843626,
|
|
"grad_norm": 0.4563886478468335,
|
|
"learning_rate": 3.486539179094208e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12961995601654053,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4218.5,
|
|
"valid_targets_min": 3730
|
|
},
|
|
{
|
|
"epoch": 2.1796311146752205,
|
|
"grad_norm": 0.5467808895330146,
|
|
"learning_rate": 3.481180847523272e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13722699880599976,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4646.6,
|
|
"valid_targets_min": 4072
|
|
},
|
|
{
|
|
"epoch": 2.1876503608660784,
|
|
"grad_norm": 0.4441046371318325,
|
|
"learning_rate": 3.4757988654060684e-05,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11470891535282135,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4398.8,
|
|
"valid_targets_min": 3223
|
|
},
|
|
{
|
|
"epoch": 2.195669607056937,
|
|
"grad_norm": 0.4576759866028164,
|
|
"learning_rate": 3.470393318678637e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12511396408081055,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4476.6,
|
|
"valid_targets_min": 3867
|
|
},
|
|
{
|
|
"epoch": 2.2036888532477947,
|
|
"grad_norm": 0.4436449285696209,
|
|
"learning_rate": 3.4649642936532836e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11693766713142395,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4467.8,
|
|
"valid_targets_min": 3705
|
|
},
|
|
{
|
|
"epoch": 2.2117080994386527,
|
|
"grad_norm": 0.5243651640631248,
|
|
"learning_rate": 3.4595118770171984e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14250776171684265,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4897.5,
|
|
"valid_targets_min": 4105
|
|
},
|
|
{
|
|
"epoch": 2.219727345629511,
|
|
"grad_norm": 0.4545969619595745,
|
|
"learning_rate": 3.454036155831077e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12261779606342316,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4223.6,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 2.227746591820369,
|
|
"grad_norm": 0.42734949616466533,
|
|
"learning_rate": 3.4485372175277236e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1379728615283966,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4739.9,
|
|
"valid_targets_min": 4221
|
|
},
|
|
{
|
|
"epoch": 2.235765838011227,
|
|
"grad_norm": 0.4517904159419686,
|
|
"learning_rate": 3.44301514991066e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12049839645624161,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4253.9,
|
|
"valid_targets_min": 3904
|
|
},
|
|
{
|
|
"epoch": 2.2437850842020852,
|
|
"grad_norm": 0.4326991178094109,
|
|
"learning_rate": 3.4374700411527225e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11669956147670746,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4466.4,
|
|
"valid_targets_min": 3537
|
|
},
|
|
{
|
|
"epoch": 2.251804330392943,
|
|
"grad_norm": 0.5508982577619679,
|
|
"learning_rate": 3.431901979794653e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11390509456396103,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4403.6,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 2.259823576583801,
|
|
"grad_norm": 0.44755244221843216,
|
|
"learning_rate": 3.426311054743685e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12196841835975647,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4503.9,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 2.267842822774659,
|
|
"grad_norm": 0.4939142824253628,
|
|
"learning_rate": 3.420697355272127e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12235177308320999,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4407.1,
|
|
"valid_targets_min": 3277
|
|
},
|
|
{
|
|
"epoch": 2.2758620689655173,
|
|
"grad_norm": 0.4467510038984226,
|
|
"learning_rate": 3.415060971015933e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10986562073230743,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4384.0,
|
|
"valid_targets_min": 3441
|
|
},
|
|
{
|
|
"epoch": 2.2838813151563753,
|
|
"grad_norm": 0.42765705189129516,
|
|
"learning_rate": 3.4094019919732736e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12837448716163635,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4651.8,
|
|
"valid_targets_min": 4185
|
|
},
|
|
{
|
|
"epoch": 2.291900561347233,
|
|
"grad_norm": 0.4432709320574013,
|
|
"learning_rate": 3.403720508503098e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11661627888679504,
|
|
"step": 1430,
|
|
"valid_targets_mean": 4347.5,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 2.2999198075380916,
|
|
"grad_norm": 0.43910076299608336,
|
|
"learning_rate": 3.398016611323693e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12829893827438354,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4374.9,
|
|
"valid_targets_min": 3475
|
|
},
|
|
{
|
|
"epoch": 2.3079390537289495,
|
|
"grad_norm": 0.4395024525809469,
|
|
"learning_rate": 3.392290391511232e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11006221175193787,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4275.0,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 2.3159582999198074,
|
|
"grad_norm": 0.43719998391015996,
|
|
"learning_rate": 3.386541940498322e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12393558025360107,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4449.9,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 2.3239775461106658,
|
|
"grad_norm": 0.43988257974903405,
|
|
"learning_rate": 3.380771350072543e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14202933013439178,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4681.8,
|
|
"valid_targets_min": 4108
|
|
},
|
|
{
|
|
"epoch": 2.3319967923015237,
|
|
"grad_norm": 0.45571379150451713,
|
|
"learning_rate": 3.374978712374986e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13203249871730804,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4501.0,
|
|
"valid_targets_min": 3837
|
|
},
|
|
{
|
|
"epoch": 2.3400160384923816,
|
|
"grad_norm": 0.46660351797335814,
|
|
"learning_rate": 3.369164119898774e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10981815308332443,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3995.9,
|
|
"valid_targets_min": 3333
|
|
},
|
|
{
|
|
"epoch": 2.34803528468324,
|
|
"grad_norm": 0.4607644314645957,
|
|
"learning_rate": 3.363327665487593e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13661104440689087,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4825.6,
|
|
"valid_targets_min": 4007
|
|
},
|
|
{
|
|
"epoch": 2.356054530874098,
|
|
"grad_norm": 0.4400511432076749,
|
|
"learning_rate": 3.357469442334206e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12087226659059525,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4582.6,
|
|
"valid_targets_min": 3814
|
|
},
|
|
{
|
|
"epoch": 2.364073777064956,
|
|
"grad_norm": 0.4640361310799892,
|
|
"learning_rate": 3.351589543978965e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11705543100833893,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4032.6,
|
|
"valid_targets_min": 3333
|
|
},
|
|
{
|
|
"epoch": 2.3720930232558137,
|
|
"grad_norm": 0.4142857236517898,
|
|
"learning_rate": 3.345688064308317e-05,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10477858781814575,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4258.0,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 2.380112269446672,
|
|
"grad_norm": 0.4588795914425846,
|
|
"learning_rate": 3.339765097553307e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10702922940254211,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4295.6,
|
|
"valid_targets_min": 3648
|
|
},
|
|
{
|
|
"epoch": 2.38813151563753,
|
|
"grad_norm": 0.48158140310070846,
|
|
"learning_rate": 3.33382073828807e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13355718553066254,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4987.5,
|
|
"valid_targets_min": 3836
|
|
},
|
|
{
|
|
"epoch": 2.3961507618283884,
|
|
"grad_norm": 0.46654909989488824,
|
|
"learning_rate": 3.327855081428326e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1285693645477295,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4632.6,
|
|
"valid_targets_min": 3702
|
|
},
|
|
{
|
|
"epoch": 2.4041700080192463,
|
|
"grad_norm": 0.4344994555515022,
|
|
"learning_rate": 3.3218682222298584e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12388145178556442,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4366.9,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 2.412189254210104,
|
|
"grad_norm": 0.4326918190126839,
|
|
"learning_rate": 3.315860256286996e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12570232152938843,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4875.4,
|
|
"valid_targets_min": 3994
|
|
},
|
|
{
|
|
"epoch": 2.420208500400962,
|
|
"grad_norm": 0.4446219560161335,
|
|
"learning_rate": 3.3098312795310894e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11991243064403534,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4583.2,
|
|
"valid_targets_min": 3730
|
|
},
|
|
{
|
|
"epoch": 2.4282277465918205,
|
|
"grad_norm": 0.44028543615849797,
|
|
"learning_rate": 3.303781388228974e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338050216436386,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4675.8,
|
|
"valid_targets_min": 4232
|
|
},
|
|
{
|
|
"epoch": 2.4362469927826784,
|
|
"grad_norm": 0.445999456362336,
|
|
"learning_rate": 3.297710678981435e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11989299952983856,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4547.4,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 2.4442662389735363,
|
|
"grad_norm": 0.48081662804378444,
|
|
"learning_rate": 3.291619248721667e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11357638984918594,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4308.8,
|
|
"valid_targets_min": 3423
|
|
},
|
|
{
|
|
"epoch": 2.4522854851643947,
|
|
"grad_norm": 0.4255923831302389,
|
|
"learning_rate": 3.285507194713724e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12975367903709412,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4759.8,
|
|
"valid_targets_min": 4164
|
|
},
|
|
{
|
|
"epoch": 2.4603047313552526,
|
|
"grad_norm": 0.4682152850169925,
|
|
"learning_rate": 3.279374614550966e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13911470770835876,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4729.8,
|
|
"valid_targets_min": 2687
|
|
},
|
|
{
|
|
"epoch": 2.4683239775461105,
|
|
"grad_norm": 0.4569669569334634,
|
|
"learning_rate": 3.2732216061545e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284954845905304,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4939.9,
|
|
"valid_targets_min": 3982
|
|
},
|
|
{
|
|
"epoch": 2.476343223736969,
|
|
"grad_norm": 0.4892606864147685,
|
|
"learning_rate": 3.2670482677716214e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11731338500976562,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4655.9,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 2.484362469927827,
|
|
"grad_norm": 0.4911229622843176,
|
|
"learning_rate": 3.2608546979742394e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14442631602287292,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4836.8,
|
|
"valid_targets_min": 3994
|
|
},
|
|
{
|
|
"epoch": 2.4923817161186848,
|
|
"grad_norm": 0.45863140906782923,
|
|
"learning_rate": 3.254640995657307e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12766748666763306,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4577.2,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 2.500400962309543,
|
|
"grad_norm": 0.4264937183036546,
|
|
"learning_rate": 3.248407260037239e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11833836138248444,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4770.0,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 2.508420208500401,
|
|
"grad_norm": 0.4383219043150754,
|
|
"learning_rate": 3.24215359065033e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12234435975551605,
|
|
"step": 1565,
|
|
"valid_targets_mean": 4639.1,
|
|
"valid_targets_min": 4132
|
|
},
|
|
{
|
|
"epoch": 2.516439454691259,
|
|
"grad_norm": 0.4351595518082832,
|
|
"learning_rate": 3.235880087351164e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13066501915454865,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4270.6,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 2.524458700882117,
|
|
"grad_norm": 0.4286661133039368,
|
|
"learning_rate": 3.2295868503110184e-05,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12741625308990479,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4369.5,
|
|
"valid_targets_min": 3139
|
|
},
|
|
{
|
|
"epoch": 2.5324779470729752,
|
|
"grad_norm": 0.45011062369631516,
|
|
"learning_rate": 3.22327398001627e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314491182565689,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4382.0,
|
|
"valid_targets_min": 3499
|
|
},
|
|
{
|
|
"epoch": 2.540497193263833,
|
|
"grad_norm": 0.4435149751339369,
|
|
"learning_rate": 3.216941577266783e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11645232886075974,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4541.5,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 2.5485164394546915,
|
|
"grad_norm": 0.4530921852122173,
|
|
"learning_rate": 3.210589743174308e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12014880776405334,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4330.8,
|
|
"valid_targets_min": 3681
|
|
},
|
|
{
|
|
"epoch": 2.5565356856455494,
|
|
"grad_norm": 0.4405746397399662,
|
|
"learning_rate": 3.204218579160857e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13733181357383728,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4807.2,
|
|
"valid_targets_min": 3763
|
|
},
|
|
{
|
|
"epoch": 2.5645549318364074,
|
|
"grad_norm": 0.42660504055750914,
|
|
"learning_rate": 3.197828186957094e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12578924000263214,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4983.4,
|
|
"valid_targets_min": 4584
|
|
},
|
|
{
|
|
"epoch": 2.5725741780272653,
|
|
"grad_norm": 0.4659620919194967,
|
|
"learning_rate": 3.191418668600705e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428525149822235,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4698.2,
|
|
"valid_targets_min": 4020
|
|
},
|
|
{
|
|
"epoch": 2.5805934242181237,
|
|
"grad_norm": 0.4427170510178025,
|
|
"learning_rate": 3.184990126434771e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12472105026245117,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4773.5,
|
|
"valid_targets_min": 3889
|
|
},
|
|
{
|
|
"epoch": 2.5886126704089816,
|
|
"grad_norm": 0.428580022188613,
|
|
"learning_rate": 3.178542663106131e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11747145652770996,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4407.4,
|
|
"valid_targets_min": 4002
|
|
},
|
|
{
|
|
"epoch": 2.5966319165998395,
|
|
"grad_norm": 0.45351599671688686,
|
|
"learning_rate": 3.172076381563748e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12936542928218842,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4604.1,
|
|
"valid_targets_min": 4067
|
|
},
|
|
{
|
|
"epoch": 2.604651162790698,
|
|
"grad_norm": 0.4637904039689647,
|
|
"learning_rate": 3.165591385057058e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10163810849189758,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4257.8,
|
|
"valid_targets_min": 3682
|
|
},
|
|
{
|
|
"epoch": 2.612670408981556,
|
|
"grad_norm": 0.436360123168036,
|
|
"learning_rate": 3.1590877771343316e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11006264388561249,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4165.9,
|
|
"valid_targets_min": 3447
|
|
},
|
|
{
|
|
"epoch": 2.6206896551724137,
|
|
"grad_norm": 0.4400798906383125,
|
|
"learning_rate": 3.152565661641008e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11051885783672333,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4658.2,
|
|
"valid_targets_min": 3589
|
|
},
|
|
{
|
|
"epoch": 2.6287089013632716,
|
|
"grad_norm": 0.44302610444568225,
|
|
"learning_rate": 3.1460251427180474e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15973548591136932,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4994.2,
|
|
"valid_targets_min": 4457
|
|
},
|
|
{
|
|
"epoch": 2.63672814755413,
|
|
"grad_norm": 0.4539054428932729,
|
|
"learning_rate": 3.139466324800263e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11821211874485016,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4568.2,
|
|
"valid_targets_min": 3758
|
|
},
|
|
{
|
|
"epoch": 2.644747393744988,
|
|
"grad_norm": 0.4356354851347946,
|
|
"learning_rate": 3.132889312614655e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11955422163009644,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4401.2,
|
|
"valid_targets_min": 3514
|
|
},
|
|
{
|
|
"epoch": 2.6527666399358463,
|
|
"grad_norm": 0.41661471575061415,
|
|
"learning_rate": 3.126294211178737e-05,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11831101030111313,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4669.0,
|
|
"valid_targets_min": 3898
|
|
},
|
|
{
|
|
"epoch": 2.660785886126704,
|
|
"grad_norm": 0.40951320870922364,
|
|
"learning_rate": 3.1196811257988634e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10993427038192749,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4228.0,
|
|
"valid_targets_min": 3445
|
|
},
|
|
{
|
|
"epoch": 2.668805132317562,
|
|
"grad_norm": 0.44441556271738364,
|
|
"learning_rate": 3.1130501620685394e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12350396066904068,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4436.6,
|
|
"valid_targets_min": 3603
|
|
},
|
|
{
|
|
"epoch": 2.67682437850842,
|
|
"grad_norm": 0.42684692932950835,
|
|
"learning_rate": 3.106401425866745e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12227390706539154,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4605.8,
|
|
"valid_targets_min": 3908
|
|
},
|
|
{
|
|
"epoch": 2.6848436246992784,
|
|
"grad_norm": 0.4357311452131072,
|
|
"learning_rate": 3.099735023356236e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1202009841799736,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4648.4,
|
|
"valid_targets_min": 4101
|
|
},
|
|
{
|
|
"epoch": 2.6928628708901363,
|
|
"grad_norm": 0.42931121067121164,
|
|
"learning_rate": 3.0930510609818564e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12490322440862656,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4490.0,
|
|
"valid_targets_min": 3440
|
|
},
|
|
{
|
|
"epoch": 2.7008821170809942,
|
|
"grad_norm": 0.4428447773981804,
|
|
"learning_rate": 3.086349645468831e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199919134378433,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4689.8,
|
|
"valid_targets_min": 3810
|
|
},
|
|
{
|
|
"epoch": 2.7089013632718526,
|
|
"grad_norm": 0.44082126987766646,
|
|
"learning_rate": 3.079630883821067e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12008436024188995,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4202.6,
|
|
"valid_targets_min": 3877
|
|
},
|
|
{
|
|
"epoch": 2.7169206094627105,
|
|
"grad_norm": 0.4133214260974467,
|
|
"learning_rate": 3.0728948833194436e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10182264447212219,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4144.8,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 2.7249398556535684,
|
|
"grad_norm": 0.42564401270817726,
|
|
"learning_rate": 3.066141751520099e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11474806070327759,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4633.8,
|
|
"valid_targets_min": 4244
|
|
},
|
|
{
|
|
"epoch": 2.7329591018444264,
|
|
"grad_norm": 0.4490981837047607,
|
|
"learning_rate": 3.059371596252712e-05,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1435544192790985,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4999.0,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 2.7409783480352847,
|
|
"grad_norm": 0.4223979898270412,
|
|
"learning_rate": 3.0525845256187834e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11942510306835175,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4630.9,
|
|
"valid_targets_min": 3674
|
|
},
|
|
{
|
|
"epoch": 2.7489975942261426,
|
|
"grad_norm": 0.4349747026305887,
|
|
"learning_rate": 3.0457806479899044e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13514211773872375,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4550.1,
|
|
"valid_targets_min": 3558
|
|
},
|
|
{
|
|
"epoch": 2.757016840417001,
|
|
"grad_norm": 0.455767171600073,
|
|
"learning_rate": 3.0389600720060318e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199100315570831,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4168.4,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 2.765036086607859,
|
|
"grad_norm": 0.44392803200208547,
|
|
"learning_rate": 3.0321229065737522e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14299574494361877,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4599.9,
|
|
"valid_targets_min": 3831
|
|
},
|
|
{
|
|
"epoch": 2.773055332798717,
|
|
"grad_norm": 0.42939556610607105,
|
|
"learning_rate": 3.0252692608645384e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12667006254196167,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4458.5,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 2.7810745789895748,
|
|
"grad_norm": 0.4618315157411454,
|
|
"learning_rate": 3.0183992443130127e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12378603965044022,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4398.9,
|
|
"valid_targets_min": 3342
|
|
},
|
|
{
|
|
"epoch": 2.789093825180433,
|
|
"grad_norm": 0.45417676618537334,
|
|
"learning_rate": 3.011512966615195e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12788331508636475,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4519.4,
|
|
"valid_targets_min": 3964
|
|
},
|
|
{
|
|
"epoch": 2.797113071371291,
|
|
"grad_norm": 0.44820814128990916,
|
|
"learning_rate": 3.0046105377267523e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15033498406410217,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4721.8,
|
|
"valid_targets_min": 4113
|
|
},
|
|
{
|
|
"epoch": 2.8051323175621494,
|
|
"grad_norm": 0.4264026098065216,
|
|
"learning_rate": 2.9976920678612456e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12685447931289673,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4370.2,
|
|
"valid_targets_min": 3815
|
|
},
|
|
{
|
|
"epoch": 2.8131515637530073,
|
|
"grad_norm": 0.4435041095781467,
|
|
"learning_rate": 2.9907576674883664e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.135772243142128,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4689.5,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 2.8211708099438653,
|
|
"grad_norm": 0.44427628124598306,
|
|
"learning_rate": 2.983807447332174e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13170257210731506,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4629.0,
|
|
"valid_targets_min": 4143
|
|
},
|
|
{
|
|
"epoch": 2.829190056134723,
|
|
"grad_norm": 0.4476931253205714,
|
|
"learning_rate": 2.9768415183693293e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310819685459137,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4634.6,
|
|
"valid_targets_min": 3610
|
|
},
|
|
{
|
|
"epoch": 2.8372093023255816,
|
|
"grad_norm": 0.4733962469903618,
|
|
"learning_rate": 2.9698599918273197e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10514672845602036,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4152.6,
|
|
"valid_targets_min": 3263
|
|
},
|
|
{
|
|
"epoch": 2.8452285485164395,
|
|
"grad_norm": 0.4414578689621758,
|
|
"learning_rate": 2.962862979182686e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11392098665237427,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4216.4,
|
|
"valid_targets_min": 2993
|
|
},
|
|
{
|
|
"epoch": 2.8532477947072974,
|
|
"grad_norm": 0.44491762219350983,
|
|
"learning_rate": 2.95585059215924e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12735602259635925,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4546.1,
|
|
"valid_targets_min": 4061
|
|
},
|
|
{
|
|
"epoch": 2.8612670408981558,
|
|
"grad_norm": 0.43169479473876265,
|
|
"learning_rate": 2.948822942726284e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13146014511585236,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4735.9,
|
|
"valid_targets_min": 3684
|
|
},
|
|
{
|
|
"epoch": 2.8692862870890137,
|
|
"grad_norm": 0.42228774903919836,
|
|
"learning_rate": 2.941780143096817e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11027516424655914,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4564.6,
|
|
"valid_targets_min": 3814
|
|
},
|
|
{
|
|
"epoch": 2.8773055332798716,
|
|
"grad_norm": 0.43897162843774007,
|
|
"learning_rate": 2.9347223057257505e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12551634013652802,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4758.4,
|
|
"valid_targets_min": 3833
|
|
},
|
|
{
|
|
"epoch": 2.8853247794707295,
|
|
"grad_norm": 1.0177910700585242,
|
|
"learning_rate": 2.927649543308106e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13276471197605133,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4575.6,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 2.893344025661588,
|
|
"grad_norm": 0.44796859440632525,
|
|
"learning_rate": 2.9205619687772212e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13284848630428314,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4802.2,
|
|
"valid_targets_min": 2992
|
|
},
|
|
{
|
|
"epoch": 2.901363271852446,
|
|
"grad_norm": 0.4551547142908476,
|
|
"learning_rate": 2.9134596953029413e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12998361885547638,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4257.1,
|
|
"valid_targets_min": 3512
|
|
},
|
|
{
|
|
"epoch": 2.909382518043304,
|
|
"grad_norm": 0.44136789588445813,
|
|
"learning_rate": 2.9063428362898168e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12531471252441406,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4532.4,
|
|
"valid_targets_min": 3490
|
|
},
|
|
{
|
|
"epoch": 2.917401764234162,
|
|
"grad_norm": 0.4333217486247768,
|
|
"learning_rate": 2.8992115053752905e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12176503986120224,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4782.4,
|
|
"valid_targets_min": 3847
|
|
},
|
|
{
|
|
"epoch": 2.92542101042502,
|
|
"grad_norm": 0.432127225531553,
|
|
"learning_rate": 2.8920658164278816e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11124090850353241,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4295.5,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 2.933440256615878,
|
|
"grad_norm": 0.4480841083738825,
|
|
"learning_rate": 2.884905883545373e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12632077932357788,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4484.6,
|
|
"valid_targets_min": 3664
|
|
},
|
|
{
|
|
"epoch": 2.9414595028067363,
|
|
"grad_norm": 0.44823683058494945,
|
|
"learning_rate": 2.877731821052981e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12645769119262695,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4093.8,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 2.949478748997594,
|
|
"grad_norm": 0.44064841396257604,
|
|
"learning_rate": 2.8705437435015375e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382758915424347,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4758.5,
|
|
"valid_targets_min": 4244
|
|
},
|
|
{
|
|
"epoch": 2.957497995188452,
|
|
"grad_norm": 0.44563431850609575,
|
|
"learning_rate": 2.8633417656656566e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13682959973812103,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4884.0,
|
|
"valid_targets_min": 3723
|
|
},
|
|
{
|
|
"epoch": 2.9655172413793105,
|
|
"grad_norm": 0.40080491159353965,
|
|
"learning_rate": 2.8561260025419036e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11973556876182556,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4446.6,
|
|
"valid_targets_min": 3850
|
|
},
|
|
{
|
|
"epoch": 2.9735364875701684,
|
|
"grad_norm": 0.42221628014975865,
|
|
"learning_rate": 2.8488965693469583e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1225736141204834,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4411.5,
|
|
"valid_targets_min": 3778
|
|
},
|
|
{
|
|
"epoch": 2.9815557337610263,
|
|
"grad_norm": 0.43107225860058773,
|
|
"learning_rate": 2.8416535815157763e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13042236864566803,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4657.2,
|
|
"valid_targets_min": 3185
|
|
},
|
|
{
|
|
"epoch": 2.9895749799518843,
|
|
"grad_norm": 0.4166950716266511,
|
|
"learning_rate": 2.8343971546997434e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11648573726415634,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4351.0,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 2.9975942261427426,
|
|
"grad_norm": 0.42421759362966105,
|
|
"learning_rate": 2.827127404764831e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15005409717559814,
|
|
"step": 1870,
|
|
"valid_targets_mean": 5140.0,
|
|
"valid_targets_min": 3935
|
|
},
|
|
{
|
|
"epoch": 3.0048115477145148,
|
|
"grad_norm": 0.4410948962533022,
|
|
"learning_rate": 2.8198444477897467e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12251558154821396,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4476.1,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 3.0128307939053727,
|
|
"grad_norm": 0.43706607666192504,
|
|
"learning_rate": 2.8125484000640787e-05,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12440548092126846,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4962.0,
|
|
"valid_targets_min": 3786
|
|
},
|
|
{
|
|
"epoch": 3.020850040096231,
|
|
"grad_norm": 1.6470083769249302,
|
|
"learning_rate": 2.8052393780864394e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10791447758674622,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4414.5,
|
|
"valid_targets_min": 3857
|
|
},
|
|
{
|
|
"epoch": 3.028869286287089,
|
|
"grad_norm": 0.48923016774880257,
|
|
"learning_rate": 2.797917498562607e-05,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1145155131816864,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4466.6,
|
|
"valid_targets_min": 3429
|
|
},
|
|
{
|
|
"epoch": 3.036888532477947,
|
|
"grad_norm": 0.5023491144532222,
|
|
"learning_rate": 2.7905828784036596e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11858996748924255,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4997.6,
|
|
"valid_targets_min": 4130
|
|
},
|
|
{
|
|
"epoch": 3.0449077786688052,
|
|
"grad_norm": 0.44468087311398863,
|
|
"learning_rate": 2.78323563472411e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12344604730606079,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4591.1,
|
|
"valid_targets_min": 3644
|
|
},
|
|
{
|
|
"epoch": 3.052927024859663,
|
|
"grad_norm": 0.45223509736189726,
|
|
"learning_rate": 2.7758758848400354e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11537761241197586,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4596.5,
|
|
"valid_targets_min": 2903
|
|
},
|
|
{
|
|
"epoch": 3.060946271050521,
|
|
"grad_norm": 0.43386665375556954,
|
|
"learning_rate": 2.7685037462672043e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11979789286851883,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4798.6,
|
|
"valid_targets_min": 4088
|
|
},
|
|
{
|
|
"epoch": 3.0689655172413794,
|
|
"grad_norm": 0.44194606951013193,
|
|
"learning_rate": 2.7611193367191993e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11390023678541183,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4591.8,
|
|
"valid_targets_min": 2891
|
|
},
|
|
{
|
|
"epoch": 3.0769847634322374,
|
|
"grad_norm": 0.4191432021254672,
|
|
"learning_rate": 2.7537227741055378e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11557844281196594,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4439.4,
|
|
"valid_targets_min": 3468
|
|
},
|
|
{
|
|
"epoch": 3.0850040096230953,
|
|
"grad_norm": 0.4382384720655598,
|
|
"learning_rate": 2.746314176529791e-05,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09825605154037476,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4285.4,
|
|
"valid_targets_min": 3626
|
|
},
|
|
{
|
|
"epoch": 3.0930232558139537,
|
|
"grad_norm": 0.4329416844339899,
|
|
"learning_rate": 2.7388936622876957e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1162625253200531,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4583.1,
|
|
"valid_targets_min": 3320
|
|
},
|
|
{
|
|
"epoch": 3.1010425020048116,
|
|
"grad_norm": 0.4548897686993759,
|
|
"learning_rate": 2.7314613498652663e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12285508215427399,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4037.8,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 3.1090617481956695,
|
|
"grad_norm": 0.44123491027667555,
|
|
"learning_rate": 2.7240173579369025e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11508432775735855,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4888.8,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 3.117080994386528,
|
|
"grad_norm": 0.4383024404067555,
|
|
"learning_rate": 2.7165618053634962e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12815740704536438,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4867.0,
|
|
"valid_targets_min": 3899
|
|
},
|
|
{
|
|
"epoch": 3.125100240577386,
|
|
"grad_norm": 0.45026303790543026,
|
|
"learning_rate": 2.7090948111905304e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1080777645111084,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4384.9,
|
|
"valid_targets_min": 3639
|
|
},
|
|
{
|
|
"epoch": 3.1331194867682437,
|
|
"grad_norm": 0.46732450770833317,
|
|
"learning_rate": 2.701616494646183e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10922130942344666,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4354.6,
|
|
"valid_targets_min": 3288
|
|
},
|
|
{
|
|
"epoch": 3.141138732959102,
|
|
"grad_norm": 0.45094067426515444,
|
|
"learning_rate": 2.6941269751394174e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12189055234193802,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4669.5,
|
|
"valid_targets_min": 2994
|
|
},
|
|
{
|
|
"epoch": 3.14915797914996,
|
|
"grad_norm": 0.4406915625641638,
|
|
"learning_rate": 2.686626372258081e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11827784776687622,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4825.1,
|
|
"valid_targets_min": 3639
|
|
},
|
|
{
|
|
"epoch": 3.157177225340818,
|
|
"grad_norm": 0.4712206381859978,
|
|
"learning_rate": 2.6791148057669913e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10916715860366821,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4329.6,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 3.165196471531676,
|
|
"grad_norm": 0.4336810029868549,
|
|
"learning_rate": 2.671592395606027e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11787257343530655,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4481.2,
|
|
"valid_targets_min": 3845
|
|
},
|
|
{
|
|
"epoch": 3.173215717722534,
|
|
"grad_norm": 0.4652715433415896,
|
|
"learning_rate": 2.6640592618882114e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12377145886421204,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4669.9,
|
|
"valid_targets_min": 3435
|
|
},
|
|
{
|
|
"epoch": 3.181234963913392,
|
|
"grad_norm": 0.4320312790248458,
|
|
"learning_rate": 2.656515524897795e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12487772107124329,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4774.6,
|
|
"valid_targets_min": 4175
|
|
},
|
|
{
|
|
"epoch": 3.18925421010425,
|
|
"grad_norm": 0.43376518253723034,
|
|
"learning_rate": 2.6489613050883343e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11846788227558136,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4733.5,
|
|
"valid_targets_min": 3913
|
|
},
|
|
{
|
|
"epoch": 3.1972734562951084,
|
|
"grad_norm": 0.43244533109994143,
|
|
"learning_rate": 2.6413967230807677e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13316035270690918,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4666.8,
|
|
"valid_targets_min": 3273
|
|
},
|
|
{
|
|
"epoch": 3.2052927024859663,
|
|
"grad_norm": 0.4516036007416719,
|
|
"learning_rate": 2.6338218996614924e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12227605283260345,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4380.1,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 3.2133119486768242,
|
|
"grad_norm": 0.453849489890549,
|
|
"learning_rate": 2.6262369557804325e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12108680605888367,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4641.8,
|
|
"valid_targets_min": 3634
|
|
},
|
|
{
|
|
"epoch": 3.2213311948676826,
|
|
"grad_norm": 0.4440194949144129,
|
|
"learning_rate": 2.6186420125491094e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12102767080068588,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4558.9,
|
|
"valid_targets_min": 4029
|
|
},
|
|
{
|
|
"epoch": 3.2293504410585405,
|
|
"grad_norm": 0.43143349652788593,
|
|
"learning_rate": 2.6110371912387083e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1204310953617096,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4528.6,
|
|
"valid_targets_min": 3241
|
|
},
|
|
{
|
|
"epoch": 3.2373696872493984,
|
|
"grad_norm": 0.4283760206897851,
|
|
"learning_rate": 2.6034226132781407e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12349432706832886,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4735.6,
|
|
"valid_targets_min": 3374
|
|
},
|
|
{
|
|
"epoch": 3.245388933440257,
|
|
"grad_norm": 0.43461514224234993,
|
|
"learning_rate": 2.5957984002521066e-05,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11712443828582764,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4527.0,
|
|
"valid_targets_min": 3443
|
|
},
|
|
{
|
|
"epoch": 3.2534081796311147,
|
|
"grad_norm": 0.41642226946302063,
|
|
"learning_rate": 2.588164673899151e-05,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11360670626163483,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4569.0,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 3.2614274258219726,
|
|
"grad_norm": 0.6987030365557052,
|
|
"learning_rate": 2.580521556109724e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11612153053283691,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4461.2,
|
|
"valid_targets_min": 3860
|
|
},
|
|
{
|
|
"epoch": 3.2694466720128306,
|
|
"grad_norm": 0.4750827765231753,
|
|
"learning_rate": 2.57286916892423e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11053307354450226,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4246.4,
|
|
"valid_targets_min": 3301
|
|
},
|
|
{
|
|
"epoch": 3.277465918203689,
|
|
"grad_norm": 0.49250449847547284,
|
|
"learning_rate": 2.5652076345310822e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12812067568302155,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4884.5,
|
|
"valid_targets_min": 4199
|
|
},
|
|
{
|
|
"epoch": 3.285485164394547,
|
|
"grad_norm": 0.4637508888854383,
|
|
"learning_rate": 2.5575370752647507e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12095724046230316,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4718.1,
|
|
"valid_targets_min": 3919
|
|
},
|
|
{
|
|
"epoch": 3.293504410585405,
|
|
"grad_norm": 0.44873448443828834,
|
|
"learning_rate": 2.5498576136038077e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326775997877121,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4480.5,
|
|
"valid_targets_min": 2962
|
|
},
|
|
{
|
|
"epoch": 3.301523656776263,
|
|
"grad_norm": 0.5008261689936729,
|
|
"learning_rate": 2.542169372168976e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12333418428897858,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4211.9,
|
|
"valid_targets_min": 3267
|
|
},
|
|
{
|
|
"epoch": 3.309542902967121,
|
|
"grad_norm": 0.42176163124432314,
|
|
"learning_rate": 2.5344724737211646e-05,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10478554666042328,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4238.4,
|
|
"valid_targets_min": 3221
|
|
},
|
|
{
|
|
"epoch": 3.317562149157979,
|
|
"grad_norm": 0.4324748782929499,
|
|
"learning_rate": 2.5267670411595152e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10704857856035233,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4335.5,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 3.3255813953488373,
|
|
"grad_norm": 0.4490067616891909,
|
|
"learning_rate": 2.5190531975194345e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11168612539768219,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4265.0,
|
|
"valid_targets_min": 3520
|
|
},
|
|
{
|
|
"epoch": 3.3336006415396953,
|
|
"grad_norm": 0.46203423150169737,
|
|
"learning_rate": 2.5113310659706322e-05,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12241938710212708,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4321.1,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 3.341619887730553,
|
|
"grad_norm": 0.6747548595763763,
|
|
"learning_rate": 2.5036007698151553e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13035881519317627,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4733.8,
|
|
"valid_targets_min": 4049
|
|
},
|
|
{
|
|
"epoch": 3.3496391339214115,
|
|
"grad_norm": 0.4061004765835621,
|
|
"learning_rate": 2.4958624324854185e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12872913479804993,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4852.8,
|
|
"valid_targets_min": 3802
|
|
},
|
|
{
|
|
"epoch": 3.3576583801122695,
|
|
"grad_norm": 0.4399943534766506,
|
|
"learning_rate": 2.4881161775422303e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10161960124969482,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4181.4,
|
|
"valid_targets_min": 3411
|
|
},
|
|
{
|
|
"epoch": 3.3656776263031274,
|
|
"grad_norm": 0.4414262232936544,
|
|
"learning_rate": 2.480362128672824e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12310343980789185,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4810.6,
|
|
"valid_targets_min": 4197
|
|
},
|
|
{
|
|
"epoch": 3.3736968724939858,
|
|
"grad_norm": 0.4442529338684338,
|
|
"learning_rate": 2.4726004096888817e-05,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314464807510376,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4864.8,
|
|
"valid_targets_min": 4039
|
|
},
|
|
{
|
|
"epoch": 3.3817161186848437,
|
|
"grad_norm": 0.42636484061721686,
|
|
"learning_rate": 2.4648311445245558e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09112414717674255,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3885.0,
|
|
"valid_targets_min": 2919
|
|
},
|
|
{
|
|
"epoch": 3.3897353648757016,
|
|
"grad_norm": 0.42060462222824213,
|
|
"learning_rate": 2.457054457234493e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10578779876232147,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4138.1,
|
|
"valid_targets_min": 3417
|
|
},
|
|
{
|
|
"epoch": 3.39775461106656,
|
|
"grad_norm": 0.4486738879188632,
|
|
"learning_rate": 2.4492704719918497e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11530248820781708,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4608.2,
|
|
"valid_targets_min": 3801
|
|
},
|
|
{
|
|
"epoch": 3.405773857257418,
|
|
"grad_norm": 0.43091384611780603,
|
|
"learning_rate": 2.4414793130863134e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10951997339725494,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4361.4,
|
|
"valid_targets_min": 3824
|
|
},
|
|
{
|
|
"epoch": 3.413793103448276,
|
|
"grad_norm": 0.44180746325914816,
|
|
"learning_rate": 2.433681104922114e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10009285807609558,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3985.0,
|
|
"valid_targets_min": 3206
|
|
},
|
|
{
|
|
"epoch": 3.4218123496391337,
|
|
"grad_norm": 0.42535031049419314,
|
|
"learning_rate": 2.4258759720160412e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11079800128936768,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4258.6,
|
|
"valid_targets_min": 3622
|
|
},
|
|
{
|
|
"epoch": 3.429831595829992,
|
|
"grad_norm": 0.42775167438037254,
|
|
"learning_rate": 2.4180640389954534e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.127212256193161,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4885.2,
|
|
"valid_targets_min": 4106
|
|
},
|
|
{
|
|
"epoch": 3.43785084202085,
|
|
"grad_norm": 0.41388320883524143,
|
|
"learning_rate": 2.4102454305962892e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1042267382144928,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4282.9,
|
|
"valid_targets_min": 3269
|
|
},
|
|
{
|
|
"epoch": 3.445870088211708,
|
|
"grad_norm": 0.44760640355459735,
|
|
"learning_rate": 2.402420271661076e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13042280077934265,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4738.1,
|
|
"valid_targets_min": 3935
|
|
},
|
|
{
|
|
"epoch": 3.4538893344025663,
|
|
"grad_norm": 0.4318958481941097,
|
|
"learning_rate": 2.3945886871369338e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13023288547992706,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4529.9,
|
|
"valid_targets_min": 3003
|
|
},
|
|
{
|
|
"epoch": 3.461908580593424,
|
|
"grad_norm": 0.4483406912137362,
|
|
"learning_rate": 2.3867508020735865e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11931728571653366,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4711.2,
|
|
"valid_targets_min": 3892
|
|
},
|
|
{
|
|
"epoch": 3.469927826784282,
|
|
"grad_norm": 0.4490096108475199,
|
|
"learning_rate": 2.3789067416213568e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12042243033647537,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4502.8,
|
|
"valid_targets_min": 3919
|
|
},
|
|
{
|
|
"epoch": 3.4779470729751405,
|
|
"grad_norm": 0.44538988938517954,
|
|
"learning_rate": 2.3710566310291733e-05,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11183471977710724,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4221.8,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 3.4859663191659984,
|
|
"grad_norm": 0.43224451385482304,
|
|
"learning_rate": 2.36320059564257e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251022219657898,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4886.5,
|
|
"valid_targets_min": 3911
|
|
},
|
|
{
|
|
"epoch": 3.4939855653568563,
|
|
"grad_norm": 0.42297392175305987,
|
|
"learning_rate": 2.3553387609016833e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11595537513494492,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4836.2,
|
|
"valid_targets_min": 3471
|
|
},
|
|
{
|
|
"epoch": 3.5020048115477147,
|
|
"grad_norm": 0.4361524245375558,
|
|
"learning_rate": 2.347471252339252e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10998405516147614,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4604.6,
|
|
"valid_targets_min": 4023
|
|
},
|
|
{
|
|
"epoch": 3.5100240577385726,
|
|
"grad_norm": 0.44759402583300884,
|
|
"learning_rate": 2.339598195578608e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12136473506689072,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4984.4,
|
|
"valid_targets_min": 3819
|
|
},
|
|
{
|
|
"epoch": 3.5180433039294305,
|
|
"grad_norm": 0.4321721360539588,
|
|
"learning_rate": 2.3317197163316757e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14261505007743835,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4750.0,
|
|
"valid_targets_min": 3991
|
|
},
|
|
{
|
|
"epoch": 3.5260625501202885,
|
|
"grad_norm": 0.4282805899098533,
|
|
"learning_rate": 2.3238359403969608e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10841204226016998,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4169.4,
|
|
"valid_targets_min": 3258
|
|
},
|
|
{
|
|
"epoch": 3.534081796311147,
|
|
"grad_norm": 0.43324934251396197,
|
|
"learning_rate": 2.315946993657543e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11573737859725952,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4483.8,
|
|
"valid_targets_min": 3404
|
|
},
|
|
{
|
|
"epoch": 3.5421010425020047,
|
|
"grad_norm": 0.42849411180194574,
|
|
"learning_rate": 2.3080530020790673e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11111554503440857,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4326.2,
|
|
"valid_targets_min": 3189
|
|
},
|
|
{
|
|
"epoch": 3.550120288692863,
|
|
"grad_norm": 0.4393297168754308,
|
|
"learning_rate": 2.300154091707731e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11741004139184952,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4408.4,
|
|
"valid_targets_min": 3103
|
|
},
|
|
{
|
|
"epoch": 3.558139534883721,
|
|
"grad_norm": 0.4440414404271576,
|
|
"learning_rate": 2.2922503886682706e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10594650357961655,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4100.4,
|
|
"valid_targets_min": 3206
|
|
},
|
|
{
|
|
"epoch": 3.566158781074579,
|
|
"grad_norm": 0.443487565845114,
|
|
"learning_rate": 2.28434201916195e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11110691726207733,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4448.2,
|
|
"valid_targets_min": 3810
|
|
},
|
|
{
|
|
"epoch": 3.574178027265437,
|
|
"grad_norm": 0.4352777959338471,
|
|
"learning_rate": 2.2764291094645446e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338285207748413,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4760.9,
|
|
"valid_targets_min": 3653
|
|
},
|
|
{
|
|
"epoch": 3.5821972734562952,
|
|
"grad_norm": 0.42312771523424436,
|
|
"learning_rate": 2.2685117859243223e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10058571398258209,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4471.5,
|
|
"valid_targets_min": 3636
|
|
},
|
|
{
|
|
"epoch": 3.590216519647153,
|
|
"grad_norm": 0.42501565185113444,
|
|
"learning_rate": 2.2605901749600312e-05,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12162692844867706,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4560.9,
|
|
"valid_targets_min": 2183
|
|
},
|
|
{
|
|
"epoch": 3.598235765838011,
|
|
"grad_norm": 0.42852155599575226,
|
|
"learning_rate": 2.2526644030588764e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1177423745393753,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4580.2,
|
|
"valid_targets_min": 3523
|
|
},
|
|
{
|
|
"epoch": 3.6062550120288694,
|
|
"grad_norm": 0.43406432767282205,
|
|
"learning_rate": 2.2447345967745036e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278018057346344,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4823.4,
|
|
"valid_targets_min": 3910
|
|
},
|
|
{
|
|
"epoch": 3.6142742582197274,
|
|
"grad_norm": 0.4451054481447289,
|
|
"learning_rate": 2.2368008827249756e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13117927312850952,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5154.5,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 3.6222935044105853,
|
|
"grad_norm": 0.4601077252659941,
|
|
"learning_rate": 2.228863387590752e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12340235710144043,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4284.5,
|
|
"valid_targets_min": 2860
|
|
},
|
|
{
|
|
"epoch": 3.630312750601443,
|
|
"grad_norm": 0.4380830657688424,
|
|
"learning_rate": 2.2209222381126687e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12199253588914871,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4680.5,
|
|
"valid_targets_min": 3350
|
|
},
|
|
{
|
|
"epoch": 3.6383319967923016,
|
|
"grad_norm": 0.42414634597332546,
|
|
"learning_rate": 2.212977561089908e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11920738220214844,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4617.5,
|
|
"valid_targets_min": 4138
|
|
},
|
|
{
|
|
"epoch": 3.6463512429831595,
|
|
"grad_norm": 0.4342257285147413,
|
|
"learning_rate": 2.20502948337798e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12234988808631897,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4742.4,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 3.654370489174018,
|
|
"grad_norm": 0.44354854298176044,
|
|
"learning_rate": 2.1970781318866953e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10629850625991821,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4590.0,
|
|
"valid_targets_min": 4044
|
|
},
|
|
{
|
|
"epoch": 3.6623897353648758,
|
|
"grad_norm": 0.43944247977070316,
|
|
"learning_rate": 2.1891236335781363e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11103914678096771,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4420.6,
|
|
"valid_targets_min": 3859
|
|
},
|
|
{
|
|
"epoch": 3.6704089815557337,
|
|
"grad_norm": 0.43650636742907506,
|
|
"learning_rate": 2.1811661154646332e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10542796552181244,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4276.6,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 3.6784282277465916,
|
|
"grad_norm": 0.4456064140159044,
|
|
"learning_rate": 2.173205704606735e-05,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11473973095417023,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4526.4,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 3.68644747393745,
|
|
"grad_norm": 0.41558215224957995,
|
|
"learning_rate": 2.1652425281111785e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14504100382328033,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5651.6,
|
|
"valid_targets_min": 3314
|
|
},
|
|
{
|
|
"epoch": 3.694466720128308,
|
|
"grad_norm": 0.45095279820866296,
|
|
"learning_rate": 2.1572767131288607e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1170191615819931,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4612.6,
|
|
"valid_targets_min": 4211
|
|
},
|
|
{
|
|
"epoch": 3.7024859663191663,
|
|
"grad_norm": 0.510903841850311,
|
|
"learning_rate": 2.1493083868528095e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1028449684381485,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4451.4,
|
|
"valid_targets_min": 3614
|
|
},
|
|
{
|
|
"epoch": 3.710505212510024,
|
|
"grad_norm": 0.42727293284746287,
|
|
"learning_rate": 2.141337676516151e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11934205889701843,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4551.8,
|
|
"valid_targets_min": 3630
|
|
},
|
|
{
|
|
"epoch": 3.718524458700882,
|
|
"grad_norm": 0.42430678915591147,
|
|
"learning_rate": 2.1333647093900772e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12220919132232666,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4708.8,
|
|
"valid_targets_min": 3649
|
|
},
|
|
{
|
|
"epoch": 3.72654370489174,
|
|
"grad_norm": 0.4398353855403426,
|
|
"learning_rate": 2.1253896127818175e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12084709107875824,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4341.1,
|
|
"valid_targets_min": 3255
|
|
},
|
|
{
|
|
"epoch": 3.7345629510825984,
|
|
"grad_norm": 0.42926491502851094,
|
|
"learning_rate": 2.1174125140326013e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10496345907449722,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4244.6,
|
|
"valid_targets_min": 3263
|
|
},
|
|
{
|
|
"epoch": 3.7425821972734563,
|
|
"grad_norm": 0.41843814687843933,
|
|
"learning_rate": 2.1094335405156277e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11735595762729645,
|
|
"step": 2335,
|
|
"valid_targets_mean": 4803.4,
|
|
"valid_targets_min": 3637
|
|
},
|
|
{
|
|
"epoch": 3.7506014434643142,
|
|
"grad_norm": 0.5366973953951486,
|
|
"learning_rate": 2.1014528196340316e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10406701266765594,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4461.0,
|
|
"valid_targets_min": 4064
|
|
},
|
|
{
|
|
"epoch": 3.7586206896551726,
|
|
"grad_norm": 0.42274467669927585,
|
|
"learning_rate": 2.093470478818847e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12189609557390213,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4523.4,
|
|
"valid_targets_min": 3103
|
|
},
|
|
{
|
|
"epoch": 3.7666399358460305,
|
|
"grad_norm": 0.4591912246503016,
|
|
"learning_rate": 2.0854866455269756e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12553012371063232,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4705.1,
|
|
"valid_targets_min": 4061
|
|
},
|
|
{
|
|
"epoch": 3.7746591820368884,
|
|
"grad_norm": 0.4396817014085877,
|
|
"learning_rate": 2.0775014472391496e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11814923584461212,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4438.5,
|
|
"valid_targets_min": 3702
|
|
},
|
|
{
|
|
"epoch": 3.7826784282277464,
|
|
"grad_norm": 0.44553411717648345,
|
|
"learning_rate": 2.0695150114578958e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11837076395750046,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4643.8,
|
|
"valid_targets_min": 3791
|
|
},
|
|
{
|
|
"epoch": 3.7906976744186047,
|
|
"grad_norm": 0.42664590245095835,
|
|
"learning_rate": 2.061527465705502e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10687233507633209,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4368.2,
|
|
"valid_targets_min": 3201
|
|
},
|
|
{
|
|
"epoch": 3.7987169206094626,
|
|
"grad_norm": 0.43613098673448425,
|
|
"learning_rate": 2.0535389375219773e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12023443728685379,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4595.8,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 3.806736166800321,
|
|
"grad_norm": 0.4341230555517685,
|
|
"learning_rate": 2.045549554463019e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1117076575756073,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4577.4,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 3.814755412991179,
|
|
"grad_norm": 0.4131415486165763,
|
|
"learning_rate": 2.0375594440979744e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10525750368833542,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4574.9,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 3.822774659182037,
|
|
"grad_norm": 0.4546741224707646,
|
|
"learning_rate": 2.0295687340078037e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11676950752735138,
|
|
"step": 2385,
|
|
"valid_targets_mean": 4806.6,
|
|
"valid_targets_min": 3767
|
|
},
|
|
{
|
|
"epoch": 3.8307939053728948,
|
|
"grad_norm": 0.4877672969395193,
|
|
"learning_rate": 2.0215775517830437e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.123058021068573,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4767.5,
|
|
"valid_targets_min": 4102
|
|
},
|
|
{
|
|
"epoch": 3.838813151563753,
|
|
"grad_norm": 0.5007094263568346,
|
|
"learning_rate": 2.013586025021769e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1357235610485077,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4597.2,
|
|
"valid_targets_min": 4151
|
|
},
|
|
{
|
|
"epoch": 3.846832397754611,
|
|
"grad_norm": 0.42628716924710286,
|
|
"learning_rate": 2.0055942813275564e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11947204172611237,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4553.1,
|
|
"valid_targets_min": 3251
|
|
},
|
|
{
|
|
"epoch": 3.854851643945469,
|
|
"grad_norm": 0.5009048153087782,
|
|
"learning_rate": 1.9976024483074456e-05,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13679635524749756,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4786.9,
|
|
"valid_targets_min": 3783
|
|
},
|
|
{
|
|
"epoch": 3.8628708901363273,
|
|
"grad_norm": 0.4473073023236582,
|
|
"learning_rate": 1.9896106535699025e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11934912204742432,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4422.1,
|
|
"valid_targets_min": 3442
|
|
},
|
|
{
|
|
"epoch": 3.8708901363271853,
|
|
"grad_norm": 0.44701462264875735,
|
|
"learning_rate": 1.9816190247227834e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10774150490760803,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4505.2,
|
|
"valid_targets_min": 3877
|
|
},
|
|
{
|
|
"epoch": 3.878909382518043,
|
|
"grad_norm": 0.4489907829241997,
|
|
"learning_rate": 1.9736276893712954e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13242198526859283,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4582.8,
|
|
"valid_targets_min": 2948
|
|
},
|
|
{
|
|
"epoch": 3.886928628708901,
|
|
"grad_norm": 0.42830440115890894,
|
|
"learning_rate": 1.9656367751159565e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11485099792480469,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4673.0,
|
|
"valid_targets_min": 4075
|
|
},
|
|
{
|
|
"epoch": 3.8949478748997595,
|
|
"grad_norm": 0.4245728390850089,
|
|
"learning_rate": 1.957646409550565e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12118405103683472,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4794.4,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 3.9029671210906174,
|
|
"grad_norm": 0.41904214108130394,
|
|
"learning_rate": 1.9496567202601545e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09998699277639389,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4254.4,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 3.9109863672814758,
|
|
"grad_norm": 0.40783137300877575,
|
|
"learning_rate": 1.9416678348189627e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11874105036258698,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4826.5,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 3.9190056134723337,
|
|
"grad_norm": 0.4803137061490347,
|
|
"learning_rate": 1.9336798807883907e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11187771707773209,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4758.8,
|
|
"valid_targets_min": 4199
|
|
},
|
|
{
|
|
"epoch": 3.9270248596631916,
|
|
"grad_norm": 0.4626303391782172,
|
|
"learning_rate": 1.9256929857149686e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1344926655292511,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4611.2,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 3.9350441058540495,
|
|
"grad_norm": 0.45357645417201875,
|
|
"learning_rate": 1.9177072771283167e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11596687138080597,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4376.2,
|
|
"valid_targets_min": 3811
|
|
},
|
|
{
|
|
"epoch": 3.943063352044908,
|
|
"grad_norm": 0.4253081658033569,
|
|
"learning_rate": 1.9097228825391087e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11824223399162292,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4575.1,
|
|
"valid_targets_min": 4185
|
|
},
|
|
{
|
|
"epoch": 3.951082598235766,
|
|
"grad_norm": 0.43021169725334213,
|
|
"learning_rate": 1.9017399294370413e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269446313381195,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4714.9,
|
|
"valid_targets_min": 3711
|
|
},
|
|
{
|
|
"epoch": 3.959101844426624,
|
|
"grad_norm": 0.4189035908841239,
|
|
"learning_rate": 1.893758545288791e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12213868647813797,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4402.2,
|
|
"valid_targets_min": 3013
|
|
},
|
|
{
|
|
"epoch": 3.967121090617482,
|
|
"grad_norm": 0.430020095069685,
|
|
"learning_rate": 1.8857788575359847e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13784058392047882,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5298.0,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 3.97514033680834,
|
|
"grad_norm": 0.4298179145262354,
|
|
"learning_rate": 1.87780099359316e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12829814851284027,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4597.1,
|
|
"valid_targets_min": 3644
|
|
},
|
|
{
|
|
"epoch": 3.983159582999198,
|
|
"grad_norm": 0.4284603622728122,
|
|
"learning_rate": 1.869825080845734e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1103266254067421,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4687.0,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 3.9911788291900563,
|
|
"grad_norm": 0.4495729092146307,
|
|
"learning_rate": 1.8618512466479686e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12403489649295807,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4541.2,
|
|
"valid_targets_min": 3967
|
|
},
|
|
{
|
|
"epoch": 3.999198075380914,
|
|
"grad_norm": 0.42017833017405004,
|
|
"learning_rate": 1.8538796183209373e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10433878004550934,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4370.2,
|
|
"valid_targets_min": 3441
|
|
},
|
|
{
|
|
"epoch": 4.006415396952686,
|
|
"grad_norm": 0.42090422869274463,
|
|
"learning_rate": 1.845910323150491e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1206853985786438,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4883.9,
|
|
"valid_targets_min": 3705
|
|
},
|
|
{
|
|
"epoch": 4.014434643143544,
|
|
"grad_norm": 0.4715608305819128,
|
|
"learning_rate": 1.8379434883852255e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11390087008476257,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4592.9,
|
|
"valid_targets_min": 3635
|
|
},
|
|
{
|
|
"epoch": 4.022453889334402,
|
|
"grad_norm": 0.5326067771405758,
|
|
"learning_rate": 1.8299792412344524e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1131330356001854,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4511.6,
|
|
"valid_targets_min": 3892
|
|
},
|
|
{
|
|
"epoch": 4.030473135525261,
|
|
"grad_norm": 0.44607081908700863,
|
|
"learning_rate": 1.8220177088661635e-05,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11239536851644516,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4902.2,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 4.038492381716119,
|
|
"grad_norm": 0.45777152423114603,
|
|
"learning_rate": 1.814059018405004e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09788191318511963,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4060.9,
|
|
"valid_targets_min": 3331
|
|
},
|
|
{
|
|
"epoch": 4.046511627906977,
|
|
"grad_norm": 0.42890806853468694,
|
|
"learning_rate": 1.806103296930243e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11311686038970947,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4670.2,
|
|
"valid_targets_min": 4111
|
|
},
|
|
{
|
|
"epoch": 4.054530874097835,
|
|
"grad_norm": 0.4494251599040584,
|
|
"learning_rate": 1.7981506714737392e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11883391439914703,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4605.9,
|
|
"valid_targets_min": 3962
|
|
},
|
|
{
|
|
"epoch": 4.062550120288693,
|
|
"grad_norm": 0.44327290522808616,
|
|
"learning_rate": 1.7902012690179188e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11705680191516876,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4647.0,
|
|
"valid_targets_min": 4259
|
|
},
|
|
{
|
|
"epoch": 4.070569366479551,
|
|
"grad_norm": 0.44637026221570525,
|
|
"learning_rate": 1.7822552164937437e-05,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10788983106613159,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4248.5,
|
|
"valid_targets_min": 3162
|
|
},
|
|
{
|
|
"epoch": 4.078588612670409,
|
|
"grad_norm": 0.46180890176347567,
|
|
"learning_rate": 1.7743126407786873e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.111435666680336,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4487.1,
|
|
"valid_targets_min": 4134
|
|
},
|
|
{
|
|
"epoch": 4.086607858861267,
|
|
"grad_norm": 0.48278577355097363,
|
|
"learning_rate": 1.766373668694707e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11119714379310608,
|
|
"step": 2550,
|
|
"valid_targets_mean": 4507.8,
|
|
"valid_targets_min": 3594
|
|
},
|
|
{
|
|
"epoch": 4.094627105052125,
|
|
"grad_norm": 0.4530198173203554,
|
|
"learning_rate": 1.7584384270062195e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09740154445171356,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4308.2,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 4.102646351242983,
|
|
"grad_norm": 0.4534234007110222,
|
|
"learning_rate": 1.7505070424180772e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12073633074760437,
|
|
"step": 2560,
|
|
"valid_targets_mean": 4190.1,
|
|
"valid_targets_min": 2617
|
|
},
|
|
{
|
|
"epoch": 4.110665597433841,
|
|
"grad_norm": 0.4529428913341258,
|
|
"learning_rate": 1.7425796415735454e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09724746644496918,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4785.0,
|
|
"valid_targets_min": 3701
|
|
},
|
|
{
|
|
"epoch": 4.118684843624699,
|
|
"grad_norm": 0.4610222289144795,
|
|
"learning_rate": 1.7346563510522783e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10644858330488205,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4736.1,
|
|
"valid_targets_min": 4028
|
|
},
|
|
{
|
|
"epoch": 4.126704089815557,
|
|
"grad_norm": 0.4340465639323575,
|
|
"learning_rate": 1.7267372973682998e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10987432301044464,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4720.6,
|
|
"valid_targets_min": 3693
|
|
},
|
|
{
|
|
"epoch": 4.134723336006416,
|
|
"grad_norm": 0.43866919753287315,
|
|
"learning_rate": 1.7188226069679834e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11476965993642807,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4532.9,
|
|
"valid_targets_min": 3920
|
|
},
|
|
{
|
|
"epoch": 4.142742582197274,
|
|
"grad_norm": 0.4459654718727247,
|
|
"learning_rate": 1.7109124062280307e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11067742109298706,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4407.9,
|
|
"valid_targets_min": 3535
|
|
},
|
|
{
|
|
"epoch": 4.150761828388132,
|
|
"grad_norm": 0.43263144945949256,
|
|
"learning_rate": 1.7030068214534567e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11801645159721375,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4857.1,
|
|
"valid_targets_min": 4100
|
|
},
|
|
{
|
|
"epoch": 4.1587810745789895,
|
|
"grad_norm": 0.44656572874311007,
|
|
"learning_rate": 1.695105978875572e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11068914085626602,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4517.1,
|
|
"valid_targets_min": 3738
|
|
},
|
|
{
|
|
"epoch": 4.166800320769847,
|
|
"grad_norm": 0.4286407889378425,
|
|
"learning_rate": 1.687210004649965e-05,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10719306766986847,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4380.6,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 4.174819566960705,
|
|
"grad_norm": 0.800769190487109,
|
|
"learning_rate": 1.679319024854491e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11300750076770782,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4423.2,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 4.182838813151564,
|
|
"grad_norm": 0.4578896219725048,
|
|
"learning_rate": 1.6714331654872564e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10927052050828934,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4627.6,
|
|
"valid_targets_min": 3665
|
|
},
|
|
{
|
|
"epoch": 4.190858059342422,
|
|
"grad_norm": 0.4144887583802508,
|
|
"learning_rate": 1.663552552464609e-05,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1116843968629837,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4791.4,
|
|
"valid_targets_min": 3374
|
|
},
|
|
{
|
|
"epoch": 4.19887730553328,
|
|
"grad_norm": 0.425135894786461,
|
|
"learning_rate": 1.6556773116191257e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1155174970626831,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4583.0,
|
|
"valid_targets_min": 3775
|
|
},
|
|
{
|
|
"epoch": 4.206896551724138,
|
|
"grad_norm": 0.4715630659251054,
|
|
"learning_rate": 1.647807568697603e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10761673748493195,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4064.0,
|
|
"valid_targets_min": 2877
|
|
},
|
|
{
|
|
"epoch": 4.214915797914996,
|
|
"grad_norm": 0.49257107829988017,
|
|
"learning_rate": 1.6399434493590524e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10741441696882248,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4378.2,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 4.222935044105854,
|
|
"grad_norm": 0.46151584946266394,
|
|
"learning_rate": 1.6320850791726884e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10715599358081818,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4446.6,
|
|
"valid_targets_min": 3427
|
|
},
|
|
{
|
|
"epoch": 4.230954290296712,
|
|
"grad_norm": 0.4238400189650433,
|
|
"learning_rate": 1.6242325836159304e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296149492263794,
|
|
"step": 2640,
|
|
"valid_targets_mean": 5152.4,
|
|
"valid_targets_min": 4552
|
|
},
|
|
{
|
|
"epoch": 4.2389735364875705,
|
|
"grad_norm": 0.44969767369758945,
|
|
"learning_rate": 1.6163860880723923e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12346767634153366,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4584.5,
|
|
"valid_targets_min": 4073
|
|
},
|
|
{
|
|
"epoch": 4.246992782678428,
|
|
"grad_norm": 0.44626346202318756,
|
|
"learning_rate": 1.6085457178298866e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1061326265335083,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4181.9,
|
|
"valid_targets_min": 2622
|
|
},
|
|
{
|
|
"epoch": 4.255012028869286,
|
|
"grad_norm": 0.45544631340052066,
|
|
"learning_rate": 1.6007115980784182e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10439412295818329,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4334.9,
|
|
"valid_targets_min": 3872
|
|
},
|
|
{
|
|
"epoch": 4.263031275060144,
|
|
"grad_norm": 0.45193513219369014,
|
|
"learning_rate": 1.592883853908188e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1063997894525528,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4583.1,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 4.271050521251002,
|
|
"grad_norm": 0.4534506249111318,
|
|
"learning_rate": 1.585062610307599e-05,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11017262190580368,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4831.9,
|
|
"valid_targets_min": 4388
|
|
},
|
|
{
|
|
"epoch": 4.27906976744186,
|
|
"grad_norm": 0.46620796303746204,
|
|
"learning_rate": 1.5772479921612543e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10645690560340881,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4694.9,
|
|
"valid_targets_min": 3644
|
|
},
|
|
{
|
|
"epoch": 4.287089013632719,
|
|
"grad_norm": 0.42495578120662075,
|
|
"learning_rate": 1.5694401242479677e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11107391119003296,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4900.6,
|
|
"valid_targets_min": 4227
|
|
},
|
|
{
|
|
"epoch": 4.295108259823577,
|
|
"grad_norm": 0.41935439342470293,
|
|
"learning_rate": 1.5616391312387683e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11314061284065247,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4382.4,
|
|
"valid_targets_min": 3651
|
|
},
|
|
{
|
|
"epoch": 4.303127506014435,
|
|
"grad_norm": 0.44949800970521475,
|
|
"learning_rate": 1.5538451376949106e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09370071440935135,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4307.1,
|
|
"valid_targets_min": 3762
|
|
},
|
|
{
|
|
"epoch": 4.311146752205293,
|
|
"grad_norm": 0.4765665974401367,
|
|
"learning_rate": 1.5460582680658888e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09890519082546234,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4282.8,
|
|
"valid_targets_min": 3189
|
|
},
|
|
{
|
|
"epoch": 4.319165998396151,
|
|
"grad_norm": 0.4507184612545111,
|
|
"learning_rate": 1.5382786466874446e-05,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11422993987798691,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4631.4,
|
|
"valid_targets_min": 4065
|
|
},
|
|
{
|
|
"epoch": 4.3271852445870085,
|
|
"grad_norm": 0.44206841615070447,
|
|
"learning_rate": 1.5305063977795856e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14120200276374817,
|
|
"step": 2700,
|
|
"valid_targets_mean": 5053.9,
|
|
"valid_targets_min": 4330
|
|
},
|
|
{
|
|
"epoch": 4.335204490777867,
|
|
"grad_norm": 0.4365211155313861,
|
|
"learning_rate": 1.5227416454445995e-05,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09654676169157028,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4172.9,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 4.343223736968725,
|
|
"grad_norm": 0.4434527595924658,
|
|
"learning_rate": 1.5149845136650748e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08743572235107422,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3981.6,
|
|
"valid_targets_min": 3146
|
|
},
|
|
{
|
|
"epoch": 4.351242983159583,
|
|
"grad_norm": 0.4585056808465754,
|
|
"learning_rate": 1.5072351263019177e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10992768406867981,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4345.0,
|
|
"valid_targets_min": 3368
|
|
},
|
|
{
|
|
"epoch": 4.359262229350441,
|
|
"grad_norm": 0.45873444645433376,
|
|
"learning_rate": 1.4994936070923784e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14215907454490662,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5086.8,
|
|
"valid_targets_min": 4132
|
|
},
|
|
{
|
|
"epoch": 4.367281475541299,
|
|
"grad_norm": 0.4778337457598854,
|
|
"learning_rate": 1.4917600796480745e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10027343034744263,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4370.0,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 4.375300721732157,
|
|
"grad_norm": 0.4365118630678763,
|
|
"learning_rate": 1.4840346674530122e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11423096060752869,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4445.0,
|
|
"valid_targets_min": 3048
|
|
},
|
|
{
|
|
"epoch": 4.383319967923015,
|
|
"grad_norm": 0.44000766275616815,
|
|
"learning_rate": 1.4763174938616232e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11950388550758362,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4598.4,
|
|
"valid_targets_min": 3300
|
|
},
|
|
{
|
|
"epoch": 4.391339214113874,
|
|
"grad_norm": 0.4805122800842504,
|
|
"learning_rate": 1.4686086820967865e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11088831722736359,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4438.6,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 4.3993584603047315,
|
|
"grad_norm": 0.4743677189532152,
|
|
"learning_rate": 1.460908355247868e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13322047889232635,
|
|
"step": 2745,
|
|
"valid_targets_mean": 5190.2,
|
|
"valid_targets_min": 4031
|
|
},
|
|
{
|
|
"epoch": 4.4073777064955895,
|
|
"grad_norm": 0.4597739667106283,
|
|
"learning_rate": 1.4532166362687507e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11058811843395233,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4922.8,
|
|
"valid_targets_min": 4301
|
|
},
|
|
{
|
|
"epoch": 4.415396952686447,
|
|
"grad_norm": 0.40960246791913674,
|
|
"learning_rate": 1.445533647975871e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12078719586133957,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4542.2,
|
|
"valid_targets_min": 3923
|
|
},
|
|
{
|
|
"epoch": 4.423416198877305,
|
|
"grad_norm": 0.4458926210055893,
|
|
"learning_rate": 1.437859513046263e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11029720306396484,
|
|
"step": 2760,
|
|
"valid_targets_mean": 4617.5,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 4.431435445068163,
|
|
"grad_norm": 0.552752104831424,
|
|
"learning_rate": 1.4301943540155914e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1041911393404007,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4475.8,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 4.439454691259022,
|
|
"grad_norm": 0.43440778457486234,
|
|
"learning_rate": 1.4225382932762033e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11560443788766861,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4849.2,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 4.44747393744988,
|
|
"grad_norm": 0.4244354559976438,
|
|
"learning_rate": 1.4148914530751681e-05,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10497516393661499,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4694.6,
|
|
"valid_targets_min": 4042
|
|
},
|
|
{
|
|
"epoch": 4.455493183640738,
|
|
"grad_norm": 0.44754281386830547,
|
|
"learning_rate": 1.4072539555123292e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11638794839382172,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4410.6,
|
|
"valid_targets_min": 3542
|
|
},
|
|
{
|
|
"epoch": 4.463512429831596,
|
|
"grad_norm": 0.47444008267448595,
|
|
"learning_rate": 1.3996259225383514e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10932406783103943,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4224.5,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 4.471531676022454,
|
|
"grad_norm": 0.4472905009878009,
|
|
"learning_rate": 1.3920074759527737e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11519868671894073,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4747.1,
|
|
"valid_targets_min": 3513
|
|
},
|
|
{
|
|
"epoch": 4.479550922213312,
|
|
"grad_norm": 0.44209987811278106,
|
|
"learning_rate": 1.3843987374020689e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11424921452999115,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4544.1,
|
|
"valid_targets_min": 3877
|
|
},
|
|
{
|
|
"epoch": 4.4875701684041704,
|
|
"grad_norm": 0.47867828781937927,
|
|
"learning_rate": 1.376799828377696e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10771223157644272,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4411.0,
|
|
"valid_targets_min": 3651
|
|
},
|
|
{
|
|
"epoch": 4.495589414595028,
|
|
"grad_norm": 0.4508713545560454,
|
|
"learning_rate": 1.3692108702141642e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10167445242404938,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4444.6,
|
|
"valid_targets_min": 3636
|
|
},
|
|
{
|
|
"epoch": 4.503608660785886,
|
|
"grad_norm": 0.47721841410710714,
|
|
"learning_rate": 1.361631984087091e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11219213902950287,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4327.2,
|
|
"valid_targets_min": 2386
|
|
},
|
|
{
|
|
"epoch": 4.511627906976744,
|
|
"grad_norm": 0.4265319322971436,
|
|
"learning_rate": 1.354063291011273e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10052736103534698,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4212.9,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 4.519647153167602,
|
|
"grad_norm": 0.4394865453740494,
|
|
"learning_rate": 1.3465049118387486e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12264704704284668,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4994.5,
|
|
"valid_targets_min": 4307
|
|
},
|
|
{
|
|
"epoch": 4.52766639935846,
|
|
"grad_norm": 0.47036043498045066,
|
|
"learning_rate": 1.3389569672568707e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14842426776885986,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5577.1,
|
|
"valid_targets_min": 4206
|
|
},
|
|
{
|
|
"epoch": 4.535685645549318,
|
|
"grad_norm": 0.4681006439741867,
|
|
"learning_rate": 1.331419577786381e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11775127053260803,
|
|
"step": 2830,
|
|
"valid_targets_mean": 4396.8,
|
|
"valid_targets_min": 3194
|
|
},
|
|
{
|
|
"epoch": 4.543704891740177,
|
|
"grad_norm": 0.4345059491500198,
|
|
"learning_rate": 1.3238928637794816e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10529690980911255,
|
|
"step": 2835,
|
|
"valid_targets_mean": 4609.9,
|
|
"valid_targets_min": 3851
|
|
},
|
|
{
|
|
"epoch": 4.551724137931035,
|
|
"grad_norm": 0.5122913301533479,
|
|
"learning_rate": 1.3163769454179183e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12328939884901047,
|
|
"step": 2840,
|
|
"valid_targets_mean": 4628.9,
|
|
"valid_targets_min": 3740
|
|
},
|
|
{
|
|
"epoch": 4.559743384121893,
|
|
"grad_norm": 0.4772041459839202,
|
|
"learning_rate": 1.3088719427110552e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0997372716665268,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4424.1,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 4.5677626303127505,
|
|
"grad_norm": 0.4218398425383349,
|
|
"learning_rate": 1.3013779754939666e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11892329901456833,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4738.4,
|
|
"valid_targets_min": 3822
|
|
},
|
|
{
|
|
"epoch": 4.5757818765036085,
|
|
"grad_norm": 0.45132358366236874,
|
|
"learning_rate": 1.2938951634255164e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11912679672241211,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4636.4,
|
|
"valid_targets_min": 3403
|
|
},
|
|
{
|
|
"epoch": 4.583801122694466,
|
|
"grad_norm": 0.46679710060225993,
|
|
"learning_rate": 1.2864236259864495e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310253143310547,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4566.0,
|
|
"valid_targets_min": 3520
|
|
},
|
|
{
|
|
"epoch": 4.591820368885324,
|
|
"grad_norm": 0.4579399789557092,
|
|
"learning_rate": 1.2789634824774887e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11996282637119293,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4808.5,
|
|
"valid_targets_min": 3831
|
|
},
|
|
{
|
|
"epoch": 4.599839615076183,
|
|
"grad_norm": 0.4537733623414512,
|
|
"learning_rate": 1.2715148520174206e-05,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1146460473537445,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4924.6,
|
|
"valid_targets_min": 3868
|
|
},
|
|
{
|
|
"epoch": 4.607858861267041,
|
|
"grad_norm": 0.4417303627840008,
|
|
"learning_rate": 1.2640778535412036e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10878900438547134,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4476.9,
|
|
"valid_targets_min": 3454
|
|
},
|
|
{
|
|
"epoch": 4.615878107457899,
|
|
"grad_norm": 0.4443345385444329,
|
|
"learning_rate": 1.2566526057980608e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10644914954900742,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4496.5,
|
|
"valid_targets_min": 3903
|
|
},
|
|
{
|
|
"epoch": 4.623897353648757,
|
|
"grad_norm": 0.43700683466320267,
|
|
"learning_rate": 1.2492392273495879e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11886905133724213,
|
|
"step": 2885,
|
|
"valid_targets_mean": 5133.6,
|
|
"valid_targets_min": 4636
|
|
},
|
|
{
|
|
"epoch": 4.631916599839615,
|
|
"grad_norm": 0.43490517454747857,
|
|
"learning_rate": 1.2418378365678612e-05,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1031545028090477,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4275.4,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 4.639935846030474,
|
|
"grad_norm": 0.46565288967552093,
|
|
"learning_rate": 1.234448551633542e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12304367125034332,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4457.2,
|
|
"valid_targets_min": 3746
|
|
},
|
|
{
|
|
"epoch": 4.6479550922213315,
|
|
"grad_norm": 0.4518892158072798,
|
|
"learning_rate": 1.2270714905339969e-05,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12746621668338776,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4723.5,
|
|
"valid_targets_min": 3370
|
|
},
|
|
{
|
|
"epoch": 4.655974338412189,
|
|
"grad_norm": 0.49337552609953167,
|
|
"learning_rate": 1.2197067710614075e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11030030250549316,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4289.8,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 4.663993584603047,
|
|
"grad_norm": 0.4634074251040503,
|
|
"learning_rate": 1.2123545108108943e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10341023653745651,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4340.1,
|
|
"valid_targets_min": 3796
|
|
},
|
|
{
|
|
"epoch": 4.672012830793905,
|
|
"grad_norm": 0.4378805208012731,
|
|
"learning_rate": 1.2050148271786348e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10629647225141525,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4171.4,
|
|
"valid_targets_min": 3423
|
|
},
|
|
{
|
|
"epoch": 4.680032076984763,
|
|
"grad_norm": 0.5165020664965885,
|
|
"learning_rate": 1.1976878373599928e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13473045825958252,
|
|
"step": 2920,
|
|
"valid_targets_mean": 4686.2,
|
|
"valid_targets_min": 3223
|
|
},
|
|
{
|
|
"epoch": 4.688051323175621,
|
|
"grad_norm": 0.4393104782894382,
|
|
"learning_rate": 1.1903736583476441e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12219338864088058,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4836.0,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 4.69607056936648,
|
|
"grad_norm": 0.446905625487015,
|
|
"learning_rate": 1.1830724069297106e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11796309053897858,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4469.4,
|
|
"valid_targets_min": 3201
|
|
},
|
|
{
|
|
"epoch": 4.704089815557338,
|
|
"grad_norm": 0.41513916017817193,
|
|
"learning_rate": 1.1757841996878957e-05,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11159396171569824,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4420.6,
|
|
"valid_targets_min": 3791
|
|
},
|
|
{
|
|
"epoch": 4.712109061748196,
|
|
"grad_norm": 0.440626317179724,
|
|
"learning_rate": 1.1685091529956187e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12319649010896683,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4624.9,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 4.720128307939054,
|
|
"grad_norm": 0.4413434667776622,
|
|
"learning_rate": 1.161247383016163e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11547272652387619,
|
|
"step": 2945,
|
|
"valid_targets_mean": 4670.8,
|
|
"valid_targets_min": 3632
|
|
},
|
|
{
|
|
"epoch": 4.728147554129912,
|
|
"grad_norm": 0.4510501969604686,
|
|
"learning_rate": 1.1539990057008166e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11070597171783447,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4593.9,
|
|
"valid_targets_min": 3818
|
|
},
|
|
{
|
|
"epoch": 4.7361668003207695,
|
|
"grad_norm": 0.4483801492504096,
|
|
"learning_rate": 1.1467641367870198e-05,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10552556812763214,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4590.6,
|
|
"valid_targets_min": 4121
|
|
},
|
|
{
|
|
"epoch": 4.7441860465116275,
|
|
"grad_norm": 0.4568102238118538,
|
|
"learning_rate": 1.1395428917965239e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10727407038211823,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4408.0,
|
|
"valid_targets_min": 3301
|
|
},
|
|
{
|
|
"epoch": 4.752205292702486,
|
|
"grad_norm": 0.43960399334212047,
|
|
"learning_rate": 1.1323353860335385e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10946053266525269,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4545.4,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 4.760224538893344,
|
|
"grad_norm": 0.4604162056940319,
|
|
"learning_rate": 1.1251417345828962e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09722255915403366,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4117.8,
|
|
"valid_targets_min": 3478
|
|
},
|
|
{
|
|
"epoch": 4.768243785084202,
|
|
"grad_norm": 0.45992071237154797,
|
|
"learning_rate": 1.1179620523082107e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09946942329406738,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4286.6,
|
|
"valid_targets_min": 3227
|
|
},
|
|
{
|
|
"epoch": 4.77626303127506,
|
|
"grad_norm": 0.44221388475382256,
|
|
"learning_rate": 1.110796453850047e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1089097410440445,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4543.8,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 4.784282277465918,
|
|
"grad_norm": 0.5486689332382207,
|
|
"learning_rate": 1.1036450536240877e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10295435786247253,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4516.0,
|
|
"valid_targets_min": 3732
|
|
},
|
|
{
|
|
"epoch": 4.792301523656777,
|
|
"grad_norm": 0.45801154124532056,
|
|
"learning_rate": 1.0965079658193068e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09951834380626678,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4609.1,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 4.800320769847635,
|
|
"grad_norm": 0.4467906433364755,
|
|
"learning_rate": 1.0893853043961475e-05,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11292466521263123,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4798.5,
|
|
"valid_targets_min": 4075
|
|
},
|
|
{
|
|
"epoch": 4.808340016038493,
|
|
"grad_norm": 0.46294872778133617,
|
|
"learning_rate": 1.0822771830847011e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1136772483587265,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4722.1,
|
|
"valid_targets_min": 4214
|
|
},
|
|
{
|
|
"epoch": 4.8163592622293505,
|
|
"grad_norm": 0.45156503891979605,
|
|
"learning_rate": 1.0751837153828926e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12383154779672623,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4483.0,
|
|
"valid_targets_min": 3862
|
|
},
|
|
{
|
|
"epoch": 4.824378508420208,
|
|
"grad_norm": 0.45884830539834587,
|
|
"learning_rate": 1.0681050145546666e-05,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10457588732242584,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4475.5,
|
|
"valid_targets_min": 3227
|
|
},
|
|
{
|
|
"epoch": 4.832397754611066,
|
|
"grad_norm": 0.4714259152212644,
|
|
"learning_rate": 1.0610411936281801e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10883314907550812,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4603.6,
|
|
"valid_targets_min": 3646
|
|
},
|
|
{
|
|
"epoch": 4.840417000801924,
|
|
"grad_norm": 0.4425823738981264,
|
|
"learning_rate": 1.0539923653939978e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10977053642272949,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4711.9,
|
|
"valid_targets_min": 3865
|
|
},
|
|
{
|
|
"epoch": 4.848436246992783,
|
|
"grad_norm": 0.4495702915653514,
|
|
"learning_rate": 1.0469586424032903e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10819928348064423,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4236.9,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 4.856455493183641,
|
|
"grad_norm": 0.4535838674669791,
|
|
"learning_rate": 1.0399401369660369e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1195032075047493,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4758.1,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 4.864474739374499,
|
|
"grad_norm": 0.4527480915890418,
|
|
"learning_rate": 1.0329369611492334e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11731770634651184,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4584.0,
|
|
"valid_targets_min": 3616
|
|
},
|
|
{
|
|
"epoch": 4.872493985565357,
|
|
"grad_norm": 0.4398592741444549,
|
|
"learning_rate": 1.0259492267751022e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12308873981237411,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4735.5,
|
|
"valid_targets_min": 4275
|
|
},
|
|
{
|
|
"epoch": 4.880513231756215,
|
|
"grad_norm": 0.4549822300674908,
|
|
"learning_rate": 1.0189770454193052e-05,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10972632467746735,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4409.2,
|
|
"valid_targets_min": 4008
|
|
},
|
|
{
|
|
"epoch": 4.888532477947073,
|
|
"grad_norm": 0.45589081348864496,
|
|
"learning_rate": 1.0120205284091673e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11361020803451538,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4220.4,
|
|
"valid_targets_min": 3672
|
|
},
|
|
{
|
|
"epoch": 4.896551724137931,
|
|
"grad_norm": 0.43774799436100587,
|
|
"learning_rate": 1.0050797868218907e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184767961502075,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4925.2,
|
|
"valid_targets_min": 4681
|
|
},
|
|
{
|
|
"epoch": 4.904570970328789,
|
|
"grad_norm": 0.4284188624153602,
|
|
"learning_rate": 9.981549314827876e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14543475210666656,
|
|
"step": 3060,
|
|
"valid_targets_mean": 5007.5,
|
|
"valid_targets_min": 4259
|
|
},
|
|
{
|
|
"epoch": 4.912590216519647,
|
|
"grad_norm": 0.43129894839101307,
|
|
"learning_rate": 9.912460729635097e-06,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10845914483070374,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4634.9,
|
|
"valid_targets_min": 3553
|
|
},
|
|
{
|
|
"epoch": 4.920609462710505,
|
|
"grad_norm": 0.5053279904382639,
|
|
"learning_rate": 9.843533215802796e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12285682559013367,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4614.2,
|
|
"valid_targets_min": 3585
|
|
},
|
|
{
|
|
"epoch": 4.928628708901363,
|
|
"grad_norm": 0.4861518504762364,
|
|
"learning_rate": 9.774767873921357e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1222497746348381,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4340.6,
|
|
"valid_targets_min": 3340
|
|
},
|
|
{
|
|
"epoch": 4.936647955092221,
|
|
"grad_norm": 0.4421267926391212,
|
|
"learning_rate": 9.706165801991651e-06,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12266582995653152,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4533.1,
|
|
"valid_targets_min": 4097
|
|
},
|
|
{
|
|
"epoch": 4.944667201283079,
|
|
"grad_norm": 0.43869897365845967,
|
|
"learning_rate": 9.637728095407593e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10509057343006134,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4196.1,
|
|
"valid_targets_min": 1986
|
|
},
|
|
{
|
|
"epoch": 4.952686447473938,
|
|
"grad_norm": 0.415120332341541,
|
|
"learning_rate": 9.56945584693861e-06,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11263457685709,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4558.4,
|
|
"valid_targets_min": 3932
|
|
},
|
|
{
|
|
"epoch": 4.960705693664796,
|
|
"grad_norm": 0.42633433771612794,
|
|
"learning_rate": 9.501350146712193e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1101694405078888,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4488.2,
|
|
"valid_targets_min": 3757
|
|
},
|
|
{
|
|
"epoch": 4.968724939855654,
|
|
"grad_norm": 0.4470018756497917,
|
|
"learning_rate": 9.433412082196527e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11263976246118546,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4674.8,
|
|
"valid_targets_min": 4086
|
|
},
|
|
{
|
|
"epoch": 4.976744186046512,
|
|
"grad_norm": 0.4390867569974353,
|
|
"learning_rate": 9.365642738183044e-06,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1217423528432846,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4720.0,
|
|
"valid_targets_min": 4132
|
|
},
|
|
{
|
|
"epoch": 4.9847634322373695,
|
|
"grad_norm": 0.4225751945569841,
|
|
"learning_rate": 9.298043196769217e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11532281339168549,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4746.4,
|
|
"valid_targets_min": 4265
|
|
},
|
|
{
|
|
"epoch": 4.992782678428227,
|
|
"grad_norm": 0.4303453417634223,
|
|
"learning_rate": 9.230614537341167e-06,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1274029165506363,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4802.1,
|
|
"valid_targets_min": 4242
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6086848861519545,
|
|
"learning_rate": 9.163357836556498e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22303923964500427,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4606.5,
|
|
"valid_targets_min": 3831
|
|
},
|
|
{
|
|
"epoch": 5.008019246190858,
|
|
"grad_norm": 0.4300305493000558,
|
|
"learning_rate": 9.096274168327122e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09399233013391495,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4364.6,
|
|
"valid_targets_min": 2972
|
|
},
|
|
{
|
|
"epoch": 5.016038492381716,
|
|
"grad_norm": 0.44788224203192173,
|
|
"learning_rate": 9.029364603802017e-06,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12407524883747101,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4796.0,
|
|
"valid_targets_min": 4094
|
|
},
|
|
{
|
|
"epoch": 5.024057738572574,
|
|
"grad_norm": 0.4499909410780941,
|
|
"learning_rate": 8.962630211350248e-06,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1148647591471672,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4909.1,
|
|
"valid_targets_min": 3926
|
|
},
|
|
{
|
|
"epoch": 5.032076984763433,
|
|
"grad_norm": 0.44835309103578463,
|
|
"learning_rate": 8.89607205654378e-06,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10908360779285431,
|
|
"step": 3140,
|
|
"valid_targets_mean": 4366.8,
|
|
"valid_targets_min": 3509
|
|
},
|
|
{
|
|
"epoch": 5.0400962309542905,
|
|
"grad_norm": 0.4416227497759126,
|
|
"learning_rate": 8.829691202140591e-06,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10101351141929626,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4210.9,
|
|
"valid_targets_min": 2664
|
|
},
|
|
{
|
|
"epoch": 5.048115477145148,
|
|
"grad_norm": 0.48020634441004045,
|
|
"learning_rate": 8.763488708067604e-06,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09856002032756805,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4236.6,
|
|
"valid_targets_min": 3118
|
|
},
|
|
{
|
|
"epoch": 5.056134723336006,
|
|
"grad_norm": 0.4515181045226206,
|
|
"learning_rate": 8.69746563140379e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1193251758813858,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4782.0,
|
|
"valid_targets_min": 3365
|
|
},
|
|
{
|
|
"epoch": 5.064153969526864,
|
|
"grad_norm": 0.4528173265827618,
|
|
"learning_rate": 8.631623026363331e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1187511682510376,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4648.0,
|
|
"valid_targets_min": 3588
|
|
},
|
|
{
|
|
"epoch": 5.072173215717722,
|
|
"grad_norm": 0.4362667074273132,
|
|
"learning_rate": 8.56596194427873e-06,
|
|
"loss": 0.2081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10173220187425613,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4599.9,
|
|
"valid_targets_min": 3520
|
|
},
|
|
{
|
|
"epoch": 5.080192461908581,
|
|
"grad_norm": 0.4563139482875103,
|
|
"learning_rate": 8.500483433584054e-06,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09373918920755386,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4551.1,
|
|
"valid_targets_min": 3824
|
|
},
|
|
{
|
|
"epoch": 5.088211708099439,
|
|
"grad_norm": 0.4603357621846481,
|
|
"learning_rate": 8.435188539798187e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10584408044815063,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3960.5,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.096230954290297,
|
|
"grad_norm": 0.47474661979112576,
|
|
"learning_rate": 8.370078305508136e-06,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10983745008707047,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4749.4,
|
|
"valid_targets_min": 4310
|
|
},
|
|
{
|
|
"epoch": 5.104250200481155,
|
|
"grad_norm": 0.4730811730093893,
|
|
"learning_rate": 8.305153770352384e-06,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294356882572174,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5301.8,
|
|
"valid_targets_min": 4040
|
|
},
|
|
{
|
|
"epoch": 5.112269446672013,
|
|
"grad_norm": 0.46196347945997107,
|
|
"learning_rate": 8.240415971004285e-06,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11681726574897766,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5054.6,
|
|
"valid_targets_min": 4133
|
|
},
|
|
{
|
|
"epoch": 5.120288692862871,
|
|
"grad_norm": 0.47955515558427136,
|
|
"learning_rate": 8.175865941155525e-06,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1141970083117485,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4414.6,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 5.1283079390537285,
|
|
"grad_norm": 0.435923462596563,
|
|
"learning_rate": 8.111504711499598e-06,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09432478249073029,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4219.6,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 5.136327185244587,
|
|
"grad_norm": 0.479987221483574,
|
|
"learning_rate": 8.04733330971536e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199472844600677,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4405.8,
|
|
"valid_targets_min": 2890
|
|
},
|
|
{
|
|
"epoch": 5.144346431435445,
|
|
"grad_norm": 0.4449410930241248,
|
|
"learning_rate": 7.983352760450618e-06,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11100111901760101,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4931.2,
|
|
"valid_targets_min": 3955
|
|
},
|
|
{
|
|
"epoch": 5.152365677626303,
|
|
"grad_norm": 0.45611817813716415,
|
|
"learning_rate": 7.919564085305768e-06,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10806944221258163,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4160.8,
|
|
"valid_targets_min": 3464
|
|
},
|
|
{
|
|
"epoch": 5.160384923817161,
|
|
"grad_norm": 0.47249792271976476,
|
|
"learning_rate": 7.855968302817487e-06,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10879048705101013,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4345.4,
|
|
"valid_targets_min": 3292
|
|
},
|
|
{
|
|
"epoch": 5.168404170008019,
|
|
"grad_norm": 0.4396784043923612,
|
|
"learning_rate": 7.792566428442456e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10148850083351135,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4481.1,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 5.176423416198877,
|
|
"grad_norm": 0.41580425429820006,
|
|
"learning_rate": 7.729359474541168e-06,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10237696766853333,
|
|
"step": 3230,
|
|
"valid_targets_mean": 4547.1,
|
|
"valid_targets_min": 3857
|
|
},
|
|
{
|
|
"epoch": 5.184442662389736,
|
|
"grad_norm": 0.4839680715413035,
|
|
"learning_rate": 7.666348450361737e-06,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0968901515007019,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4119.2,
|
|
"valid_targets_min": 2709
|
|
},
|
|
{
|
|
"epoch": 5.192461908580594,
|
|
"grad_norm": 0.47380258321193036,
|
|
"learning_rate": 7.60353436202381e-06,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09668052196502686,
|
|
"step": 3240,
|
|
"valid_targets_mean": 4430.8,
|
|
"valid_targets_min": 3639
|
|
},
|
|
{
|
|
"epoch": 5.200481154771452,
|
|
"grad_norm": 0.45388005370392004,
|
|
"learning_rate": 7.540918212502479e-06,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12379284203052521,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4577.1,
|
|
"valid_targets_min": 3865
|
|
},
|
|
{
|
|
"epoch": 5.2085004009623095,
|
|
"grad_norm": 0.4425541353767829,
|
|
"learning_rate": 7.478501001612281e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11665067076683044,
|
|
"step": 3250,
|
|
"valid_targets_mean": 4644.8,
|
|
"valid_targets_min": 3799
|
|
},
|
|
{
|
|
"epoch": 5.216519647153167,
|
|
"grad_norm": 0.44625487682210413,
|
|
"learning_rate": 7.416283725991229e-06,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10748773068189621,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4583.9,
|
|
"valid_targets_min": 3775
|
|
},
|
|
{
|
|
"epoch": 5.224538893344025,
|
|
"grad_norm": 0.45027343188499547,
|
|
"learning_rate": 7.354267379084896e-06,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1143893152475357,
|
|
"step": 3260,
|
|
"valid_targets_mean": 4628.2,
|
|
"valid_targets_min": 3226
|
|
},
|
|
{
|
|
"epoch": 5.232558139534884,
|
|
"grad_norm": 1.3858024138393799,
|
|
"learning_rate": 7.292452951130548e-06,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10777521133422852,
|
|
"step": 3265,
|
|
"valid_targets_mean": 4959.8,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 5.240577385725742,
|
|
"grad_norm": 0.48063501579351675,
|
|
"learning_rate": 7.230841429141347e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12338593602180481,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4699.8,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 5.2485966319166,
|
|
"grad_norm": 0.4339916977867986,
|
|
"learning_rate": 7.169433796890595e-06,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09881225228309631,
|
|
"step": 3275,
|
|
"valid_targets_mean": 4391.4,
|
|
"valid_targets_min": 3553
|
|
},
|
|
{
|
|
"epoch": 5.256615878107458,
|
|
"grad_norm": 0.4681531624724502,
|
|
"learning_rate": 7.108231034895976e-06,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11527010053396225,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4348.0,
|
|
"valid_targets_min": 3541
|
|
},
|
|
{
|
|
"epoch": 5.264635124298316,
|
|
"grad_norm": 0.45793165671314323,
|
|
"learning_rate": 7.047234120403972e-06,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10204260051250458,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4710.4,
|
|
"valid_targets_min": 4004
|
|
},
|
|
{
|
|
"epoch": 5.272654370489174,
|
|
"grad_norm": 0.4484909534744471,
|
|
"learning_rate": 6.986444027374211e-06,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10809170454740524,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4683.8,
|
|
"valid_targets_min": 3845
|
|
},
|
|
{
|
|
"epoch": 5.280673616680032,
|
|
"grad_norm": 0.45206747643276307,
|
|
"learning_rate": 6.925861726463919e-06,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09189019352197647,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4381.2,
|
|
"valid_targets_min": 2993
|
|
},
|
|
{
|
|
"epoch": 5.2886928628708905,
|
|
"grad_norm": 0.4463371317679835,
|
|
"learning_rate": 6.865488185012464e-06,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10915966331958771,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4563.6,
|
|
"valid_targets_min": 4114
|
|
},
|
|
{
|
|
"epoch": 5.296712109061748,
|
|
"grad_norm": 0.4516704144176012,
|
|
"learning_rate": 6.805324367025825e-06,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.097736656665802,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4475.2,
|
|
"valid_targets_min": 4025
|
|
},
|
|
{
|
|
"epoch": 5.304731355252606,
|
|
"grad_norm": 0.42718342797667946,
|
|
"learning_rate": 6.745371233161309e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10516424477100372,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4697.9,
|
|
"valid_targets_min": 3876
|
|
},
|
|
{
|
|
"epoch": 5.312750601443464,
|
|
"grad_norm": 0.4531445597582339,
|
|
"learning_rate": 6.685629740712103e-06,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10034641623497009,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4337.5,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 5.320769847634322,
|
|
"grad_norm": 0.44387879424805904,
|
|
"learning_rate": 6.6261008435920605e-06,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10502344369888306,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4458.6,
|
|
"valid_targets_min": 3643
|
|
},
|
|
{
|
|
"epoch": 5.32878909382518,
|
|
"grad_norm": 0.4753026200789722,
|
|
"learning_rate": 6.566785492320471e-06,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10981746762990952,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4823.0,
|
|
"valid_targets_min": 4209
|
|
},
|
|
{
|
|
"epoch": 5.336808340016039,
|
|
"grad_norm": 0.43941454299861077,
|
|
"learning_rate": 6.507684634006815e-06,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12721127271652222,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5262.5,
|
|
"valid_targets_min": 3891
|
|
},
|
|
{
|
|
"epoch": 5.344827586206897,
|
|
"grad_norm": 0.48890754293194144,
|
|
"learning_rate": 6.448799212335734e-06,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10444176197052002,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4336.5,
|
|
"valid_targets_min": 2992
|
|
},
|
|
{
|
|
"epoch": 5.352846832397755,
|
|
"grad_norm": 0.4556232167860715,
|
|
"learning_rate": 6.390130167551869e-06,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11160598695278168,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4596.5,
|
|
"valid_targets_min": 3850
|
|
},
|
|
{
|
|
"epoch": 5.360866078588613,
|
|
"grad_norm": 0.450648126611957,
|
|
"learning_rate": 6.331678436444939e-06,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12426158785820007,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4872.1,
|
|
"valid_targets_min": 3946
|
|
},
|
|
{
|
|
"epoch": 5.368885324779471,
|
|
"grad_norm": 0.47332789705693923,
|
|
"learning_rate": 6.273444952334713e-06,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1028081476688385,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4233.0,
|
|
"valid_targets_min": 3728
|
|
},
|
|
{
|
|
"epoch": 5.3769045709703285,
|
|
"grad_norm": 0.456525392040167,
|
|
"learning_rate": 6.2154306450561175e-06,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10002676397562027,
|
|
"step": 3355,
|
|
"valid_targets_mean": 4471.8,
|
|
"valid_targets_min": 3928
|
|
},
|
|
{
|
|
"epoch": 5.384923817161187,
|
|
"grad_norm": 0.4614663161714611,
|
|
"learning_rate": 6.157636440944445e-06,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10217326879501343,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4580.0,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 5.392943063352045,
|
|
"grad_norm": 0.4706593290035969,
|
|
"learning_rate": 6.100063262820474e-06,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1167377457022667,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4650.5,
|
|
"valid_targets_min": 4094
|
|
},
|
|
{
|
|
"epoch": 5.400962309542903,
|
|
"grad_norm": 0.4214232636725003,
|
|
"learning_rate": 6.0427120299758236e-06,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11388294398784637,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4681.4,
|
|
"valid_targets_min": 3820
|
|
},
|
|
{
|
|
"epoch": 5.408981555733761,
|
|
"grad_norm": 0.47388063329089547,
|
|
"learning_rate": 5.985583658158212e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12076931446790695,
|
|
"step": 3375,
|
|
"valid_targets_mean": 4938.8,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 5.417000801924619,
|
|
"grad_norm": 0.4655039188416919,
|
|
"learning_rate": 5.928679059556852e-06,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10407209396362305,
|
|
"step": 3380,
|
|
"valid_targets_mean": 4331.2,
|
|
"valid_targets_min": 3653
|
|
},
|
|
{
|
|
"epoch": 5.425020048115477,
|
|
"grad_norm": 0.4511860917431392,
|
|
"learning_rate": 5.871999142787908e-06,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10842403769493103,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4720.9,
|
|
"valid_targets_min": 3822
|
|
},
|
|
{
|
|
"epoch": 5.433039294306335,
|
|
"grad_norm": 0.4284121653759454,
|
|
"learning_rate": 5.815544812879936e-06,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10570953786373138,
|
|
"step": 3390,
|
|
"valid_targets_mean": 4631.6,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 5.441058540497194,
|
|
"grad_norm": 0.4589688819863075,
|
|
"learning_rate": 5.759316971259503e-06,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10312160849571228,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4122.1,
|
|
"valid_targets_min": 3578
|
|
},
|
|
{
|
|
"epoch": 5.4490777866880515,
|
|
"grad_norm": 0.4262111780771323,
|
|
"learning_rate": 5.703316515736734e-06,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10516688227653503,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4691.5,
|
|
"valid_targets_min": 3718
|
|
},
|
|
{
|
|
"epoch": 5.4570970328789095,
|
|
"grad_norm": 0.46748019278285985,
|
|
"learning_rate": 5.647544340491007e-06,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11228135228157043,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4761.6,
|
|
"valid_targets_min": 3616
|
|
},
|
|
{
|
|
"epoch": 5.465116279069767,
|
|
"grad_norm": 0.44941654459317326,
|
|
"learning_rate": 5.592001336056659e-06,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11021216213703156,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4497.0,
|
|
"valid_targets_min": 3164
|
|
},
|
|
{
|
|
"epoch": 5.473135525260625,
|
|
"grad_norm": 0.4446796761804415,
|
|
"learning_rate": 5.536688389308782e-06,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11163399368524551,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4586.8,
|
|
"valid_targets_min": 3709
|
|
},
|
|
{
|
|
"epoch": 5.481154771451483,
|
|
"grad_norm": 0.48512965921351514,
|
|
"learning_rate": 5.4816063834490496e-06,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11870346963405609,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4721.8,
|
|
"valid_targets_min": 4004
|
|
},
|
|
{
|
|
"epoch": 5.489174017642342,
|
|
"grad_norm": 0.45141290272356793,
|
|
"learning_rate": 5.426756197991625e-06,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10871092975139618,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4529.0,
|
|
"valid_targets_min": 3872
|
|
},
|
|
{
|
|
"epoch": 5.4971932638332,
|
|
"grad_norm": 0.46292649421757076,
|
|
"learning_rate": 5.372138708749104e-06,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1279624104499817,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5082.1,
|
|
"valid_targets_min": 3590
|
|
},
|
|
{
|
|
"epoch": 5.505212510024058,
|
|
"grad_norm": 0.4515981582748995,
|
|
"learning_rate": 5.3177547878185436e-06,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11459192633628845,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4476.1,
|
|
"valid_targets_min": 3934
|
|
},
|
|
{
|
|
"epoch": 5.513231756214916,
|
|
"grad_norm": 0.46010048194888914,
|
|
"learning_rate": 5.263605303567532e-06,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1156245768070221,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4650.2,
|
|
"valid_targets_min": 3648
|
|
},
|
|
{
|
|
"epoch": 5.521251002405774,
|
|
"grad_norm": 0.4534357390523584,
|
|
"learning_rate": 5.20969112062032e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11926677078008652,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4298.1,
|
|
"valid_targets_min": 1986
|
|
},
|
|
{
|
|
"epoch": 5.529270248596632,
|
|
"grad_norm": 0.47338780263411756,
|
|
"learning_rate": 5.156013099844017e-06,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11273340880870819,
|
|
"step": 3450,
|
|
"valid_targets_mean": 4817.5,
|
|
"valid_targets_min": 4436
|
|
},
|
|
{
|
|
"epoch": 5.53728949478749,
|
|
"grad_norm": 0.44989616631080054,
|
|
"learning_rate": 5.1025720983348544e-06,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11134511977434158,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4767.6,
|
|
"valid_targets_min": 3880
|
|
},
|
|
{
|
|
"epoch": 5.545308740978348,
|
|
"grad_norm": 0.4504501707653401,
|
|
"learning_rate": 5.049368969404484e-06,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10127928853034973,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5067.0,
|
|
"valid_targets_min": 3921
|
|
},
|
|
{
|
|
"epoch": 5.553327987169206,
|
|
"grad_norm": 0.4439200616480011,
|
|
"learning_rate": 4.99640456256636e-06,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10942606627941132,
|
|
"step": 3465,
|
|
"valid_targets_mean": 4699.2,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 5.561347233360064,
|
|
"grad_norm": 0.44975770573773516,
|
|
"learning_rate": 4.9436797235221814e-06,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09773830324411392,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4662.5,
|
|
"valid_targets_min": 3503
|
|
},
|
|
{
|
|
"epoch": 5.569366479550922,
|
|
"grad_norm": 0.447359656102331,
|
|
"learning_rate": 4.891195294148376e-06,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10515010356903076,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4355.0,
|
|
"valid_targets_min": 3484
|
|
},
|
|
{
|
|
"epoch": 5.57738572574178,
|
|
"grad_norm": 0.477081424681057,
|
|
"learning_rate": 4.838952112482671e-06,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1058318242430687,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4530.2,
|
|
"valid_targets_min": 3665
|
|
},
|
|
{
|
|
"epoch": 5.585404971932638,
|
|
"grad_norm": 0.4438629851718538,
|
|
"learning_rate": 4.786951012710699e-06,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10466770827770233,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4437.5,
|
|
"valid_targets_min": 3846
|
|
},
|
|
{
|
|
"epoch": 5.593424218123497,
|
|
"grad_norm": 0.474655021384595,
|
|
"learning_rate": 4.735192825152686e-06,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11859377473592758,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4468.1,
|
|
"valid_targets_min": 3992
|
|
},
|
|
{
|
|
"epoch": 5.601443464314355,
|
|
"grad_norm": 0.4633056811428741,
|
|
"learning_rate": 4.683678376250189e-06,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08488088846206665,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3748.6,
|
|
"valid_targets_min": 2401
|
|
},
|
|
{
|
|
"epoch": 5.609462710505213,
|
|
"grad_norm": 0.4577832210460976,
|
|
"learning_rate": 4.6324084885529086e-06,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0966181606054306,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4289.0,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 5.6174819566960705,
|
|
"grad_norm": 0.4527670530882612,
|
|
"learning_rate": 4.581383980705538e-06,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10629314929246902,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4455.8,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 5.6255012028869285,
|
|
"grad_norm": 0.44381241016276474,
|
|
"learning_rate": 4.530605667434727e-06,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09071381390094757,
|
|
"step": 3510,
|
|
"valid_targets_mean": 4043.5,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 5.633520449077786,
|
|
"grad_norm": 0.5021074247658274,
|
|
"learning_rate": 4.480074359536013e-06,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1129186823964119,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4766.0,
|
|
"valid_targets_min": 4077
|
|
},
|
|
{
|
|
"epoch": 5.641539695268644,
|
|
"grad_norm": 0.4524615106227792,
|
|
"learning_rate": 4.429790863860934e-06,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10058964043855667,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4521.8,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 5.649558941459503,
|
|
"grad_norm": 0.4846907191530838,
|
|
"learning_rate": 4.3797559833041146e-06,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09921002388000488,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4416.2,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 5.657578187650361,
|
|
"grad_norm": 0.4704948999446332,
|
|
"learning_rate": 4.329970516790447e-06,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0935068428516388,
|
|
"step": 3530,
|
|
"valid_targets_mean": 4289.2,
|
|
"valid_targets_min": 3195
|
|
},
|
|
{
|
|
"epoch": 5.665597433841219,
|
|
"grad_norm": 0.5209877087890661,
|
|
"learning_rate": 4.280435259262363e-06,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12153840810060501,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4670.5,
|
|
"valid_targets_min": 4034
|
|
},
|
|
{
|
|
"epoch": 5.673616680032077,
|
|
"grad_norm": 0.4499407800945331,
|
|
"learning_rate": 4.231151001667077e-06,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.113419309258461,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4656.1,
|
|
"valid_targets_min": 4104
|
|
},
|
|
{
|
|
"epoch": 5.681635926222935,
|
|
"grad_norm": 0.4596103281218751,
|
|
"learning_rate": 4.182118530944044e-06,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09523070603609085,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4227.5,
|
|
"valid_targets_min": 3487
|
|
},
|
|
{
|
|
"epoch": 5.689655172413794,
|
|
"grad_norm": 0.4451820919815352,
|
|
"learning_rate": 4.133338630012307e-06,
|
|
"loss": 0.2164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09828858822584152,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4288.5,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 5.6976744186046515,
|
|
"grad_norm": 0.4694232832968929,
|
|
"learning_rate": 4.0848120777580554e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1064469963312149,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4594.1,
|
|
"valid_targets_min": 3725
|
|
},
|
|
{
|
|
"epoch": 5.705693664795509,
|
|
"grad_norm": 0.49035359309055715,
|
|
"learning_rate": 4.036539649022182e-06,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12154176831245422,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4615.1,
|
|
"valid_targets_min": 3784
|
|
},
|
|
{
|
|
"epoch": 5.713712910986367,
|
|
"grad_norm": 0.48064401827560344,
|
|
"learning_rate": 3.988522114587865e-06,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09964503347873688,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4515.5,
|
|
"valid_targets_min": 4012
|
|
},
|
|
{
|
|
"epoch": 5.721732157177225,
|
|
"grad_norm": 0.4563843425097047,
|
|
"learning_rate": 3.940760241168331e-06,
|
|
"loss": 0.2077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10791806876659393,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4484.9,
|
|
"valid_targets_min": 3512
|
|
},
|
|
{
|
|
"epoch": 5.729751403368083,
|
|
"grad_norm": 0.4595985296247818,
|
|
"learning_rate": 3.893254791394541e-06,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10560589283704758,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4651.6,
|
|
"valid_targets_min": 3746
|
|
},
|
|
{
|
|
"epoch": 5.737770649558941,
|
|
"grad_norm": 0.43383077920848173,
|
|
"learning_rate": 3.846006523803074e-06,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10724681615829468,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4624.1,
|
|
"valid_targets_min": 3631
|
|
},
|
|
{
|
|
"epoch": 5.7457898957498,
|
|
"grad_norm": 0.4752727301463115,
|
|
"learning_rate": 3.799016192823981e-06,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11441905796527863,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4486.2,
|
|
"valid_targets_min": 3055
|
|
},
|
|
{
|
|
"epoch": 5.753809141940658,
|
|
"grad_norm": 0.45929881711088494,
|
|
"learning_rate": 3.7522845487687276e-06,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11218512058258057,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4537.4,
|
|
"valid_targets_min": 3696
|
|
},
|
|
{
|
|
"epoch": 5.761828388131516,
|
|
"grad_norm": 0.4632134801387505,
|
|
"learning_rate": 3.7058123378182664e-06,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10906368494033813,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4308.9,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 5.769847634322374,
|
|
"grad_norm": 0.46676069938312253,
|
|
"learning_rate": 3.6596003020110636e-06,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12287290394306183,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4801.8,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 5.777866880513232,
|
|
"grad_norm": 0.44792787499039255,
|
|
"learning_rate": 3.613649179231287e-06,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10086522996425629,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4374.8,
|
|
"valid_targets_min": 4012
|
|
},
|
|
{
|
|
"epoch": 5.7858861267040895,
|
|
"grad_norm": 0.4621183793949488,
|
|
"learning_rate": 3.5679597031970017e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10532127320766449,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4325.4,
|
|
"valid_targets_min": 2872
|
|
},
|
|
{
|
|
"epoch": 5.7939053728949474,
|
|
"grad_norm": 0.4653297926475882,
|
|
"learning_rate": 3.5225326034484764e-06,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11459863930940628,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4209.9,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 5.801924619085806,
|
|
"grad_norm": 0.4674193068730399,
|
|
"learning_rate": 3.4773686053365197e-06,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10900762677192688,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4529.1,
|
|
"valid_targets_min": 3651
|
|
},
|
|
{
|
|
"epoch": 5.809943865276664,
|
|
"grad_norm": 0.4582379349057464,
|
|
"learning_rate": 3.4324684300109003e-06,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11418825387954712,
|
|
"step": 3625,
|
|
"valid_targets_mean": 4497.8,
|
|
"valid_targets_min": 3819
|
|
},
|
|
{
|
|
"epoch": 5.817963111467522,
|
|
"grad_norm": 0.453545309682996,
|
|
"learning_rate": 3.387832794408832e-06,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09799555689096451,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3862.6,
|
|
"valid_targets_min": 2625
|
|
},
|
|
{
|
|
"epoch": 5.82598235765838,
|
|
"grad_norm": 0.4572341666668046,
|
|
"learning_rate": 3.3434624112435342e-06,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10879440605640411,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4516.6,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 5.834001603849238,
|
|
"grad_norm": 0.4422174661082933,
|
|
"learning_rate": 3.2993579889928397e-06,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09629951417446136,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4537.9,
|
|
"valid_targets_min": 3202
|
|
},
|
|
{
|
|
"epoch": 5.842020850040096,
|
|
"grad_norm": 0.43392486618894355,
|
|
"learning_rate": 3.25552023188789e-06,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11012008041143417,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4445.6,
|
|
"valid_targets_min": 3949
|
|
},
|
|
{
|
|
"epoch": 5.850040096230955,
|
|
"grad_norm": 0.46475156621307595,
|
|
"learning_rate": 3.211949839901889e-06,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10912737995386124,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4386.0,
|
|
"valid_targets_min": 3308
|
|
},
|
|
{
|
|
"epoch": 5.858059342421813,
|
|
"grad_norm": 0.455894709859725,
|
|
"learning_rate": 3.168647508738927e-06,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.101384237408638,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4713.9,
|
|
"valid_targets_min": 4020
|
|
},
|
|
{
|
|
"epoch": 5.8660785886126705,
|
|
"grad_norm": 0.45725663321543325,
|
|
"learning_rate": 3.125613929822866e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11432838439941406,
|
|
"step": 3660,
|
|
"valid_targets_mean": 4910.9,
|
|
"valid_targets_min": 4099
|
|
},
|
|
{
|
|
"epoch": 5.874097834803528,
|
|
"grad_norm": 0.44031953617990727,
|
|
"learning_rate": 3.0828497902863106e-06,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09244678914546967,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3945.9,
|
|
"valid_targets_min": 3296
|
|
},
|
|
{
|
|
"epoch": 5.882117080994386,
|
|
"grad_norm": 0.4472139074622762,
|
|
"learning_rate": 3.0403557729596267e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11110793799161911,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4797.1,
|
|
"valid_targets_min": 3987
|
|
},
|
|
{
|
|
"epoch": 5.890136327185244,
|
|
"grad_norm": 0.44835315909306644,
|
|
"learning_rate": 2.998132556360038e-06,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1091860681772232,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4397.5,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 5.898155573376103,
|
|
"grad_norm": 0.46262840713415043,
|
|
"learning_rate": 2.9561808146808068e-06,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1139296442270279,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4792.9,
|
|
"valid_targets_min": 4400
|
|
},
|
|
{
|
|
"epoch": 5.906174819566961,
|
|
"grad_norm": 0.49050318666076126,
|
|
"learning_rate": 2.9145012177804476e-06,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09822183102369308,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4595.9,
|
|
"valid_targets_min": 3820
|
|
},
|
|
{
|
|
"epoch": 5.914194065757819,
|
|
"grad_norm": 0.43143457420086395,
|
|
"learning_rate": 2.8730944311720454e-06,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11099233478307724,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4537.4,
|
|
"valid_targets_min": 3583
|
|
},
|
|
{
|
|
"epoch": 5.922213311948677,
|
|
"grad_norm": 0.45599767775496214,
|
|
"learning_rate": 2.8319611160126226e-06,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10986532270908356,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4713.0,
|
|
"valid_targets_min": 4064
|
|
},
|
|
{
|
|
"epoch": 5.930232558139535,
|
|
"grad_norm": 0.44587463277614875,
|
|
"learning_rate": 2.791101929092592e-06,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10897990316152573,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4972.6,
|
|
"valid_targets_min": 3660
|
|
},
|
|
{
|
|
"epoch": 5.938251804330393,
|
|
"grad_norm": 0.44573219882972864,
|
|
"learning_rate": 2.750517522825251e-06,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09754989296197891,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4213.1,
|
|
"valid_targets_min": 3773
|
|
},
|
|
{
|
|
"epoch": 5.946271050521251,
|
|
"grad_norm": 0.44561056945295324,
|
|
"learning_rate": 2.710208545236397e-06,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12135400623083115,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4783.9,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 5.954290296712109,
|
|
"grad_norm": 0.4596336668307515,
|
|
"learning_rate": 2.670175639953929e-06,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10317400097846985,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4433.1,
|
|
"valid_targets_min": 3645
|
|
},
|
|
{
|
|
"epoch": 5.962309542902967,
|
|
"grad_norm": 0.45209612989758996,
|
|
"learning_rate": 2.6304194461976207e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09174099564552307,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4209.2,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 5.970328789093825,
|
|
"grad_norm": 0.45365190295364155,
|
|
"learning_rate": 2.5909405987688896e-06,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09441766142845154,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4130.9,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 5.978348035284683,
|
|
"grad_norm": 0.5730074182466711,
|
|
"learning_rate": 2.5517397280406565e-06,
|
|
"loss": 0.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12971524894237518,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4634.0,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 5.986367281475541,
|
|
"grad_norm": 0.44177154806462204,
|
|
"learning_rate": 2.512817459947312e-06,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10600416362285614,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4687.1,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 5.994386527666399,
|
|
"grad_norm": 0.4280374658694157,
|
|
"learning_rate": 2.4741744159746618e-06,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10365656018257141,
|
|
"step": 3740,
|
|
"valid_targets_mean": 4436.8,
|
|
"valid_targets_min": 3514
|
|
},
|
|
{
|
|
"epoch": 6.001603849238172,
|
|
"grad_norm": 0.42668563495416695,
|
|
"learning_rate": 2.435811213150079e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12547534704208374,
|
|
"step": 3745,
|
|
"valid_targets_mean": 5177.1,
|
|
"valid_targets_min": 4438
|
|
},
|
|
{
|
|
"epoch": 6.0096230954290295,
|
|
"grad_norm": 0.4598195308302302,
|
|
"learning_rate": 2.3977284640325805e-06,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11580699682235718,
|
|
"step": 3750,
|
|
"valid_targets_mean": 4933.4,
|
|
"valid_targets_min": 3899
|
|
},
|
|
{
|
|
"epoch": 6.017642341619887,
|
|
"grad_norm": 0.5523592661947574,
|
|
"learning_rate": 2.359926776703092e-06,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10423484444618225,
|
|
"step": 3755,
|
|
"valid_targets_mean": 4325.2,
|
|
"valid_targets_min": 3420
|
|
},
|
|
{
|
|
"epoch": 6.025661587810745,
|
|
"grad_norm": 0.4710778545975448,
|
|
"learning_rate": 2.3224067547547357e-06,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10377326607704163,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4195.2,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 6.033680834001604,
|
|
"grad_norm": 0.46931081523494317,
|
|
"learning_rate": 2.2851689972831536e-06,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11574733257293701,
|
|
"step": 3765,
|
|
"valid_targets_mean": 4952.4,
|
|
"valid_targets_min": 4052
|
|
},
|
|
{
|
|
"epoch": 6.041700080192462,
|
|
"grad_norm": 0.4381827029690205,
|
|
"learning_rate": 2.248214098877002e-06,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11084946990013123,
|
|
"step": 3770,
|
|
"valid_targets_mean": 4302.1,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 6.04971932638332,
|
|
"grad_norm": 0.4678374193753871,
|
|
"learning_rate": 2.2115426496083958e-06,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09367729723453522,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3907.4,
|
|
"valid_targets_min": 2957
|
|
},
|
|
{
|
|
"epoch": 6.057738572574178,
|
|
"grad_norm": 0.44852646809163055,
|
|
"learning_rate": 2.175155235023536e-06,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09929785877466202,
|
|
"step": 3780,
|
|
"valid_targets_mean": 4535.1,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 6.065757818765036,
|
|
"grad_norm": 0.46087727767263514,
|
|
"learning_rate": 2.1390524361333355e-06,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10589083284139633,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4633.9,
|
|
"valid_targets_min": 3889
|
|
},
|
|
{
|
|
"epoch": 6.073777064955894,
|
|
"grad_norm": 0.45364368798734,
|
|
"learning_rate": 2.1032348294041305e-06,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10089395940303802,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4577.9,
|
|
"valid_targets_min": 3642
|
|
},
|
|
{
|
|
"epoch": 6.081796311146753,
|
|
"grad_norm": 0.4748970994598991,
|
|
"learning_rate": 2.067702986748521e-06,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10249247401952744,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4337.8,
|
|
"valid_targets_min": 3244
|
|
},
|
|
{
|
|
"epoch": 6.0898155573376105,
|
|
"grad_norm": 0.4760174400551712,
|
|
"learning_rate": 2.0324574755161764e-06,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1158999353647232,
|
|
"step": 3800,
|
|
"valid_targets_mean": 4798.4,
|
|
"valid_targets_min": 3930
|
|
},
|
|
{
|
|
"epoch": 6.097834803528468,
|
|
"grad_norm": 0.45298412527868703,
|
|
"learning_rate": 1.9974988584848385e-06,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10676100850105286,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4433.9,
|
|
"valid_targets_min": 3722
|
|
},
|
|
{
|
|
"epoch": 6.105854049719326,
|
|
"grad_norm": 0.4914195026955669,
|
|
"learning_rate": 1.96282769385129e-06,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10328128933906555,
|
|
"step": 3810,
|
|
"valid_targets_mean": 4558.4,
|
|
"valid_targets_min": 3672
|
|
},
|
|
{
|
|
"epoch": 6.113873295910184,
|
|
"grad_norm": 0.4543563383351851,
|
|
"learning_rate": 1.9284445352224625e-06,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10607825219631195,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4465.5,
|
|
"valid_targets_min": 2975
|
|
},
|
|
{
|
|
"epoch": 6.121892542101042,
|
|
"grad_norm": 0.4865298421853967,
|
|
"learning_rate": 1.894349931606596e-06,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11714957654476166,
|
|
"step": 3820,
|
|
"valid_targets_mean": 4879.6,
|
|
"valid_targets_min": 4227
|
|
},
|
|
{
|
|
"epoch": 6.129911788291901,
|
|
"grad_norm": 0.44723961612866103,
|
|
"learning_rate": 1.8605444274044493e-06,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10705840587615967,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4810.8,
|
|
"valid_targets_min": 4262
|
|
},
|
|
{
|
|
"epoch": 6.137931034482759,
|
|
"grad_norm": 0.43901529236266873,
|
|
"learning_rate": 1.827028562400659e-06,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10404671728610992,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4577.2,
|
|
"valid_targets_min": 3796
|
|
},
|
|
{
|
|
"epoch": 6.145950280673617,
|
|
"grad_norm": 0.49193083891409656,
|
|
"learning_rate": 1.793802871755066e-06,
|
|
"loss": 0.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10195772349834442,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4264.2,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 6.153969526864475,
|
|
"grad_norm": 0.48667612155419543,
|
|
"learning_rate": 1.760867885994202e-06,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0987631306052208,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4363.4,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 6.161988773055333,
|
|
"grad_norm": 0.43538718027329615,
|
|
"learning_rate": 1.7282241310028047e-06,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09949157387018204,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4502.1,
|
|
"valid_targets_min": 3471
|
|
},
|
|
{
|
|
"epoch": 6.170008019246191,
|
|
"grad_norm": 0.4670552998545342,
|
|
"learning_rate": 1.6958721280154232e-06,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10528303682804108,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4617.4,
|
|
"valid_targets_min": 3572
|
|
},
|
|
{
|
|
"epoch": 6.1780272654370485,
|
|
"grad_norm": 0.4347364295125891,
|
|
"learning_rate": 1.6638123936081085e-06,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09164345264434814,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4552.1,
|
|
"valid_targets_min": 4134
|
|
},
|
|
{
|
|
"epoch": 6.186046511627907,
|
|
"grad_norm": 0.44809558444216946,
|
|
"learning_rate": 1.6320454396901463e-06,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09363134205341339,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4539.2,
|
|
"valid_targets_min": 3899
|
|
},
|
|
{
|
|
"epoch": 6.194065757818765,
|
|
"grad_norm": 0.46393826225457996,
|
|
"learning_rate": 1.6005717734958914e-06,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10422304272651672,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4760.5,
|
|
"valid_targets_min": 4155
|
|
},
|
|
{
|
|
"epoch": 6.202085004009623,
|
|
"grad_norm": 0.450637422161937,
|
|
"learning_rate": 1.569391897576671e-06,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11025909334421158,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4446.0,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 6.210104250200481,
|
|
"grad_norm": 0.4621170594344566,
|
|
"learning_rate": 1.5385063097927533e-06,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10195848345756531,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4667.2,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 6.218123496391339,
|
|
"grad_norm": 0.5060124714187377,
|
|
"learning_rate": 1.5079155033054104e-06,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11259067803621292,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4763.2,
|
|
"valid_targets_min": 3684
|
|
},
|
|
{
|
|
"epoch": 6.226142742582197,
|
|
"grad_norm": 0.46317589812510773,
|
|
"learning_rate": 1.4776199665690239e-06,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09253337979316711,
|
|
"step": 3885,
|
|
"valid_targets_mean": 4367.8,
|
|
"valid_targets_min": 3670
|
|
},
|
|
{
|
|
"epoch": 6.234161988773056,
|
|
"grad_norm": 0.4614601570034186,
|
|
"learning_rate": 1.4476201833233084e-06,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11421079933643341,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4856.1,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 6.242181234963914,
|
|
"grad_norm": 0.4584528993653797,
|
|
"learning_rate": 1.4179166325855676e-06,
|
|
"loss": 0.2069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11212031543254852,
|
|
"step": 3895,
|
|
"valid_targets_mean": 5205.0,
|
|
"valid_targets_min": 3890
|
|
},
|
|
{
|
|
"epoch": 6.250200481154772,
|
|
"grad_norm": 0.43659319699607,
|
|
"learning_rate": 1.3885097886430599e-06,
|
|
"loss": 0.206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10124218463897705,
|
|
"step": 3900,
|
|
"valid_targets_mean": 4453.1,
|
|
"valid_targets_min": 4013
|
|
},
|
|
{
|
|
"epoch": 6.2582197273456295,
|
|
"grad_norm": 0.4852901390364178,
|
|
"learning_rate": 1.35940012104542e-06,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10278389602899551,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4733.2,
|
|
"valid_targets_min": 4032
|
|
},
|
|
{
|
|
"epoch": 6.266238973536487,
|
|
"grad_norm": 0.4648611119413731,
|
|
"learning_rate": 1.3305880945971583e-06,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12161250412464142,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4871.6,
|
|
"valid_targets_min": 3740
|
|
},
|
|
{
|
|
"epoch": 6.274258219727345,
|
|
"grad_norm": 0.4403696923402282,
|
|
"learning_rate": 1.3020741693502403e-06,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09692840278148651,
|
|
"step": 3915,
|
|
"valid_targets_mean": 4575.1,
|
|
"valid_targets_min": 3921
|
|
},
|
|
{
|
|
"epoch": 6.282277465918204,
|
|
"grad_norm": 0.4539408847361249,
|
|
"learning_rate": 1.27385880059675e-06,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09935731440782547,
|
|
"step": 3920,
|
|
"valid_targets_mean": 4649.4,
|
|
"valid_targets_min": 3701
|
|
},
|
|
{
|
|
"epoch": 6.290296712109062,
|
|
"grad_norm": 0.4633739525359953,
|
|
"learning_rate": 1.245942438861607e-06,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10724038630723953,
|
|
"step": 3925,
|
|
"valid_targets_mean": 4539.1,
|
|
"valid_targets_min": 3771
|
|
},
|
|
{
|
|
"epoch": 6.29831595829992,
|
|
"grad_norm": 0.4747930795535427,
|
|
"learning_rate": 1.2183255298953788e-06,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1183721274137497,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4793.1,
|
|
"valid_targets_min": 3899
|
|
},
|
|
{
|
|
"epoch": 6.306335204490778,
|
|
"grad_norm": 0.43042630452583963,
|
|
"learning_rate": 1.1910085146671645e-06,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10537775605916977,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4465.4,
|
|
"valid_targets_min": 3475
|
|
},
|
|
{
|
|
"epoch": 6.314354450681636,
|
|
"grad_norm": 0.4912678322230296,
|
|
"learning_rate": 1.1639918293575492e-06,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09717830270528793,
|
|
"step": 3940,
|
|
"valid_targets_mean": 4289.9,
|
|
"valid_targets_min": 2832
|
|
},
|
|
{
|
|
"epoch": 6.322373696872494,
|
|
"grad_norm": 0.45624173048465894,
|
|
"learning_rate": 1.1372759053516536e-06,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0961437076330185,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4489.6,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 6.330392943063352,
|
|
"grad_norm": 0.4278320150073397,
|
|
"learning_rate": 1.1108611692322157e-06,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10570381581783295,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4703.2,
|
|
"valid_targets_min": 4304
|
|
},
|
|
{
|
|
"epoch": 6.3384121892542105,
|
|
"grad_norm": 0.4879745193251374,
|
|
"learning_rate": 1.0847480427728142e-06,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1099633201956749,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4502.4,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 6.346431435445068,
|
|
"grad_norm": 0.4704433773223622,
|
|
"learning_rate": 1.0589369429311125e-06,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09593408554792404,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4394.9,
|
|
"valid_targets_min": 3345
|
|
},
|
|
{
|
|
"epoch": 6.354450681635926,
|
|
"grad_norm": 0.5020486838011342,
|
|
"learning_rate": 1.0334282818422037e-06,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0998045802116394,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4598.1,
|
|
"valid_targets_min": 3842
|
|
},
|
|
{
|
|
"epoch": 6.362469927826784,
|
|
"grad_norm": 0.44365887342512617,
|
|
"learning_rate": 1.008222466812041e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09395066648721695,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4399.6,
|
|
"valid_targets_min": 3937
|
|
},
|
|
{
|
|
"epoch": 6.370489174017642,
|
|
"grad_norm": 0.4700215702487181,
|
|
"learning_rate": 9.83319900310915e-07,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09363153576850891,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4544.5,
|
|
"valid_targets_min": 3495
|
|
},
|
|
{
|
|
"epoch": 6.3785084202085,
|
|
"grad_norm": 0.4523593694660502,
|
|
"learning_rate": 9.587209799670495e-07,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09407342970371246,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4460.0,
|
|
"valid_targets_min": 3445
|
|
},
|
|
{
|
|
"epoch": 6.386527666399359,
|
|
"grad_norm": 0.46809844711855386,
|
|
"learning_rate": 9.344260985602327e-07,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152912348508835,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4088.5,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 6.394546912590217,
|
|
"grad_norm": 0.47245248959624675,
|
|
"learning_rate": 9.104356440155526e-07,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09654849767684937,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4095.6,
|
|
"valid_targets_min": 3590
|
|
},
|
|
{
|
|
"epoch": 6.402566158781075,
|
|
"grad_norm": 0.5006013416863727,
|
|
"learning_rate": 8.867499993972162e-07,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08933766931295395,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4206.1,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 6.410585404971933,
|
|
"grad_norm": 0.4455387757763163,
|
|
"learning_rate": 8.633695429024058e-07,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11078263819217682,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4965.9,
|
|
"valid_targets_min": 4299
|
|
},
|
|
{
|
|
"epoch": 6.4186046511627906,
|
|
"grad_norm": 0.4736922016536859,
|
|
"learning_rate": 8.402946478552732e-07,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11487339437007904,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4571.6,
|
|
"valid_targets_min": 3752
|
|
},
|
|
{
|
|
"epoch": 6.4266238973536485,
|
|
"grad_norm": 0.49006988197921764,
|
|
"learning_rate": 8.175256827009392e-07,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10278570652008057,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4506.6,
|
|
"valid_targets_min": 3378
|
|
},
|
|
{
|
|
"epoch": 6.434643143544507,
|
|
"grad_norm": 0.4914138162268631,
|
|
"learning_rate": 7.95063010999646e-07,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10657691210508347,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4407.0,
|
|
"valid_targets_min": 3351
|
|
},
|
|
{
|
|
"epoch": 6.442662389735365,
|
|
"grad_norm": 0.47961031644091195,
|
|
"learning_rate": 7.729069914209409e-07,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10455227643251419,
|
|
"step": 4020,
|
|
"valid_targets_mean": 4456.5,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 6.450681635926223,
|
|
"grad_norm": 0.4918550517748648,
|
|
"learning_rate": 7.510579777379345e-07,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11138023436069489,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4532.6,
|
|
"valid_targets_min": 3942
|
|
},
|
|
{
|
|
"epoch": 6.458700882117081,
|
|
"grad_norm": 0.4556493200445796,
|
|
"learning_rate": 7.295163188216792e-07,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11020553112030029,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4592.6,
|
|
"valid_targets_min": 3566
|
|
},
|
|
{
|
|
"epoch": 6.466720128307939,
|
|
"grad_norm": 0.45876977277446723,
|
|
"learning_rate": 7.08282358635568e-07,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10683296620845795,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4457.5,
|
|
"valid_targets_min": 3144
|
|
},
|
|
{
|
|
"epoch": 6.474739374498797,
|
|
"grad_norm": 0.4544310850838144,
|
|
"learning_rate": 6.87356436229869e-07,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10212244838476181,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4392.4,
|
|
"valid_targets_min": 3651
|
|
},
|
|
{
|
|
"epoch": 6.482758620689655,
|
|
"grad_norm": 0.47164124179504324,
|
|
"learning_rate": 6.667388857362977e-07,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09916101396083832,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3992.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 6.490777866880514,
|
|
"grad_norm": 0.5136364933324513,
|
|
"learning_rate": 6.464300363626797e-07,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10205759108066559,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4486.4,
|
|
"valid_targets_min": 3746
|
|
},
|
|
{
|
|
"epoch": 6.4987971130713715,
|
|
"grad_norm": 0.46779581861555314,
|
|
"learning_rate": 6.264302123877053e-07,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10563259571790695,
|
|
"step": 4055,
|
|
"valid_targets_mean": 4548.1,
|
|
"valid_targets_min": 3160
|
|
},
|
|
{
|
|
"epoch": 6.5068163592622295,
|
|
"grad_norm": 0.45851575654083343,
|
|
"learning_rate": 6.067397331557412e-07,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11079549044370651,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4624.5,
|
|
"valid_targets_min": 3799
|
|
},
|
|
{
|
|
"epoch": 6.514835605453087,
|
|
"grad_norm": 0.45215505585303234,
|
|
"learning_rate": 5.873589130717405e-07,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10103313624858856,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4070.1,
|
|
"valid_targets_min": 3158
|
|
},
|
|
{
|
|
"epoch": 6.522854851643945,
|
|
"grad_norm": 0.4750382150112356,
|
|
"learning_rate": 5.682880615962116e-07,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10561086982488632,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4514.0,
|
|
"valid_targets_min": 3722
|
|
},
|
|
{
|
|
"epoch": 6.530874097834803,
|
|
"grad_norm": 0.4756799758983407,
|
|
"learning_rate": 5.495274832402841e-07,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1048189103603363,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4787.0,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 6.538893344025661,
|
|
"grad_norm": 0.5062328140111715,
|
|
"learning_rate": 5.310774775608529e-07,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10136038064956665,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4555.8,
|
|
"valid_targets_min": 4004
|
|
},
|
|
{
|
|
"epoch": 6.54691259021652,
|
|
"grad_norm": 0.480834955392123,
|
|
"learning_rate": 5.129383391557751e-07,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1158549040555954,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4309.9,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 6.554931836407378,
|
|
"grad_norm": 0.4567212538245473,
|
|
"learning_rate": 4.951103576591876e-07,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10591988265514374,
|
|
"step": 4090,
|
|
"valid_targets_mean": 4814.8,
|
|
"valid_targets_min": 3583
|
|
},
|
|
{
|
|
"epoch": 6.562951082598236,
|
|
"grad_norm": 0.4543878467879687,
|
|
"learning_rate": 4.7759381773687e-07,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10710999369621277,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4434.9,
|
|
"valid_targets_min": 3539
|
|
},
|
|
{
|
|
"epoch": 6.570970328789094,
|
|
"grad_norm": 0.4684759955911961,
|
|
"learning_rate": 4.6038899908170234e-07,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09386956691741943,
|
|
"step": 4100,
|
|
"valid_targets_mean": 4118.5,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 6.578989574979952,
|
|
"grad_norm": 0.461808179198195,
|
|
"learning_rate": 4.4349617640920164e-07,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0990794450044632,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4304.5,
|
|
"valid_targets_min": 3893
|
|
},
|
|
{
|
|
"epoch": 6.58700882117081,
|
|
"grad_norm": 0.45472923384234204,
|
|
"learning_rate": 4.2691561945312764e-07,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10852346569299698,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4297.1,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 6.595028067361668,
|
|
"grad_norm": 0.44394639955864407,
|
|
"learning_rate": 4.106475929611886e-07,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.102170929312706,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4830.1,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 6.603047313552526,
|
|
"grad_norm": 0.4726300496397618,
|
|
"learning_rate": 3.9469235669080007e-07,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1033250242471695,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4948.0,
|
|
"valid_targets_min": 3987
|
|
},
|
|
{
|
|
"epoch": 6.611066559743384,
|
|
"grad_norm": 0.45499215451671254,
|
|
"learning_rate": 3.7905016540495053e-07,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10895930230617523,
|
|
"step": 4125,
|
|
"valid_targets_mean": 4841.9,
|
|
"valid_targets_min": 3938
|
|
},
|
|
{
|
|
"epoch": 6.619085805934242,
|
|
"grad_norm": 0.4690896730751305,
|
|
"learning_rate": 3.63721268868118e-07,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11667926609516144,
|
|
"step": 4130,
|
|
"valid_targets_mean": 4928.0,
|
|
"valid_targets_min": 4167
|
|
},
|
|
{
|
|
"epoch": 6.6271050521251,
|
|
"grad_norm": 0.4367075486106248,
|
|
"learning_rate": 3.487059118422997e-07,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12400277704000473,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4708.5,
|
|
"valid_targets_min": 4076
|
|
},
|
|
{
|
|
"epoch": 6.635124298315958,
|
|
"grad_norm": 0.4619096861797313,
|
|
"learning_rate": 3.3400433408308895e-07,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10418310016393661,
|
|
"step": 4140,
|
|
"valid_targets_mean": 4370.1,
|
|
"valid_targets_min": 3544
|
|
},
|
|
{
|
|
"epoch": 6.643143544506817,
|
|
"grad_norm": 0.4752510330375891,
|
|
"learning_rate": 3.196167703358577e-07,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12772074341773987,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4873.1,
|
|
"valid_targets_min": 3809
|
|
},
|
|
{
|
|
"epoch": 6.651162790697675,
|
|
"grad_norm": 0.47223416548896396,
|
|
"learning_rate": 3.0554345033199985e-07,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0961679071187973,
|
|
"step": 4150,
|
|
"valid_targets_mean": 4422.4,
|
|
"valid_targets_min": 3210
|
|
},
|
|
{
|
|
"epoch": 6.659182036888533,
|
|
"grad_norm": 0.4841818406995206,
|
|
"learning_rate": 2.917845987852652e-07,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09021811932325363,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4130.0,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 6.6672012830793905,
|
|
"grad_norm": 0.4863982906428031,
|
|
"learning_rate": 2.783404353881758e-07,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09987124800682068,
|
|
"step": 4160,
|
|
"valid_targets_mean": 4752.0,
|
|
"valid_targets_min": 3828
|
|
},
|
|
{
|
|
"epoch": 6.6752205292702484,
|
|
"grad_norm": 0.46922400578848467,
|
|
"learning_rate": 2.652111748085151e-07,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10341273248195648,
|
|
"step": 4165,
|
|
"valid_targets_mean": 4340.4,
|
|
"valid_targets_min": 2604
|
|
},
|
|
{
|
|
"epoch": 6.683239775461106,
|
|
"grad_norm": 0.4616317963367416,
|
|
"learning_rate": 2.523970266859044e-07,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09709709882736206,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4691.5,
|
|
"valid_targets_min": 3840
|
|
},
|
|
{
|
|
"epoch": 6.691259021651964,
|
|
"grad_norm": 0.48321752999150674,
|
|
"learning_rate": 2.398981956284363e-07,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10197799652814865,
|
|
"step": 4175,
|
|
"valid_targets_mean": 4229.8,
|
|
"valid_targets_min": 3354
|
|
},
|
|
{
|
|
"epoch": 6.699278267842823,
|
|
"grad_norm": 0.44227836196761117,
|
|
"learning_rate": 2.2771488120944207e-07,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1122627779841423,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4550.9,
|
|
"valid_targets_min": 3819
|
|
},
|
|
{
|
|
"epoch": 6.707297514033681,
|
|
"grad_norm": 0.47746216288882015,
|
|
"learning_rate": 2.1584727796427174e-07,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10047496110200882,
|
|
"step": 4185,
|
|
"valid_targets_mean": 4379.0,
|
|
"valid_targets_min": 3915
|
|
},
|
|
{
|
|
"epoch": 6.715316760224539,
|
|
"grad_norm": 0.45814558141201744,
|
|
"learning_rate": 2.0429557538720556e-07,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10335575044155121,
|
|
"step": 4190,
|
|
"valid_targets_mean": 4787.8,
|
|
"valid_targets_min": 3974
|
|
},
|
|
{
|
|
"epoch": 6.723336006415397,
|
|
"grad_norm": 0.47695928532878007,
|
|
"learning_rate": 1.930599579284298e-07,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09930959343910217,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4341.2,
|
|
"valid_targets_min": 2758
|
|
},
|
|
{
|
|
"epoch": 6.731355252606255,
|
|
"grad_norm": 0.4692894067713825,
|
|
"learning_rate": 1.8214060499107679e-07,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10605455935001373,
|
|
"step": 4200,
|
|
"valid_targets_mean": 4294.0,
|
|
"valid_targets_min": 2772
|
|
},
|
|
{
|
|
"epoch": 6.739374498797113,
|
|
"grad_norm": 0.4690223178963461,
|
|
"learning_rate": 1.7153769092837614e-07,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10487551987171173,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4360.5,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 6.7473937449879715,
|
|
"grad_norm": 0.46777091725588543,
|
|
"learning_rate": 1.6125138504086146e-07,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10005223006010056,
|
|
"step": 4210,
|
|
"valid_targets_mean": 4048.0,
|
|
"valid_targets_min": 3201
|
|
},
|
|
{
|
|
"epoch": 6.755412991178829,
|
|
"grad_norm": 0.4568278592980705,
|
|
"learning_rate": 1.5128185157367247e-07,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09427569061517715,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4330.2,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 6.763432237369687,
|
|
"grad_norm": 0.4829918293749772,
|
|
"learning_rate": 1.4162924971393044e-07,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11825500428676605,
|
|
"step": 4220,
|
|
"valid_targets_mean": 4679.9,
|
|
"valid_targets_min": 3784
|
|
},
|
|
{
|
|
"epoch": 6.771451483560545,
|
|
"grad_norm": 0.43548728169134787,
|
|
"learning_rate": 1.322937335881891e-07,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10248410701751709,
|
|
"step": 4225,
|
|
"valid_targets_mean": 4484.4,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 6.779470729751403,
|
|
"grad_norm": 0.45053238843463916,
|
|
"learning_rate": 1.2327545225999215e-07,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09821924567222595,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4458.9,
|
|
"valid_targets_min": 3097
|
|
},
|
|
{
|
|
"epoch": 6.787489975942261,
|
|
"grad_norm": 0.49070311897840085,
|
|
"learning_rate": 1.145745497274664e-07,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10371457785367966,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3927.8,
|
|
"valid_targets_min": 2428
|
|
},
|
|
{
|
|
"epoch": 6.79550922213312,
|
|
"grad_norm": 0.4390649224362788,
|
|
"learning_rate": 1.061911649210523e-07,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10334643721580505,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4214.4,
|
|
"valid_targets_min": 3503
|
|
},
|
|
{
|
|
"epoch": 6.803528468323978,
|
|
"grad_norm": 0.4709846000983619,
|
|
"learning_rate": 9.812543170126365e-08,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08917765319347382,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4225.2,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 6.811547714514836,
|
|
"grad_norm": 0.4604998090084913,
|
|
"learning_rate": 9.03774788565559e-08,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10518975555896759,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4765.0,
|
|
"valid_targets_min": 3806
|
|
},
|
|
{
|
|
"epoch": 6.819566960705694,
|
|
"grad_norm": 0.45345343103246877,
|
|
"learning_rate": 8.294743010127448e-08,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.108390212059021,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4600.9,
|
|
"valid_targets_min": 3845
|
|
},
|
|
{
|
|
"epoch": 6.827586206896552,
|
|
"grad_norm": 0.4870985654965032,
|
|
"learning_rate": 7.583540407367418e-08,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12185350805521011,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4664.4,
|
|
"valid_targets_min": 3686
|
|
},
|
|
{
|
|
"epoch": 6.8356054530874095,
|
|
"grad_norm": 0.4738033034034639,
|
|
"learning_rate": 6.904151433402728e-08,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09747340530157089,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4639.5,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 6.843624699278267,
|
|
"grad_norm": 0.45442783543080495,
|
|
"learning_rate": 6.256586936281172e-08,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1090766042470932,
|
|
"step": 4270,
|
|
"valid_targets_mean": 4658.6,
|
|
"valid_targets_min": 4004
|
|
},
|
|
{
|
|
"epoch": 6.851643945469126,
|
|
"grad_norm": 0.5039517442438474,
|
|
"learning_rate": 5.6408572558972475e-08,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0976313129067421,
|
|
"step": 4275,
|
|
"valid_targets_mean": 4394.4,
|
|
"valid_targets_min": 3731
|
|
},
|
|
{
|
|
"epoch": 6.859663191659984,
|
|
"grad_norm": 0.4886928755649215,
|
|
"learning_rate": 5.0569722238280605e-08,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.114060178399086,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4720.4,
|
|
"valid_targets_min": 4130
|
|
},
|
|
{
|
|
"epoch": 6.867682437850842,
|
|
"grad_norm": 0.45774509882508313,
|
|
"learning_rate": 4.504941163175236e-08,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10239440947771072,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4865.5,
|
|
"valid_targets_min": 3851
|
|
},
|
|
{
|
|
"epoch": 6.8757016840417,
|
|
"grad_norm": 0.4626175195720664,
|
|
"learning_rate": 3.984772888417032e-08,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10935470461845398,
|
|
"step": 4290,
|
|
"valid_targets_mean": 4823.6,
|
|
"valid_targets_min": 4123
|
|
},
|
|
{
|
|
"epoch": 6.883720930232558,
|
|
"grad_norm": 0.7692866359577154,
|
|
"learning_rate": 3.4964757052671216e-08,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10009350627660751,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4379.0,
|
|
"valid_targets_min": 3718
|
|
},
|
|
{
|
|
"epoch": 6.891740176423416,
|
|
"grad_norm": 0.4566029935247009,
|
|
"learning_rate": 3.0400574105415856e-08,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10371087491512299,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4816.8,
|
|
"valid_targets_min": 3894
|
|
},
|
|
{
|
|
"epoch": 6.899759422614274,
|
|
"grad_norm": 0.4414303729061531,
|
|
"learning_rate": 2.615525292035459e-08,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10968127846717834,
|
|
"step": 4305,
|
|
"valid_targets_mean": 4708.9,
|
|
"valid_targets_min": 3777
|
|
},
|
|
{
|
|
"epoch": 6.907778668805133,
|
|
"grad_norm": 0.46310995093441076,
|
|
"learning_rate": 2.222886128405266e-08,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10364767909049988,
|
|
"step": 4310,
|
|
"valid_targets_mean": 4779.6,
|
|
"valid_targets_min": 3971
|
|
},
|
|
{
|
|
"epoch": 6.9157979149959905,
|
|
"grad_norm": 0.5109754302496413,
|
|
"learning_rate": 1.8621461890617752e-08,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08972665667533875,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 3181
|
|
},
|
|
{
|
|
"epoch": 6.923817161186848,
|
|
"grad_norm": 0.48813026722878833,
|
|
"learning_rate": 1.5333112340687463e-08,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10656268149614334,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4678.5,
|
|
"valid_targets_min": 3538
|
|
},
|
|
{
|
|
"epoch": 6.931836407377706,
|
|
"grad_norm": 0.4752558644102115,
|
|
"learning_rate": 1.2363865140518905e-08,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11378395557403564,
|
|
"step": 4325,
|
|
"valid_targets_mean": 4758.8,
|
|
"valid_targets_min": 3934
|
|
},
|
|
{
|
|
"epoch": 6.939855653568564,
|
|
"grad_norm": 0.4577974202012719,
|
|
"learning_rate": 9.713767701151621e-09,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10873262584209442,
|
|
"step": 4330,
|
|
"valid_targets_mean": 4641.9,
|
|
"valid_targets_min": 3967
|
|
},
|
|
{
|
|
"epoch": 6.947874899759423,
|
|
"grad_norm": 0.46465115257584827,
|
|
"learning_rate": 7.382862337641516e-09,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09418153762817383,
|
|
"step": 4335,
|
|
"valid_targets_mean": 4664.0,
|
|
"valid_targets_min": 3610
|
|
},
|
|
{
|
|
"epoch": 6.955894145950281,
|
|
"grad_norm": 0.4501345600203577,
|
|
"learning_rate": 5.371186268390283e-09,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12410692125558853,
|
|
"step": 4340,
|
|
"valid_targets_mean": 4647.4,
|
|
"valid_targets_min": 3209
|
|
},
|
|
{
|
|
"epoch": 6.963913392141139,
|
|
"grad_norm": 0.4645537735819525,
|
|
"learning_rate": 3.678771614550325e-09,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0899016410112381,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4041.9,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 6.971932638331997,
|
|
"grad_norm": 0.47998855719509,
|
|
"learning_rate": 2.3056453995162763e-09,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1091904565691948,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4942.9,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 6.979951884522855,
|
|
"grad_norm": 0.47530246504626505,
|
|
"learning_rate": 1.2518295484875708e-09,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09671194851398468,
|
|
"step": 4355,
|
|
"valid_targets_mean": 4432.5,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 6.987971130713713,
|
|
"grad_norm": 0.5062265981618611,
|
|
"learning_rate": 5.173408881198328e-10,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09857651591300964,
|
|
"step": 4360,
|
|
"valid_targets_mean": 4707.9,
|
|
"valid_targets_min": 3751
|
|
},
|
|
{
|
|
"epoch": 6.995990376904571,
|
|
"grad_norm": 0.43453956594420007,
|
|
"learning_rate": 1.0219114625398263e-10,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11844588071107864,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4660.8,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2099834680557251,
|
|
"step": 4368,
|
|
"total_flos": 2.718655109289476e+18,
|
|
"train_loss": 0.2512514257169032,
|
|
"train_runtime": 55817.4038,
|
|
"train_samples_per_second": 1.251,
|
|
"train_steps_per_second": 0.078,
|
|
"valid_targets_mean": 4285.2,
|
|
"valid_targets_min": 3693
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4368,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.718655109289476e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|