9651 lines
268 KiB
JSON
9651 lines
268 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4368,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008012820512820512,
|
|
"grad_norm": 19.879163519026537,
|
|
"learning_rate": 3.661327231121282e-07,
|
|
"loss": 0.8272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8405616879463196,
|
|
"step": 5,
|
|
"valid_targets_mean": 3674.4,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 0.016025641025641024,
|
|
"grad_norm": 16.241622640859035,
|
|
"learning_rate": 8.237986270022884e-07,
|
|
"loss": 0.7792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8294421434402466,
|
|
"step": 10,
|
|
"valid_targets_mean": 3478.4,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 0.02403846153846154,
|
|
"grad_norm": 13.729710057537002,
|
|
"learning_rate": 1.2814645308924487e-06,
|
|
"loss": 0.7564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7618365287780762,
|
|
"step": 15,
|
|
"valid_targets_mean": 4128.4,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 0.03205128205128205,
|
|
"grad_norm": 13.07548119294594,
|
|
"learning_rate": 1.7391304347826088e-06,
|
|
"loss": 0.76,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8027458190917969,
|
|
"step": 20,
|
|
"valid_targets_mean": 3266.2,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 0.04006410256410257,
|
|
"grad_norm": 8.584873330406953,
|
|
"learning_rate": 2.196796338672769e-06,
|
|
"loss": 0.6846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6333841681480408,
|
|
"step": 25,
|
|
"valid_targets_mean": 3774.2,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 0.04807692307692308,
|
|
"grad_norm": 4.675883893427763,
|
|
"learning_rate": 2.654462242562929e-06,
|
|
"loss": 0.6292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5961663126945496,
|
|
"step": 30,
|
|
"valid_targets_mean": 3380.9,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 0.05608974358974359,
|
|
"grad_norm": 2.8044998329599147,
|
|
"learning_rate": 3.1121281464530894e-06,
|
|
"loss": 0.5932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5892564058303833,
|
|
"step": 35,
|
|
"valid_targets_mean": 3549.8,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.0641025641025641,
|
|
"grad_norm": 1.93974754269056,
|
|
"learning_rate": 3.56979405034325e-06,
|
|
"loss": 0.565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.534325897693634,
|
|
"step": 40,
|
|
"valid_targets_mean": 3281.8,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 0.07211538461538461,
|
|
"grad_norm": 1.7246213352284163,
|
|
"learning_rate": 4.0274599542334094e-06,
|
|
"loss": 0.57,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6538747549057007,
|
|
"step": 45,
|
|
"valid_targets_mean": 3394.2,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 0.08012820512820513,
|
|
"grad_norm": 1.2010022045969184,
|
|
"learning_rate": 4.48512585812357e-06,
|
|
"loss": 0.4973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4948710501194,
|
|
"step": 50,
|
|
"valid_targets_mean": 3626.2,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 0.08814102564102565,
|
|
"grad_norm": 1.0021386564368988,
|
|
"learning_rate": 4.94279176201373e-06,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4545423090457916,
|
|
"step": 55,
|
|
"valid_targets_mean": 3759.3,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 0.09615384615384616,
|
|
"grad_norm": 0.8828334064941602,
|
|
"learning_rate": 5.400457665903891e-06,
|
|
"loss": 0.5137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5267658233642578,
|
|
"step": 60,
|
|
"valid_targets_mean": 3870.8,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 0.10416666666666667,
|
|
"grad_norm": 0.8508838206539899,
|
|
"learning_rate": 5.858123569794051e-06,
|
|
"loss": 0.4561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5161157250404358,
|
|
"step": 65,
|
|
"valid_targets_mean": 3584.2,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 0.11217948717948718,
|
|
"grad_norm": 0.8490931245749613,
|
|
"learning_rate": 6.31578947368421e-06,
|
|
"loss": 0.4702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43000292778015137,
|
|
"step": 70,
|
|
"valid_targets_mean": 2902.6,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 0.1201923076923077,
|
|
"grad_norm": 0.7905351709375654,
|
|
"learning_rate": 6.773455377574372e-06,
|
|
"loss": 0.459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4744901657104492,
|
|
"step": 75,
|
|
"valid_targets_mean": 2864.9,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 0.1282051282051282,
|
|
"grad_norm": 0.6208154753019489,
|
|
"learning_rate": 7.231121281464531e-06,
|
|
"loss": 0.3937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3898765444755554,
|
|
"step": 80,
|
|
"valid_targets_mean": 4238.9,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 0.1362179487179487,
|
|
"grad_norm": 0.6498817168712755,
|
|
"learning_rate": 7.688787185354691e-06,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4282737374305725,
|
|
"step": 85,
|
|
"valid_targets_mean": 3655.1,
|
|
"valid_targets_min": 2268
|
|
},
|
|
{
|
|
"epoch": 0.14423076923076922,
|
|
"grad_norm": 0.6460440529509714,
|
|
"learning_rate": 8.146453089244852e-06,
|
|
"loss": 0.4334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.391543447971344,
|
|
"step": 90,
|
|
"valid_targets_mean": 3515.1,
|
|
"valid_targets_min": 2125
|
|
},
|
|
{
|
|
"epoch": 0.15224358974358973,
|
|
"grad_norm": 0.6964132469349131,
|
|
"learning_rate": 8.604118993135013e-06,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39982515573501587,
|
|
"step": 95,
|
|
"valid_targets_mean": 3153.3,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.16025641025641027,
|
|
"grad_norm": 0.7900128089325914,
|
|
"learning_rate": 9.061784897025172e-06,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4130443036556244,
|
|
"step": 100,
|
|
"valid_targets_mean": 3466.8,
|
|
"valid_targets_min": 1967
|
|
},
|
|
{
|
|
"epoch": 0.16826923076923078,
|
|
"grad_norm": 0.7439846618966071,
|
|
"learning_rate": 9.519450800915333e-06,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4345982074737549,
|
|
"step": 105,
|
|
"valid_targets_mean": 3018.2,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 0.1762820512820513,
|
|
"grad_norm": 0.7102582754236836,
|
|
"learning_rate": 9.977116704805492e-06,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43670815229415894,
|
|
"step": 110,
|
|
"valid_targets_mean": 3333.6,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.1842948717948718,
|
|
"grad_norm": 0.7593872791968305,
|
|
"learning_rate": 1.0434782608695653e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3753435015678406,
|
|
"step": 115,
|
|
"valid_targets_mean": 2670.1,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 0.19230769230769232,
|
|
"grad_norm": 0.58061569172075,
|
|
"learning_rate": 1.0892448512585814e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37627002596855164,
|
|
"step": 120,
|
|
"valid_targets_mean": 3750.1,
|
|
"valid_targets_min": 2147
|
|
},
|
|
{
|
|
"epoch": 0.20032051282051283,
|
|
"grad_norm": 0.7535485276584365,
|
|
"learning_rate": 1.1350114416475973e-05,
|
|
"loss": 0.4141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4423384666442871,
|
|
"step": 125,
|
|
"valid_targets_mean": 2803.3,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 0.20833333333333334,
|
|
"grad_norm": 0.6813290969582381,
|
|
"learning_rate": 1.1807780320366134e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4192976951599121,
|
|
"step": 130,
|
|
"valid_targets_mean": 3172.6,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 0.21634615384615385,
|
|
"grad_norm": 0.5032182735262825,
|
|
"learning_rate": 1.2265446224256295e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3107292056083679,
|
|
"step": 135,
|
|
"valid_targets_mean": 4446.8,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 0.22435897435897437,
|
|
"grad_norm": 0.5595984890274238,
|
|
"learning_rate": 1.2723112128146454e-05,
|
|
"loss": 0.3679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37036430835723877,
|
|
"step": 140,
|
|
"valid_targets_mean": 3787.6,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 0.23237179487179488,
|
|
"grad_norm": 0.6093079141585656,
|
|
"learning_rate": 1.3180778032036615e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3455902338027954,
|
|
"step": 145,
|
|
"valid_targets_mean": 3455.2,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 0.2403846153846154,
|
|
"grad_norm": 0.6053924365486686,
|
|
"learning_rate": 1.3638443935926776e-05,
|
|
"loss": 0.3574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31911373138427734,
|
|
"step": 150,
|
|
"valid_targets_mean": 3641.8,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 0.2483974358974359,
|
|
"grad_norm": 0.610771646331601,
|
|
"learning_rate": 1.4096109839816933e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3442545235157013,
|
|
"step": 155,
|
|
"valid_targets_mean": 3966.2,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 0.2564102564102564,
|
|
"grad_norm": 0.6293411508598329,
|
|
"learning_rate": 1.4553775743707096e-05,
|
|
"loss": 0.3544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34595659375190735,
|
|
"step": 160,
|
|
"valid_targets_mean": 3334.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 0.2644230769230769,
|
|
"grad_norm": 0.6171797139949151,
|
|
"learning_rate": 1.5011441647597256e-05,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953029274940491,
|
|
"step": 165,
|
|
"valid_targets_mean": 3222.6,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 0.2724358974358974,
|
|
"grad_norm": 0.6190047507550094,
|
|
"learning_rate": 1.5469107551487414e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35699936747550964,
|
|
"step": 170,
|
|
"valid_targets_mean": 3371.2,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 0.28044871794871795,
|
|
"grad_norm": 0.7333222604739893,
|
|
"learning_rate": 1.5926773455377575e-05,
|
|
"loss": 0.3601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3310508131980896,
|
|
"step": 175,
|
|
"valid_targets_mean": 2892.6,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 0.28846153846153844,
|
|
"grad_norm": 0.5628086160657125,
|
|
"learning_rate": 1.6384439359267736e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30004677176475525,
|
|
"step": 180,
|
|
"valid_targets_mean": 3743.0,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 0.296474358974359,
|
|
"grad_norm": 0.6768840470894368,
|
|
"learning_rate": 1.6842105263157896e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3543248176574707,
|
|
"step": 185,
|
|
"valid_targets_mean": 3018.8,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.30448717948717946,
|
|
"grad_norm": 0.5051527163277465,
|
|
"learning_rate": 1.7299771167048057e-05,
|
|
"loss": 0.3476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3271610736846924,
|
|
"step": 190,
|
|
"valid_targets_mean": 4514.3,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 0.5888252395327391,
|
|
"learning_rate": 1.7757437070938218e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3883243203163147,
|
|
"step": 195,
|
|
"valid_targets_mean": 3935.9,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.32051282051282054,
|
|
"grad_norm": 0.6576488120247566,
|
|
"learning_rate": 1.8215102974828376e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.318558931350708,
|
|
"step": 200,
|
|
"valid_targets_mean": 3090.2,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 0.328525641025641,
|
|
"grad_norm": 0.6235407940452166,
|
|
"learning_rate": 1.8672768878718537e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3435075879096985,
|
|
"step": 205,
|
|
"valid_targets_mean": 3503.6,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 0.33653846153846156,
|
|
"grad_norm": 0.6927027884314421,
|
|
"learning_rate": 1.9130434782608697e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35067087411880493,
|
|
"step": 210,
|
|
"valid_targets_mean": 3377.3,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 0.34455128205128205,
|
|
"grad_norm": 0.659190951187864,
|
|
"learning_rate": 1.9588100686498858e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.373557448387146,
|
|
"step": 215,
|
|
"valid_targets_mean": 3157.6,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.3525641025641026,
|
|
"grad_norm": 0.7225360744876216,
|
|
"learning_rate": 2.004576659038902e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35884422063827515,
|
|
"step": 220,
|
|
"valid_targets_mean": 2915.2,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 0.3605769230769231,
|
|
"grad_norm": 0.6968676707115277,
|
|
"learning_rate": 2.050343249427918e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34566766023635864,
|
|
"step": 225,
|
|
"valid_targets_mean": 3111.0,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 0.3685897435897436,
|
|
"grad_norm": 0.6527690721075753,
|
|
"learning_rate": 2.0961098398169337e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.348101943731308,
|
|
"step": 230,
|
|
"valid_targets_mean": 3777.4,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.3766025641025641,
|
|
"grad_norm": 0.5509178981095164,
|
|
"learning_rate": 2.14187643020595e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3583675026893616,
|
|
"step": 235,
|
|
"valid_targets_mean": 4796.3,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 0.38461538461538464,
|
|
"grad_norm": 0.6953704961476025,
|
|
"learning_rate": 2.187643020594966e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33428576588630676,
|
|
"step": 240,
|
|
"valid_targets_mean": 3084.4,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.3926282051282051,
|
|
"grad_norm": 0.6779641767614547,
|
|
"learning_rate": 2.2334096109839817e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32355090975761414,
|
|
"step": 245,
|
|
"valid_targets_mean": 3073.9,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 0.40064102564102566,
|
|
"grad_norm": 0.5552383550989171,
|
|
"learning_rate": 2.279176201372998e-05,
|
|
"loss": 0.3455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28732290863990784,
|
|
"step": 250,
|
|
"valid_targets_mean": 3637.4,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 0.40865384615384615,
|
|
"grad_norm": 0.5245790306333418,
|
|
"learning_rate": 2.3249427917620138e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623603641986847,
|
|
"step": 255,
|
|
"valid_targets_mean": 5096.8,
|
|
"valid_targets_min": 2403
|
|
},
|
|
{
|
|
"epoch": 0.4166666666666667,
|
|
"grad_norm": 0.6195113913098604,
|
|
"learning_rate": 2.37070938215103e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30828219652175903,
|
|
"step": 260,
|
|
"valid_targets_mean": 3688.0,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 0.42467948717948717,
|
|
"grad_norm": 0.6048964562434808,
|
|
"learning_rate": 2.4164759725400463e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3320136070251465,
|
|
"step": 265,
|
|
"valid_targets_mean": 4328.0,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.4326923076923077,
|
|
"grad_norm": 0.7734208396405547,
|
|
"learning_rate": 2.462242562929062e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33223971724510193,
|
|
"step": 270,
|
|
"valid_targets_mean": 3306.9,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.4407051282051282,
|
|
"grad_norm": 0.693343532490161,
|
|
"learning_rate": 2.508009153318078e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36366701126098633,
|
|
"step": 275,
|
|
"valid_targets_mean": 3546.5,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 0.44871794871794873,
|
|
"grad_norm": 0.5065572861491737,
|
|
"learning_rate": 2.5537757437070943e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26778098940849304,
|
|
"step": 280,
|
|
"valid_targets_mean": 4116.9,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 0.4567307692307692,
|
|
"grad_norm": 0.7350400440914835,
|
|
"learning_rate": 2.59954233409611e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32177820801734924,
|
|
"step": 285,
|
|
"valid_targets_mean": 3166.9,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.46474358974358976,
|
|
"grad_norm": 0.6174748140430125,
|
|
"learning_rate": 2.645308924485126e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31665194034576416,
|
|
"step": 290,
|
|
"valid_targets_mean": 3801.4,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 0.47275641025641024,
|
|
"grad_norm": 0.6946647941876152,
|
|
"learning_rate": 2.6910755148741422e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32155466079711914,
|
|
"step": 295,
|
|
"valid_targets_mean": 3313.8,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 0.4807692307692308,
|
|
"grad_norm": 0.5710354149261667,
|
|
"learning_rate": 2.7368421052631583e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25513502955436707,
|
|
"step": 300,
|
|
"valid_targets_mean": 3716.9,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 0.48878205128205127,
|
|
"grad_norm": 0.7373833498036741,
|
|
"learning_rate": 2.782608695652174e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289334237575531,
|
|
"step": 305,
|
|
"valid_targets_mean": 3066.1,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 0.4967948717948718,
|
|
"grad_norm": 0.5591989242192875,
|
|
"learning_rate": 2.8283752860411904e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27973124384880066,
|
|
"step": 310,
|
|
"valid_targets_mean": 3920.6,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 0.5048076923076923,
|
|
"grad_norm": 0.6782061420012974,
|
|
"learning_rate": 2.8741418764302062e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34713077545166016,
|
|
"step": 315,
|
|
"valid_targets_mean": 3371.8,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 0.5128205128205128,
|
|
"grad_norm": 0.6699054785113124,
|
|
"learning_rate": 2.9199084668192223e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23538798093795776,
|
|
"step": 320,
|
|
"valid_targets_mean": 3664.9,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 0.5208333333333334,
|
|
"grad_norm": 0.6116276917307956,
|
|
"learning_rate": 2.9656750572082384e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3189501166343689,
|
|
"step": 325,
|
|
"valid_targets_mean": 3389.8,
|
|
"valid_targets_min": 1667
|
|
},
|
|
{
|
|
"epoch": 0.5288461538461539,
|
|
"grad_norm": 0.5857592319111676,
|
|
"learning_rate": 3.0114416475972544e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23993510007858276,
|
|
"step": 330,
|
|
"valid_targets_mean": 3283.4,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.5368589743589743,
|
|
"grad_norm": 0.6736826581101825,
|
|
"learning_rate": 3.05720823798627e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36864790320396423,
|
|
"step": 335,
|
|
"valid_targets_mean": 3316.4,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 0.5448717948717948,
|
|
"grad_norm": 0.7664383053440278,
|
|
"learning_rate": 3.102974828375286e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832982838153839,
|
|
"step": 340,
|
|
"valid_targets_mean": 3277.5,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 0.5528846153846154,
|
|
"grad_norm": 0.6889272863397136,
|
|
"learning_rate": 3.1487414187643024e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2998211979866028,
|
|
"step": 345,
|
|
"valid_targets_mean": 3290.5,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 0.5608974358974359,
|
|
"grad_norm": 0.6193116923226962,
|
|
"learning_rate": 3.1945080091533184e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30396538972854614,
|
|
"step": 350,
|
|
"valid_targets_mean": 3548.9,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 0.5689102564102564,
|
|
"grad_norm": 0.6347900047328812,
|
|
"learning_rate": 3.240274599542334e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25522372126579285,
|
|
"step": 355,
|
|
"valid_targets_mean": 3351.9,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 0.5769230769230769,
|
|
"grad_norm": 0.573848093038453,
|
|
"learning_rate": 3.2860411899313506e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26962196826934814,
|
|
"step": 360,
|
|
"valid_targets_mean": 4155.2,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 0.5849358974358975,
|
|
"grad_norm": 0.6857168008513769,
|
|
"learning_rate": 3.331807780320366e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34512871503829956,
|
|
"step": 365,
|
|
"valid_targets_mean": 3127.6,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 0.592948717948718,
|
|
"grad_norm": 0.6299501708112862,
|
|
"learning_rate": 3.377574370709382e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281450092792511,
|
|
"step": 370,
|
|
"valid_targets_mean": 3588.4,
|
|
"valid_targets_min": 2335
|
|
},
|
|
{
|
|
"epoch": 0.6009615384615384,
|
|
"grad_norm": 0.650842919082811,
|
|
"learning_rate": 3.423340961098399e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898961901664734,
|
|
"step": 375,
|
|
"valid_targets_mean": 4018.8,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 0.6089743589743589,
|
|
"grad_norm": 0.5459269291399794,
|
|
"learning_rate": 3.469107551487414e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25225377082824707,
|
|
"step": 380,
|
|
"valid_targets_mean": 3866.1,
|
|
"valid_targets_min": 2103
|
|
},
|
|
{
|
|
"epoch": 0.6169871794871795,
|
|
"grad_norm": 0.615096370203391,
|
|
"learning_rate": 3.5148741418764304e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3192776143550873,
|
|
"step": 385,
|
|
"valid_targets_mean": 3499.0,
|
|
"valid_targets_min": 1815
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 0.5876066893036459,
|
|
"learning_rate": 3.5606407322654464e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40550702810287476,
|
|
"step": 390,
|
|
"valid_targets_mean": 4450.1,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 0.6330128205128205,
|
|
"grad_norm": 0.636103916586035,
|
|
"learning_rate": 3.6064073226544625e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3379051685333252,
|
|
"step": 395,
|
|
"valid_targets_mean": 3188.5,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 0.6410256410256411,
|
|
"grad_norm": 0.5330337179042688,
|
|
"learning_rate": 3.6521739130434786e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22062329947948456,
|
|
"step": 400,
|
|
"valid_targets_mean": 3464.9,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.6490384615384616,
|
|
"grad_norm": 0.5889308075943929,
|
|
"learning_rate": 3.697940503432495e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30935603380203247,
|
|
"step": 405,
|
|
"valid_targets_mean": 3471.9,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 0.657051282051282,
|
|
"grad_norm": 0.5895846290146585,
|
|
"learning_rate": 3.743707093821511e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2988286018371582,
|
|
"step": 410,
|
|
"valid_targets_mean": 3568.8,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 0.6650641025641025,
|
|
"grad_norm": 0.6156981066113356,
|
|
"learning_rate": 3.789473684210526e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502792775630951,
|
|
"step": 415,
|
|
"valid_targets_mean": 3166.1,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 0.6730769230769231,
|
|
"grad_norm": 0.6129692991693171,
|
|
"learning_rate": 3.835240274599543e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3190169930458069,
|
|
"step": 420,
|
|
"valid_targets_mean": 3300.9,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 0.6810897435897436,
|
|
"grad_norm": 0.5946384971781778,
|
|
"learning_rate": 3.8810068649885584e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28893283009529114,
|
|
"step": 425,
|
|
"valid_targets_mean": 3541.3,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 0.6891025641025641,
|
|
"grad_norm": 0.5928560768818655,
|
|
"learning_rate": 3.9267734553775745e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30574142932891846,
|
|
"step": 430,
|
|
"valid_targets_mean": 3345.2,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 0.6971153846153846,
|
|
"grad_norm": 0.6686330453947963,
|
|
"learning_rate": 3.9725400457665905e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704697847366333,
|
|
"step": 435,
|
|
"valid_targets_mean": 3345.6,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 0.7051282051282052,
|
|
"grad_norm": 0.8157234796308768,
|
|
"learning_rate": 3.999997445219712e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545115351676941,
|
|
"step": 440,
|
|
"valid_targets_mean": 3026.8,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 0.7131410256410257,
|
|
"grad_norm": 0.6376986115267951,
|
|
"learning_rate": 3.999968704016428e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31646421551704407,
|
|
"step": 445,
|
|
"valid_targets_mean": 3348.3,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 0.7211538461538461,
|
|
"grad_norm": 0.6242518345573395,
|
|
"learning_rate": 3.9999080285949514e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892857491970062,
|
|
"step": 450,
|
|
"valid_targets_mean": 4161.2,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 0.7291666666666666,
|
|
"grad_norm": 0.6432850525140379,
|
|
"learning_rate": 3.999815419924108e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984856069087982,
|
|
"step": 455,
|
|
"valid_targets_mean": 3093.6,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 0.7371794871794872,
|
|
"grad_norm": 0.6612758056837322,
|
|
"learning_rate": 3.999690879482614e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33815231919288635,
|
|
"step": 460,
|
|
"valid_targets_mean": 3653.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 0.7451923076923077,
|
|
"grad_norm": 0.72681333588269,
|
|
"learning_rate": 3.9995344092590506e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31242284178733826,
|
|
"step": 465,
|
|
"valid_targets_mean": 2851.2,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 0.7532051282051282,
|
|
"grad_norm": 0.6194654033752973,
|
|
"learning_rate": 3.999346011751835e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921714782714844,
|
|
"step": 470,
|
|
"valid_targets_mean": 3197.2,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 0.7612179487179487,
|
|
"grad_norm": 0.4808513607264086,
|
|
"learning_rate": 3.999125689969176e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23430654406547546,
|
|
"step": 475,
|
|
"valid_targets_mean": 4196.6,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.5941447275387179,
|
|
"learning_rate": 3.9988734474290324e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832925021648407,
|
|
"step": 480,
|
|
"valid_targets_mean": 3244.9,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 0.7772435897435898,
|
|
"grad_norm": 0.5783663517369084,
|
|
"learning_rate": 3.9985892881590513e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3123394846916199,
|
|
"step": 485,
|
|
"valid_targets_mean": 4001.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 0.7852564102564102,
|
|
"grad_norm": 0.6290196783075886,
|
|
"learning_rate": 3.9982732166965054e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26579058170318604,
|
|
"step": 490,
|
|
"valid_targets_mean": 3206.8,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 0.7932692307692307,
|
|
"grad_norm": 0.5515059267107406,
|
|
"learning_rate": 3.997925238088221e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969035804271698,
|
|
"step": 495,
|
|
"valid_targets_mean": 3815.4,
|
|
"valid_targets_min": 2430
|
|
},
|
|
{
|
|
"epoch": 0.8012820512820513,
|
|
"grad_norm": 0.6216409807648683,
|
|
"learning_rate": 3.9975453578904975e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29659634828567505,
|
|
"step": 500,
|
|
"valid_targets_mean": 3265.8,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 0.8092948717948718,
|
|
"grad_norm": 0.5969522650575887,
|
|
"learning_rate": 3.997133582169018e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164765238761902,
|
|
"step": 505,
|
|
"valid_targets_mean": 3195.0,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 0.8173076923076923,
|
|
"grad_norm": 0.5979583258428498,
|
|
"learning_rate": 3.996689917498754e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30864858627319336,
|
|
"step": 510,
|
|
"valid_targets_mean": 3398.8,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.8253205128205128,
|
|
"grad_norm": 0.6316919330012107,
|
|
"learning_rate": 3.9962143709638585e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30174243450164795,
|
|
"step": 515,
|
|
"valid_targets_mean": 3205.8,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.5430126120597503,
|
|
"learning_rate": 3.995706950157554e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23059752583503723,
|
|
"step": 520,
|
|
"valid_targets_mean": 3918.1,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 0.8413461538461539,
|
|
"grad_norm": 0.7031490460182316,
|
|
"learning_rate": 3.995167663182008e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33885687589645386,
|
|
"step": 525,
|
|
"valid_targets_mean": 3232.4,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 0.8493589743589743,
|
|
"grad_norm": 0.519767450790238,
|
|
"learning_rate": 3.994596518648214e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27818453311920166,
|
|
"step": 530,
|
|
"valid_targets_mean": 3525.6,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 0.8573717948717948,
|
|
"grad_norm": 0.563048053238403,
|
|
"learning_rate": 3.993993525675838e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743377089500427,
|
|
"step": 535,
|
|
"valid_targets_mean": 3589.6,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 0.8653846153846154,
|
|
"grad_norm": 0.6322924611591424,
|
|
"learning_rate": 3.993358693893086e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31251460313796997,
|
|
"step": 540,
|
|
"valid_targets_mean": 2750.7,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 0.8733974358974359,
|
|
"grad_norm": 0.5801810360218201,
|
|
"learning_rate": 3.9926920334365457e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3175027668476105,
|
|
"step": 545,
|
|
"valid_targets_mean": 3746.8,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 0.8814102564102564,
|
|
"grad_norm": 0.5092914875070896,
|
|
"learning_rate": 3.991993554951023e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26988404989242554,
|
|
"step": 550,
|
|
"valid_targets_mean": 3991.2,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 0.8894230769230769,
|
|
"grad_norm": 0.5005063451467932,
|
|
"learning_rate": 3.991263269589376e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650403380393982,
|
|
"step": 555,
|
|
"valid_targets_mean": 3498.0,
|
|
"valid_targets_min": 1983
|
|
},
|
|
{
|
|
"epoch": 0.8974358974358975,
|
|
"grad_norm": 0.6082126141999025,
|
|
"learning_rate": 3.990501189012332e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27788347005844116,
|
|
"step": 560,
|
|
"valid_targets_mean": 3092.6,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 0.905448717948718,
|
|
"grad_norm": 0.5009998118620846,
|
|
"learning_rate": 3.989707325388305e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2369709610939026,
|
|
"step": 565,
|
|
"valid_targets_mean": 4009.0,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 0.9134615384615384,
|
|
"grad_norm": 0.6500666574085029,
|
|
"learning_rate": 3.9888816913932016e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28920280933380127,
|
|
"step": 570,
|
|
"valid_targets_mean": 3343.9,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 0.9214743589743589,
|
|
"grad_norm": 0.512455392251192,
|
|
"learning_rate": 3.988024300210215e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702721059322357,
|
|
"step": 575,
|
|
"valid_targets_mean": 3814.2,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.9294871794871795,
|
|
"grad_norm": 0.4624357477991238,
|
|
"learning_rate": 3.987135165529618e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.226173996925354,
|
|
"step": 580,
|
|
"valid_targets_mean": 3846.5,
|
|
"valid_targets_min": 2247
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 0.4416044819347505,
|
|
"learning_rate": 3.9862143015485446e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23104633390903473,
|
|
"step": 585,
|
|
"valid_targets_mean": 4618.9,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 0.9455128205128205,
|
|
"grad_norm": 0.6903001095230034,
|
|
"learning_rate": 3.985261722970759e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31117039918899536,
|
|
"step": 590,
|
|
"valid_targets_mean": 2427.8,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 0.9535256410256411,
|
|
"grad_norm": 0.5555874802584456,
|
|
"learning_rate": 3.984277445006426e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264557421207428,
|
|
"step": 595,
|
|
"valid_targets_mean": 3561.6,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 0.9615384615384616,
|
|
"grad_norm": 0.5008464052732203,
|
|
"learning_rate": 3.9832614833718654e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578531503677368,
|
|
"step": 600,
|
|
"valid_targets_mean": 3733.9,
|
|
"valid_targets_min": 2536
|
|
},
|
|
{
|
|
"epoch": 0.969551282051282,
|
|
"grad_norm": 0.5124597026534315,
|
|
"learning_rate": 3.9822138542893005e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29258236289024353,
|
|
"step": 605,
|
|
"valid_targets_mean": 4174.6,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 0.9775641025641025,
|
|
"grad_norm": 0.5830748091656482,
|
|
"learning_rate": 3.9811345744866014e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274571031332016,
|
|
"step": 610,
|
|
"valid_targets_mean": 3175.8,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 0.9855769230769231,
|
|
"grad_norm": 0.5812110493543656,
|
|
"learning_rate": 3.980023661197016e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767738103866577,
|
|
"step": 615,
|
|
"valid_targets_mean": 3417.7,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 0.9935897435897436,
|
|
"grad_norm": 0.5058825321899566,
|
|
"learning_rate": 3.978881132158896e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24452310800552368,
|
|
"step": 620,
|
|
"valid_targets_mean": 4135.4,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 1.001602564102564,
|
|
"grad_norm": 0.6221013298738255,
|
|
"learning_rate": 3.9777070056154124e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3130282759666443,
|
|
"step": 625,
|
|
"valid_targets_mean": 3976.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 1.0096153846153846,
|
|
"grad_norm": 0.5498571953724251,
|
|
"learning_rate": 3.976501300314264e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24536371231079102,
|
|
"step": 630,
|
|
"valid_targets_mean": 3277.8,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 1.017628205128205,
|
|
"grad_norm": 0.5803974168836421,
|
|
"learning_rate": 3.9752640355073825e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587840259075165,
|
|
"step": 635,
|
|
"valid_targets_mean": 3612.0,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 1.0256410256410255,
|
|
"grad_norm": 0.4989478569760299,
|
|
"learning_rate": 3.9739952309506175e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2712844908237457,
|
|
"step": 640,
|
|
"valid_targets_mean": 3861.4,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 1.0336538461538463,
|
|
"grad_norm": 0.5731516438747354,
|
|
"learning_rate": 3.972694906903427e-05,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2606925070285797,
|
|
"step": 645,
|
|
"valid_targets_mean": 3275.2,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 1.0416666666666667,
|
|
"grad_norm": 0.5992339614399704,
|
|
"learning_rate": 3.971363084128552e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3189937174320221,
|
|
"step": 650,
|
|
"valid_targets_mean": 3909.1,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 1.0496794871794872,
|
|
"grad_norm": 0.6286123050979071,
|
|
"learning_rate": 3.969999783891685e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24376559257507324,
|
|
"step": 655,
|
|
"valid_targets_mean": 2944.8,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 1.0576923076923077,
|
|
"grad_norm": 0.4966019659995407,
|
|
"learning_rate": 3.96860502796113e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25725221633911133,
|
|
"step": 660,
|
|
"valid_targets_mean": 4015.9,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 1.0657051282051282,
|
|
"grad_norm": 0.5176531998992511,
|
|
"learning_rate": 3.967178838607456e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29367154836654663,
|
|
"step": 665,
|
|
"valid_targets_mean": 4043.6,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 1.0737179487179487,
|
|
"grad_norm": 0.5958021710987137,
|
|
"learning_rate": 3.965721238603139e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784119248390198,
|
|
"step": 670,
|
|
"valid_targets_mean": 3578.6,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 1.0817307692307692,
|
|
"grad_norm": 0.5761545845245867,
|
|
"learning_rate": 3.964232251222203e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27019691467285156,
|
|
"step": 675,
|
|
"valid_targets_mean": 3691.7,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 1.0897435897435896,
|
|
"grad_norm": 0.46464694268546214,
|
|
"learning_rate": 3.962711900239844e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22501473128795624,
|
|
"step": 680,
|
|
"valid_targets_mean": 4020.6,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.0977564102564104,
|
|
"grad_norm": 0.5923617331224703,
|
|
"learning_rate": 3.961160209932051e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.300213098526001,
|
|
"step": 685,
|
|
"valid_targets_mean": 3534.9,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 1.1057692307692308,
|
|
"grad_norm": 0.49800360693237433,
|
|
"learning_rate": 3.95957720507522e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590849697589874,
|
|
"step": 690,
|
|
"valid_targets_mean": 4144.4,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 1.1137820512820513,
|
|
"grad_norm": 0.5249286912076265,
|
|
"learning_rate": 3.957962910945759e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2369239628314972,
|
|
"step": 695,
|
|
"valid_targets_mean": 3731.9,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 1.1217948717948718,
|
|
"grad_norm": 0.595022424517922,
|
|
"learning_rate": 3.9563173533196805e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2349884808063507,
|
|
"step": 700,
|
|
"valid_targets_mean": 3039.2,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 1.1298076923076923,
|
|
"grad_norm": 0.46206851258117854,
|
|
"learning_rate": 3.954640558472195e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2336539924144745,
|
|
"step": 705,
|
|
"valid_targets_mean": 4502.6,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 1.1378205128205128,
|
|
"grad_norm": 0.4976259228450712,
|
|
"learning_rate": 3.952932553177287e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27928584814071655,
|
|
"step": 710,
|
|
"valid_targets_mean": 4550.8,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 1.1458333333333333,
|
|
"grad_norm": 0.5827290190538823,
|
|
"learning_rate": 3.95119336470729e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30208781361579895,
|
|
"step": 715,
|
|
"valid_targets_mean": 3313.2,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 1.1538461538461537,
|
|
"grad_norm": 0.5330675063388832,
|
|
"learning_rate": 3.949423020832451e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26606252789497375,
|
|
"step": 720,
|
|
"valid_targets_mean": 3708.1,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 1.1618589743589745,
|
|
"grad_norm": 0.7492896337130134,
|
|
"learning_rate": 3.947621549820485e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3572145700454712,
|
|
"step": 725,
|
|
"valid_targets_mean": 4327.9,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 1.169871794871795,
|
|
"grad_norm": 0.5596584335249928,
|
|
"learning_rate": 3.945788980436129e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23527881503105164,
|
|
"step": 730,
|
|
"valid_targets_mean": 3265.1,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 1.1778846153846154,
|
|
"grad_norm": 0.523547937494331,
|
|
"learning_rate": 3.943925341940673e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2402142584323883,
|
|
"step": 735,
|
|
"valid_targets_mean": 3584.3,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 1.185897435897436,
|
|
"grad_norm": 0.6556254274252725,
|
|
"learning_rate": 3.942030664091503e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958112955093384,
|
|
"step": 740,
|
|
"valid_targets_mean": 3284.9,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 1.1939102564102564,
|
|
"grad_norm": 0.7172672140383616,
|
|
"learning_rate": 3.9401049771416214e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700616121292114,
|
|
"step": 745,
|
|
"valid_targets_mean": 2539.8,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 1.2019230769230769,
|
|
"grad_norm": 0.49454796376028104,
|
|
"learning_rate": 3.938148311839162e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2377432882785797,
|
|
"step": 750,
|
|
"valid_targets_mean": 3485.8,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 1.2099358974358974,
|
|
"grad_norm": 0.5811069095247919,
|
|
"learning_rate": 3.9361606994269014e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677744925022125,
|
|
"step": 755,
|
|
"valid_targets_mean": 3164.1,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 1.217948717948718,
|
|
"grad_norm": 0.5648309056629999,
|
|
"learning_rate": 3.934142171641763e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626572847366333,
|
|
"step": 760,
|
|
"valid_targets_mean": 4323.2,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 1.2259615384615385,
|
|
"grad_norm": 0.5991473267432794,
|
|
"learning_rate": 3.9320927607143003e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28201889991760254,
|
|
"step": 765,
|
|
"valid_targets_mean": 3157.0,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 1.233974358974359,
|
|
"grad_norm": 0.5176752233150077,
|
|
"learning_rate": 3.9300124993681976e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24023833870887756,
|
|
"step": 770,
|
|
"valid_targets_mean": 3602.4,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 1.2419871794871795,
|
|
"grad_norm": 0.5206801167112293,
|
|
"learning_rate": 3.9279014208197317e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2505241930484772,
|
|
"step": 775,
|
|
"valid_targets_mean": 3489.1,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.6675315221781205,
|
|
"learning_rate": 3.925759558777252e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33495059609413147,
|
|
"step": 780,
|
|
"valid_targets_mean": 3257.1,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 1.2580128205128205,
|
|
"grad_norm": 0.6330199967235978,
|
|
"learning_rate": 3.923586947440639e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804538607597351,
|
|
"step": 785,
|
|
"valid_targets_mean": 3040.4,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 1.266025641025641,
|
|
"grad_norm": 0.5983476006441762,
|
|
"learning_rate": 3.921383621500758e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28068268299102783,
|
|
"step": 790,
|
|
"valid_targets_mean": 3209.1,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 1.2740384615384617,
|
|
"grad_norm": 0.5652783732328699,
|
|
"learning_rate": 3.919149616138906e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000967502593994,
|
|
"step": 795,
|
|
"valid_targets_mean": 3471.8,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 1.282051282051282,
|
|
"grad_norm": 0.48020791065961055,
|
|
"learning_rate": 3.916884967026246e-05,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2259122133255005,
|
|
"step": 800,
|
|
"valid_targets_mean": 3463.9,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 1.2900641025641026,
|
|
"grad_norm": 0.5952179578546953,
|
|
"learning_rate": 3.914589710323245e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29001694917678833,
|
|
"step": 805,
|
|
"valid_targets_mean": 3119.9,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 1.2980769230769231,
|
|
"grad_norm": 0.4707806754969901,
|
|
"learning_rate": 3.912263882679091e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26366129517555237,
|
|
"step": 810,
|
|
"valid_targets_mean": 4289.3,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 1.3060897435897436,
|
|
"grad_norm": 0.5716840203449413,
|
|
"learning_rate": 3.9099075212311076e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654898166656494,
|
|
"step": 815,
|
|
"valid_targets_mean": 2866.3,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 1.314102564102564,
|
|
"grad_norm": 0.6469688497693485,
|
|
"learning_rate": 3.9075206636041646e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785649597644806,
|
|
"step": 820,
|
|
"valid_targets_mean": 2493.1,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 1.3221153846153846,
|
|
"grad_norm": 0.5959874599715644,
|
|
"learning_rate": 3.905103347910075e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832576632499695,
|
|
"step": 825,
|
|
"valid_targets_mean": 2864.1,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 1.330128205128205,
|
|
"grad_norm": 0.9264271733644018,
|
|
"learning_rate": 3.902655612746985e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25454455614089966,
|
|
"step": 830,
|
|
"valid_targets_mean": 3534.1,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 1.3381410256410255,
|
|
"grad_norm": 0.480142441096767,
|
|
"learning_rate": 3.900177497198761e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22106271982192993,
|
|
"step": 835,
|
|
"valid_targets_mean": 4238.7,
|
|
"valid_targets_min": 2572
|
|
},
|
|
{
|
|
"epoch": 1.3461538461538463,
|
|
"grad_norm": 0.5864396452941366,
|
|
"learning_rate": 3.8976690408343635e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24299442768096924,
|
|
"step": 840,
|
|
"valid_targets_mean": 2960.3,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 1.3541666666666667,
|
|
"grad_norm": 0.5825994482188888,
|
|
"learning_rate": 3.8951302837072165e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2334529161453247,
|
|
"step": 845,
|
|
"valid_targets_mean": 3193.8,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 1.3621794871794872,
|
|
"grad_norm": 0.5501476189263841,
|
|
"learning_rate": 3.892561266354566e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27922576665878296,
|
|
"step": 850,
|
|
"valid_targets_mean": 3318.4,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 1.3701923076923077,
|
|
"grad_norm": 0.547822690666954,
|
|
"learning_rate": 3.889962029796833e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3011276125907898,
|
|
"step": 855,
|
|
"valid_targets_mean": 3529.6,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 1.3782051282051282,
|
|
"grad_norm": 0.5347820792952802,
|
|
"learning_rate": 3.887332615536962e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26003679633140564,
|
|
"step": 860,
|
|
"valid_targets_mean": 3406.0,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 1.3862179487179487,
|
|
"grad_norm": 0.45056781979483984,
|
|
"learning_rate": 3.8846730655597535e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22900214791297913,
|
|
"step": 865,
|
|
"valid_targets_mean": 3888.4,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.3942307692307692,
|
|
"grad_norm": 0.5328070405748072,
|
|
"learning_rate": 3.881983422331198e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603125274181366,
|
|
"step": 870,
|
|
"valid_targets_mean": 3456.5,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 1.4022435897435899,
|
|
"grad_norm": 0.4393271897940895,
|
|
"learning_rate": 3.879263728797792e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24921280145645142,
|
|
"step": 875,
|
|
"valid_targets_mean": 4419.8,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 1.4102564102564101,
|
|
"grad_norm": 0.5719674452916136,
|
|
"learning_rate": 3.876514028385861e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597891092300415,
|
|
"step": 880,
|
|
"valid_targets_mean": 2635.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 1.4182692307692308,
|
|
"grad_norm": 0.5404887543205124,
|
|
"learning_rate": 3.873734365000857e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549996078014374,
|
|
"step": 885,
|
|
"valid_targets_mean": 3332.9,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.4262820512820513,
|
|
"grad_norm": 0.5610352900575674,
|
|
"learning_rate": 3.870924783026663e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3065013587474823,
|
|
"step": 890,
|
|
"valid_targets_mean": 4062.5,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 1.4342948717948718,
|
|
"grad_norm": 0.5589275153379575,
|
|
"learning_rate": 3.8680853273248826e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23422890901565552,
|
|
"step": 895,
|
|
"valid_targets_mean": 3403.2,
|
|
"valid_targets_min": 2185
|
|
},
|
|
{
|
|
"epoch": 1.4423076923076923,
|
|
"grad_norm": 0.553592927050287,
|
|
"learning_rate": 3.865216043234126e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253162145614624,
|
|
"step": 900,
|
|
"valid_targets_mean": 2935.9,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 1.4503205128205128,
|
|
"grad_norm": 0.4025061423275714,
|
|
"learning_rate": 3.862316976569281e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20285256206989288,
|
|
"step": 905,
|
|
"valid_targets_mean": 4851.9,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 1.4583333333333333,
|
|
"grad_norm": 0.4772579094391561,
|
|
"learning_rate": 3.859388173620785e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1970895528793335,
|
|
"step": 910,
|
|
"valid_targets_mean": 3699.8,
|
|
"valid_targets_min": 2229
|
|
},
|
|
{
|
|
"epoch": 1.4663461538461537,
|
|
"grad_norm": 0.46501156353751144,
|
|
"learning_rate": 3.8564296811538874e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.223637655377388,
|
|
"step": 915,
|
|
"valid_targets_mean": 3805.1,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 1.4743589743589745,
|
|
"grad_norm": 0.43790133057938885,
|
|
"learning_rate": 3.853441546407898e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20834773778915405,
|
|
"step": 920,
|
|
"valid_targets_mean": 4177.2,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.482371794871795,
|
|
"grad_norm": 0.5454748868394167,
|
|
"learning_rate": 3.850423817095438e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34521013498306274,
|
|
"step": 925,
|
|
"valid_targets_mean": 4365.6,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 1.4903846153846154,
|
|
"grad_norm": 0.6094400809944239,
|
|
"learning_rate": 3.847376541401674e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916841506958008,
|
|
"step": 930,
|
|
"valid_targets_mean": 3232.4,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 1.498397435897436,
|
|
"grad_norm": 0.5102372386762555,
|
|
"learning_rate": 3.844299767983551e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22648373246192932,
|
|
"step": 935,
|
|
"valid_targets_mean": 3722.2,
|
|
"valid_targets_min": 2282
|
|
},
|
|
{
|
|
"epoch": 1.5064102564102564,
|
|
"grad_norm": 0.5937064215878244,
|
|
"learning_rate": 3.841193545969015e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533378005027771,
|
|
"step": 940,
|
|
"valid_targets_mean": 3083.6,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 1.5144230769230769,
|
|
"grad_norm": 0.5764452712073888,
|
|
"learning_rate": 3.8380579249562265e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23182162642478943,
|
|
"step": 945,
|
|
"valid_targets_mean": 4151.0,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 1.5224358974358974,
|
|
"grad_norm": 0.49751167575454874,
|
|
"learning_rate": 3.8348929550127734e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20357897877693176,
|
|
"step": 950,
|
|
"valid_targets_mean": 3389.6,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 1.530448717948718,
|
|
"grad_norm": 0.5830545941112953,
|
|
"learning_rate": 3.831698686674866e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28848379850387573,
|
|
"step": 955,
|
|
"valid_targets_mean": 3175.4,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 1.5384615384615383,
|
|
"grad_norm": 0.5537102139254313,
|
|
"learning_rate": 3.828475170946534e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27478456497192383,
|
|
"step": 960,
|
|
"valid_targets_mean": 3087.1,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 1.546474358974359,
|
|
"grad_norm": 0.48142758344751735,
|
|
"learning_rate": 3.8252224592988087e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2891562581062317,
|
|
"step": 965,
|
|
"valid_targets_mean": 4012.2,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 1.5544871794871795,
|
|
"grad_norm": 0.528773456282276,
|
|
"learning_rate": 3.821940603668906e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26372164487838745,
|
|
"step": 970,
|
|
"valid_targets_mean": 3712.9,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 1.5625,
|
|
"grad_norm": 0.5233280901371568,
|
|
"learning_rate": 3.8186296564593924e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28313058614730835,
|
|
"step": 975,
|
|
"valid_targets_mean": 3477.3,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 1.5705128205128205,
|
|
"grad_norm": 0.4892747737589277,
|
|
"learning_rate": 3.815289670537351e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609057128429413,
|
|
"step": 980,
|
|
"valid_targets_mean": 3832.3,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 1.578525641025641,
|
|
"grad_norm": 0.5618123875185596,
|
|
"learning_rate": 3.811920699233535e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744769752025604,
|
|
"step": 985,
|
|
"valid_targets_mean": 3458.1,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 1.5865384615384617,
|
|
"grad_norm": 0.5183652853775259,
|
|
"learning_rate": 3.8085227963415186e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586187720298767,
|
|
"step": 990,
|
|
"valid_targets_mean": 3215.7,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 1.594551282051282,
|
|
"grad_norm": 0.578243895186001,
|
|
"learning_rate": 3.805096016116838e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26217278838157654,
|
|
"step": 995,
|
|
"valid_targets_mean": 2837.5,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.6025641025641026,
|
|
"grad_norm": 0.44822872043640527,
|
|
"learning_rate": 3.801640413276121e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2369627058506012,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4413.8,
|
|
"valid_targets_min": 2681
|
|
},
|
|
{
|
|
"epoch": 1.6105769230769231,
|
|
"grad_norm": 0.48293479766091263,
|
|
"learning_rate": 3.7981560429962204e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25961318612098694,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3498.2,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 1.6185897435897436,
|
|
"grad_norm": 0.5671456917361367,
|
|
"learning_rate": 3.7946429609133274e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24469730257987976,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3181.0,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 1.626602564102564,
|
|
"grad_norm": 0.5238584082964504,
|
|
"learning_rate": 3.791101223122084e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2893809676170349,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3797.9,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 1.6346153846153846,
|
|
"grad_norm": 0.49326302948662853,
|
|
"learning_rate": 3.787530886174688e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25847795605659485,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3775.4,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 1.6426282051282053,
|
|
"grad_norm": 0.5099061677777234,
|
|
"learning_rate": 3.783932007079992e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23870500922203064,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3440.9,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 1.6506410256410255,
|
|
"grad_norm": 0.47352637734693354,
|
|
"learning_rate": 3.7803046433025905e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686188220977783,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4180.7,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 1.6586538461538463,
|
|
"grad_norm": 0.5314853755634682,
|
|
"learning_rate": 3.7766488527619024e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3336718678474426,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3948.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.4921049254100938,
|
|
"learning_rate": 3.772964693831247e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925066351890564,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4141.9,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 1.6746794871794872,
|
|
"grad_norm": 0.4877598437145359,
|
|
"learning_rate": 3.7692522253369136e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22297003865242004,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3177.9,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 1.6826923076923077,
|
|
"grad_norm": 0.4446634577336513,
|
|
"learning_rate": 3.7655115065572194e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24668602645397186,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4311.4,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 1.6907051282051282,
|
|
"grad_norm": 0.4967124079594226,
|
|
"learning_rate": 3.7617425972215626e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651454210281372,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3677.1,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 1.6987179487179487,
|
|
"grad_norm": 0.4910030484682953,
|
|
"learning_rate": 3.757945557509472e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22594448924064636,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3567.4,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.7067307692307692,
|
|
"grad_norm": 0.6228057575059062,
|
|
"learning_rate": 3.7541204480496444e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984437346458435,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2969.2,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 1.7147435897435899,
|
|
"grad_norm": 0.5067614506057814,
|
|
"learning_rate": 3.7502673299189745e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21630270779132843,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3610.4,
|
|
"valid_targets_min": 2071
|
|
},
|
|
{
|
|
"epoch": 1.7227564102564101,
|
|
"grad_norm": 0.5417834073613709,
|
|
"learning_rate": 3.746386264641583e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261353075504303,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3262.4,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 1.7307692307692308,
|
|
"grad_norm": 0.441919197890755,
|
|
"learning_rate": 3.7424773141878324e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267980694770813,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4533.0,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 1.7387820512820513,
|
|
"grad_norm": 0.5705844782040119,
|
|
"learning_rate": 3.738540540973338e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3395107388496399,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3350.6,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 1.7467948717948718,
|
|
"grad_norm": 0.5572125349129156,
|
|
"learning_rate": 3.7345760078579695e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24319365620613098,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3060.1,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 1.7548076923076923,
|
|
"grad_norm": 0.48020365542220134,
|
|
"learning_rate": 3.730583778144852e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23472854495048523,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3657.5,
|
|
"valid_targets_min": 1830
|
|
},
|
|
{
|
|
"epoch": 1.7628205128205128,
|
|
"grad_norm": 0.5070561827519618,
|
|
"learning_rate": 3.7265639155793494e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25640958547592163,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3536.3,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 1.7708333333333335,
|
|
"grad_norm": 0.6071881430446543,
|
|
"learning_rate": 3.7225164843480503e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952703535556793,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2960.6,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 1.7788461538461537,
|
|
"grad_norm": 0.4365264125375587,
|
|
"learning_rate": 3.7184415490777426e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23112201690673828,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4157.2,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.7868589743589745,
|
|
"grad_norm": 0.5330168750066543,
|
|
"learning_rate": 3.714339174834379e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23355525732040405,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2776.1,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 1.7948717948717947,
|
|
"grad_norm": 0.49761685732908006,
|
|
"learning_rate": 3.710209427122044e-05,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513016164302826,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3535.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 1.8028846153846154,
|
|
"grad_norm": 0.4539573331647677,
|
|
"learning_rate": 3.7060523718819e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20890021324157715,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3893.2,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 1.810897435897436,
|
|
"grad_norm": 0.4499379546162627,
|
|
"learning_rate": 3.701868075491139e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22595152258872986,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3673.0,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 1.8189102564102564,
|
|
"grad_norm": 0.5006718742013021,
|
|
"learning_rate": 3.697656604761926e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28063613176345825,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3769.2,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 1.8269230769230769,
|
|
"grad_norm": 0.47557270821506,
|
|
"learning_rate": 3.693418026940325e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2146185040473938,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3377.9,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 1.8349358974358974,
|
|
"grad_norm": 0.5718766251672308,
|
|
"learning_rate": 3.689152409705229e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26902586221694946,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2666.6,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 1.842948717948718,
|
|
"grad_norm": 0.49857619295485034,
|
|
"learning_rate": 3.6848598211672794e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21846452355384827,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3345.5,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 1.8509615384615383,
|
|
"grad_norm": 0.5385567351558466,
|
|
"learning_rate": 3.6805403298677797e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20614317059516907,
|
|
"step": 1155,
|
|
"valid_targets_mean": 2915.1,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 1.858974358974359,
|
|
"grad_norm": 0.5178461800642131,
|
|
"learning_rate": 3.6761940047775966e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558942437171936,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3066.9,
|
|
"valid_targets_min": 2139
|
|
},
|
|
{
|
|
"epoch": 1.8669871794871795,
|
|
"grad_norm": 0.5473587738559478,
|
|
"learning_rate": 3.671820915296063e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264281064271927,
|
|
"step": 1165,
|
|
"valid_targets_mean": 2752.2,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 1.875,
|
|
"grad_norm": 0.5742898492979889,
|
|
"learning_rate": 3.667421131249869e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911713123321533,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2875.4,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 1.8830128205128205,
|
|
"grad_norm": 0.5463479296932019,
|
|
"learning_rate": 3.662994722891946e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2465297281742096,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3061.2,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 1.891025641025641,
|
|
"grad_norm": 0.40591617828758964,
|
|
"learning_rate": 3.658541760900344e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2039278745651245,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4176.2,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 1.8990384615384617,
|
|
"grad_norm": 0.516920853045726,
|
|
"learning_rate": 3.654062316377106e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26839733123779297,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3415.8,
|
|
"valid_targets_min": 2208
|
|
},
|
|
{
|
|
"epoch": 1.907051282051282,
|
|
"grad_norm": 0.49950356462293516,
|
|
"learning_rate": 3.649556460847131e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20692500472068787,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3404.6,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.9150641025641026,
|
|
"grad_norm": 0.8809308213127865,
|
|
"learning_rate": 3.6450242662570314e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2121295928955078,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4477.4,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 1.9230769230769231,
|
|
"grad_norm": 0.4779923123123419,
|
|
"learning_rate": 3.6404658049739854e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2340589463710785,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3211.6,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 1.9310897435897436,
|
|
"grad_norm": 0.414184625487714,
|
|
"learning_rate": 3.63588114978458e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2192663848400116,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4318.8,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 1.939102564102564,
|
|
"grad_norm": 0.5303697493272574,
|
|
"learning_rate": 3.6312703738936504e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22840480506420135,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3477.6,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 1.9471153846153846,
|
|
"grad_norm": 0.43566224462419423,
|
|
"learning_rate": 3.626633550923111e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2339707314968109,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4116.4,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 1.9551282051282053,
|
|
"grad_norm": 0.5197010556809256,
|
|
"learning_rate": 3.621970754910778e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22132663428783417,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2858.6,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.9631410256410255,
|
|
"grad_norm": 0.5473460491584194,
|
|
"learning_rate": 3.6172820603091885e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865564823150635,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3204.2,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 1.9711538461538463,
|
|
"grad_norm": 0.4360570251401029,
|
|
"learning_rate": 3.612567541984413e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23854400217533112,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4248.2,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 1.9791666666666665,
|
|
"grad_norm": 0.4735446183087179,
|
|
"learning_rate": 3.6078272752148574e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1998017132282257,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3103.8,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 1.9871794871794872,
|
|
"grad_norm": 0.4556927270069073,
|
|
"learning_rate": 3.6030613356900635e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18791323900222778,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3856.6,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 1.9951923076923077,
|
|
"grad_norm": 0.45598040579973126,
|
|
"learning_rate": 3.598269799509498e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848716378211975,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4495.0,
|
|
"valid_targets_min": 2194
|
|
},
|
|
{
|
|
"epoch": 2.003205128205128,
|
|
"grad_norm": 0.48113601096988307,
|
|
"learning_rate": 3.5934527431813385e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20847183465957642,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3337.9,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 2.011217948717949,
|
|
"grad_norm": 0.4464604635105121,
|
|
"learning_rate": 3.5886102436212536e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2278617024421692,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4564.4,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 2.019230769230769,
|
|
"grad_norm": 0.46286257830943056,
|
|
"learning_rate": 3.583742378151171e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21383799612522125,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4035.4,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 2.02724358974359,
|
|
"grad_norm": 0.5034513672207104,
|
|
"learning_rate": 3.5788492244980464e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22386686503887177,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3764.6,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.03525641025641,
|
|
"grad_norm": 0.5178445959097301,
|
|
"learning_rate": 3.573930860792621e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22068476676940918,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2955.2,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 2.043269230769231,
|
|
"grad_norm": 0.4828450286557547,
|
|
"learning_rate": 3.568987365568173e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271858274936676,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4249.4,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 2.051282051282051,
|
|
"grad_norm": 0.5734483721951875,
|
|
"learning_rate": 3.564018817759266e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2882147431373596,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3304.8,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 2.059294871794872,
|
|
"grad_norm": 0.5637741231035333,
|
|
"learning_rate": 3.559025296700484e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21064125001430511,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2992.1,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 2.0673076923076925,
|
|
"grad_norm": 0.5631234037431226,
|
|
"learning_rate": 3.554006882125173e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24107757210731506,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3415.8,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 2.0753205128205128,
|
|
"grad_norm": 0.47018159183118846,
|
|
"learning_rate": 3.5489636541641586e-05,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20437480509281158,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3452.8,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 2.0833333333333335,
|
|
"grad_norm": 0.4880151515161931,
|
|
"learning_rate": 3.543895693344472e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24443921446800232,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3210.9,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 2.0913461538461537,
|
|
"grad_norm": 0.5616738633854279,
|
|
"learning_rate": 3.538803080588063e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829934358596802,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3090.2,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 2.0993589743589745,
|
|
"grad_norm": 0.5145409981741939,
|
|
"learning_rate": 3.5336858972105076e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26258933544158936,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3607.2,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 2.1073717948717947,
|
|
"grad_norm": 0.4770200458940273,
|
|
"learning_rate": 3.528544224919708e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20944449305534363,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3943.2,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 2.1153846153846154,
|
|
"grad_norm": 0.5351286659289585,
|
|
"learning_rate": 3.5233781458145934e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264823317527771,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3264.5,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 2.123397435897436,
|
|
"grad_norm": 0.4364617795621253,
|
|
"learning_rate": 3.5181877423838034e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21104183793067932,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3822.4,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 2.1314102564102564,
|
|
"grad_norm": 0.5579212740013267,
|
|
"learning_rate": 3.512973097504371e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25203123688697815,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3057.1,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 2.139423076923077,
|
|
"grad_norm": 0.5560163039281574,
|
|
"learning_rate": 3.507734294440403e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21084235608577728,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3401.2,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 2.1474358974358974,
|
|
"grad_norm": 0.5550177081038501,
|
|
"learning_rate": 3.50247141684175e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602787911891937,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2963.6,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 2.155448717948718,
|
|
"grad_norm": 0.4025082575367705,
|
|
"learning_rate": 3.497184548742667e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22489029169082642,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4617.6,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 2.1634615384615383,
|
|
"grad_norm": 0.5459612756781311,
|
|
"learning_rate": 3.491873774560473e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25036120414733887,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3089.4,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 2.171474358974359,
|
|
"grad_norm": 0.5395675971796833,
|
|
"learning_rate": 3.486539179094208e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29087430238723755,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3440.6,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 2.1794871794871793,
|
|
"grad_norm": 0.5092018797334458,
|
|
"learning_rate": 3.481180847523272e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21696212887763977,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3313.4,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 2.1875,
|
|
"grad_norm": 0.5295673988104233,
|
|
"learning_rate": 3.4757988654060684e-05,
|
|
"loss": 0.2212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2391449213027954,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3206.8,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 2.1955128205128207,
|
|
"grad_norm": 1.3613971129842901,
|
|
"learning_rate": 3.470393318678637e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.222117081284523,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3327.1,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 2.203525641025641,
|
|
"grad_norm": 0.4981966116272187,
|
|
"learning_rate": 3.4649642936532836e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23258623480796814,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3291.8,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.2115384615384617,
|
|
"grad_norm": 0.43365813086550076,
|
|
"learning_rate": 3.4595118770171984e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19812996685504913,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3913.2,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 2.219551282051282,
|
|
"grad_norm": 0.5247182768095835,
|
|
"learning_rate": 3.454036155831077e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1894225925207138,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3751.2,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 2.2275641025641026,
|
|
"grad_norm": 0.553269199889545,
|
|
"learning_rate": 3.4485372175277236e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2299507111310959,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2955.2,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 2.235576923076923,
|
|
"grad_norm": 0.5464234829613472,
|
|
"learning_rate": 3.44301514991066e-05,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23231425881385803,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3088.8,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.2435897435897436,
|
|
"grad_norm": 0.5180228358093808,
|
|
"learning_rate": 3.4374700411527225e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22594726085662842,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3432.4,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 2.251602564102564,
|
|
"grad_norm": 0.5488375513914536,
|
|
"learning_rate": 3.431901979794653e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24900490045547485,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3217.9,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 2.2596153846153846,
|
|
"grad_norm": 0.505291776579264,
|
|
"learning_rate": 3.426311054743685e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23657619953155518,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3181.9,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 2.2676282051282053,
|
|
"grad_norm": 0.5373306561158637,
|
|
"learning_rate": 3.420697355272127e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20928433537483215,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3262.9,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 2.2756410256410255,
|
|
"grad_norm": 0.43766174495312943,
|
|
"learning_rate": 3.415060971015933e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22512772679328918,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3874.0,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 2.2836538461538463,
|
|
"grad_norm": 0.5536725881247396,
|
|
"learning_rate": 3.4094019919732736e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986356019973755,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3210.8,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 2.2916666666666665,
|
|
"grad_norm": 0.4934531732234053,
|
|
"learning_rate": 3.403720508503098e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2186366319656372,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3384.0,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 2.2996794871794872,
|
|
"grad_norm": 0.5009294955215486,
|
|
"learning_rate": 3.398016611323693e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21268531680107117,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3506.9,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 2.3076923076923075,
|
|
"grad_norm": 0.48874671172712036,
|
|
"learning_rate": 3.392290391511232e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21587032079696655,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3306.5,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 2.315705128205128,
|
|
"grad_norm": 0.5103195501063624,
|
|
"learning_rate": 3.386541940498322e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2232041209936142,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3811.5,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 2.323717948717949,
|
|
"grad_norm": 0.4970664229701647,
|
|
"learning_rate": 3.380771350072543e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23805755376815796,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3688.0,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 2.331730769230769,
|
|
"grad_norm": 0.4510517770993502,
|
|
"learning_rate": 3.374978712374986e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21762406826019287,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4083.8,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 2.33974358974359,
|
|
"grad_norm": 0.4381525573648137,
|
|
"learning_rate": 3.369164119898774e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1905115395784378,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3869.7,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 2.34775641025641,
|
|
"grad_norm": 0.4764400553773267,
|
|
"learning_rate": 3.363327665487593e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23225094377994537,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3675.2,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 2.355769230769231,
|
|
"grad_norm": 0.4939814659194403,
|
|
"learning_rate": 3.357469442334206e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26246967911720276,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3462.3,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 2.363782051282051,
|
|
"grad_norm": 0.5401786052759295,
|
|
"learning_rate": 3.351589543978965e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23349204659461975,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2899.4,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 2.371794871794872,
|
|
"grad_norm": 0.5472139904225963,
|
|
"learning_rate": 3.345688064308317e-05,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2089230865240097,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3110.4,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 2.3798076923076925,
|
|
"grad_norm": 0.458331587340864,
|
|
"learning_rate": 3.339765097553307e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21575820446014404,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3449.1,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.3878205128205128,
|
|
"grad_norm": 0.49455068163325516,
|
|
"learning_rate": 3.33382073828807e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24723072350025177,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3645.0,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 2.3958333333333335,
|
|
"grad_norm": 0.4386780332732727,
|
|
"learning_rate": 3.327855081428326e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22455403208732605,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4253.0,
|
|
"valid_targets_min": 2122
|
|
},
|
|
{
|
|
"epoch": 2.4038461538461537,
|
|
"grad_norm": 0.5595129873547966,
|
|
"learning_rate": 3.3218682222298584e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26848047971725464,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3699.6,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 2.4118589743589745,
|
|
"grad_norm": 0.5203649586160911,
|
|
"learning_rate": 3.315860256286996e-05,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2161019742488861,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2969.0,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 2.4198717948717947,
|
|
"grad_norm": 0.48238723093848074,
|
|
"learning_rate": 3.3098312795310894e-05,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20345598459243774,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3430.6,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.4278846153846154,
|
|
"grad_norm": 0.6327412024094834,
|
|
"learning_rate": 3.303781388228974e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22221553325653076,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2144.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 2.435897435897436,
|
|
"grad_norm": 0.6837869827215011,
|
|
"learning_rate": 3.297710678981435e-05,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1765064001083374,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4076.7,
|
|
"valid_targets_min": 2191
|
|
},
|
|
{
|
|
"epoch": 2.4439102564102564,
|
|
"grad_norm": 0.4872367895970891,
|
|
"learning_rate": 3.291619248721667e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2066897302865982,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3390.9,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 2.451923076923077,
|
|
"grad_norm": 0.4839123688592673,
|
|
"learning_rate": 3.285507194713724e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22590945661067963,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3387.2,
|
|
"valid_targets_min": 2066
|
|
},
|
|
{
|
|
"epoch": 2.4599358974358974,
|
|
"grad_norm": 0.5310641416737413,
|
|
"learning_rate": 3.279374614550966e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22865507006645203,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3314.9,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 2.467948717948718,
|
|
"grad_norm": 0.5984703425117173,
|
|
"learning_rate": 3.2732216061545e-05,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21253696084022522,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2831.1,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 2.4759615384615383,
|
|
"grad_norm": 0.5213539179148546,
|
|
"learning_rate": 3.2670482677716214e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26152336597442627,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3748.8,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 2.483974358974359,
|
|
"grad_norm": 0.5596224583501783,
|
|
"learning_rate": 3.2608546979742394e-05,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24703338742256165,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3326.6,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 2.4919871794871793,
|
|
"grad_norm": 0.5054522907981005,
|
|
"learning_rate": 3.254640995657307e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18621867895126343,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2977.2,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.5207121321246797,
|
|
"learning_rate": 3.248407260037239e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2505119740962982,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3519.9,
|
|
"valid_targets_min": 2315
|
|
},
|
|
{
|
|
"epoch": 2.5080128205128203,
|
|
"grad_norm": 0.5397392067361689,
|
|
"learning_rate": 3.24215359065033e-05,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20214936137199402,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3165.1,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 2.516025641025641,
|
|
"grad_norm": 0.5486644133797549,
|
|
"learning_rate": 3.235880087351164e-05,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726089358329773,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3464.1,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 2.5240384615384617,
|
|
"grad_norm": 0.554056515656035,
|
|
"learning_rate": 3.2295868503110184e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18808777630329132,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4925.1,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 2.532051282051282,
|
|
"grad_norm": 0.5753807285501622,
|
|
"learning_rate": 3.22327398001627e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23577101528644562,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3408.1,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 2.5400641025641026,
|
|
"grad_norm": 0.47516390006880044,
|
|
"learning_rate": 3.216941577266783e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19967889785766602,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3382.3,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 2.5480769230769234,
|
|
"grad_norm": 0.4871279952298379,
|
|
"learning_rate": 3.210589743174308e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20786455273628235,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3622.5,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 2.5560897435897436,
|
|
"grad_norm": 0.5420223468811577,
|
|
"learning_rate": 3.204218579160857e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18591973185539246,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3835.8,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 2.564102564102564,
|
|
"grad_norm": 0.4941582897081752,
|
|
"learning_rate": 3.197828186957094e-05,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23235294222831726,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3895.9,
|
|
"valid_targets_min": 1975
|
|
},
|
|
{
|
|
"epoch": 2.5721153846153846,
|
|
"grad_norm": 0.5759213015140178,
|
|
"learning_rate": 3.191418668600705e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27510184049606323,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3370.2,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 2.5801282051282053,
|
|
"grad_norm": 0.4916915979617188,
|
|
"learning_rate": 3.184990126434771e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2400546669960022,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3853.6,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 2.5881410256410255,
|
|
"grad_norm": 0.5134223213888042,
|
|
"learning_rate": 3.178542663106131e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25739049911499023,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3560.9,
|
|
"valid_targets_min": 2072
|
|
},
|
|
{
|
|
"epoch": 2.5961538461538463,
|
|
"grad_norm": 0.4647485387209341,
|
|
"learning_rate": 3.172076381563748e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2345583736896515,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4102.4,
|
|
"valid_targets_min": 1909
|
|
},
|
|
{
|
|
"epoch": 2.6041666666666665,
|
|
"grad_norm": 0.4613965020941828,
|
|
"learning_rate": 3.165591385057058e-05,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18396924436092377,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3841.2,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 2.6121794871794872,
|
|
"grad_norm": 0.5606790940088371,
|
|
"learning_rate": 3.1590877771343316e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2248254120349884,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3164.4,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 2.6201923076923075,
|
|
"grad_norm": 0.5628286317328028,
|
|
"learning_rate": 3.152565661641008e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901548445224762,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3357.9,
|
|
"valid_targets_min": 2079
|
|
},
|
|
{
|
|
"epoch": 2.628205128205128,
|
|
"grad_norm": 0.4835026603266905,
|
|
"learning_rate": 3.1460251427180474e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21858114004135132,
|
|
"step": 1640,
|
|
"valid_targets_mean": 3778.3,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 2.636217948717949,
|
|
"grad_norm": 0.44764340943402625,
|
|
"learning_rate": 3.139466324800263e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19746120274066925,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3783.5,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 2.644230769230769,
|
|
"grad_norm": 0.4756491019431985,
|
|
"learning_rate": 3.132889312614655e-05,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2135154902935028,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4373.3,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 2.65224358974359,
|
|
"grad_norm": 0.499792970874057,
|
|
"learning_rate": 3.126294211178737e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21535325050354004,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3278.2,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 2.66025641025641,
|
|
"grad_norm": 0.509858262662174,
|
|
"learning_rate": 3.1196811257988634e-05,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24099260568618774,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3947.8,
|
|
"valid_targets_min": 2214
|
|
},
|
|
{
|
|
"epoch": 2.668269230769231,
|
|
"grad_norm": 0.49425844374690214,
|
|
"learning_rate": 3.1130501620685394e-05,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2178635597229004,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3340.4,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 2.676282051282051,
|
|
"grad_norm": 0.5160738309260722,
|
|
"learning_rate": 3.106401425866745e-05,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2119971215724945,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3309.1,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 2.684294871794872,
|
|
"grad_norm": 0.4827614168487263,
|
|
"learning_rate": 3.099735023356236e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2164524495601654,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3530.8,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 2.6923076923076925,
|
|
"grad_norm": 0.5031362133138242,
|
|
"learning_rate": 3.0930510609818564e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2503780126571655,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3499.4,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 2.7003205128205128,
|
|
"grad_norm": 0.4600421225293802,
|
|
"learning_rate": 3.086349645468831e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20820432901382446,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3730.4,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 2.7083333333333335,
|
|
"grad_norm": 0.4879718110605468,
|
|
"learning_rate": 3.079630883821067e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23680952191352844,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3882.0,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 2.7163461538461537,
|
|
"grad_norm": 0.43727736757613994,
|
|
"learning_rate": 3.0728948833194436e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17322327196598053,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3951.0,
|
|
"valid_targets_min": 2267
|
|
},
|
|
{
|
|
"epoch": 2.7243589743589745,
|
|
"grad_norm": 0.4879962165083738,
|
|
"learning_rate": 3.066141751520099e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19267430901527405,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3747.5,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 2.7323717948717947,
|
|
"grad_norm": 0.457795201986618,
|
|
"learning_rate": 3.059371596252712e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18401110172271729,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3678.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.7403846153846154,
|
|
"grad_norm": 0.5151213135732414,
|
|
"learning_rate": 3.0525845256187834e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2076324224472046,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3722.7,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 2.748397435897436,
|
|
"grad_norm": 0.5497157184393276,
|
|
"learning_rate": 3.0457806479899044e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23892144858837128,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3113.8,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 2.7564102564102564,
|
|
"grad_norm": 0.4177740808857472,
|
|
"learning_rate": 3.0389600720060318e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19988121092319489,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4454.9,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 2.7644230769230766,
|
|
"grad_norm": 0.40429971426500544,
|
|
"learning_rate": 3.0321229065737522e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552007794380188,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3879.1,
|
|
"valid_targets_min": 2008
|
|
},
|
|
{
|
|
"epoch": 2.7724358974358974,
|
|
"grad_norm": 0.5259528054083097,
|
|
"learning_rate": 3.0252692608645384e-05,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2267281860113144,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3345.8,
|
|
"valid_targets_min": 2071
|
|
},
|
|
{
|
|
"epoch": 2.780448717948718,
|
|
"grad_norm": 0.5358999746656999,
|
|
"learning_rate": 3.0183992443130127e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26404300332069397,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3579.4,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 2.7884615384615383,
|
|
"grad_norm": 0.511982571691961,
|
|
"learning_rate": 3.011512966615195e-05,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23434904217720032,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3328.4,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 2.796474358974359,
|
|
"grad_norm": 0.48335543579817647,
|
|
"learning_rate": 3.0046105377267523e-05,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1833260953426361,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3190.2,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 2.8044871794871797,
|
|
"grad_norm": 0.43039906371305947,
|
|
"learning_rate": 2.9976920678612456e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997445970773697,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4131.9,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 2.8125,
|
|
"grad_norm": 0.4933277471312345,
|
|
"learning_rate": 2.9907576674883664e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23295000195503235,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3472.1,
|
|
"valid_targets_min": 1975
|
|
},
|
|
{
|
|
"epoch": 2.8205128205128203,
|
|
"grad_norm": 0.5681686885351188,
|
|
"learning_rate": 2.983807447332174e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20448961853981018,
|
|
"step": 1760,
|
|
"valid_targets_mean": 2844.5,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.828525641025641,
|
|
"grad_norm": 0.5668449658402579,
|
|
"learning_rate": 2.9768415183693293e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24366453289985657,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2975.3,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 2.8365384615384617,
|
|
"grad_norm": 0.5226268711251213,
|
|
"learning_rate": 2.9698599918273197e-05,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19929251074790955,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3103.5,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 2.844551282051282,
|
|
"grad_norm": 0.5302151675543736,
|
|
"learning_rate": 2.962862979182686e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21836702525615692,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3503.3,
|
|
"valid_targets_min": 2282
|
|
},
|
|
{
|
|
"epoch": 2.8525641025641026,
|
|
"grad_norm": 0.5027916097789561,
|
|
"learning_rate": 2.95585059215924e-05,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17366638779640198,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3490.8,
|
|
"valid_targets_min": 1933
|
|
},
|
|
{
|
|
"epoch": 2.8605769230769234,
|
|
"grad_norm": 0.4601505846710778,
|
|
"learning_rate": 2.948822942726284e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18435326218605042,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4212.1,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 2.8685897435897436,
|
|
"grad_norm": 0.5293089677478494,
|
|
"learning_rate": 2.941780143096817e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23952870070934296,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3439.8,
|
|
"valid_targets_min": 1702
|
|
},
|
|
{
|
|
"epoch": 2.876602564102564,
|
|
"grad_norm": 0.5897045687575828,
|
|
"learning_rate": 2.9347223057257505e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963252067565918,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3266.6,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 2.8846153846153846,
|
|
"grad_norm": 0.48554553955268526,
|
|
"learning_rate": 2.927649543308106e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1865456998348236,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3357.2,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 2.8926282051282053,
|
|
"grad_norm": 0.4898939438514679,
|
|
"learning_rate": 2.9205619687772212e-05,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19000311195850372,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3306.9,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 2.9006410256410255,
|
|
"grad_norm": 0.44510432099453506,
|
|
"learning_rate": 2.9134596953029413e-05,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19600239396095276,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3557.1,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 2.9086538461538463,
|
|
"grad_norm": 0.5066463304119703,
|
|
"learning_rate": 2.9063428362898168e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23164188861846924,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3555.6,
|
|
"valid_targets_min": 2122
|
|
},
|
|
{
|
|
"epoch": 2.9166666666666665,
|
|
"grad_norm": 0.5756911724515725,
|
|
"learning_rate": 2.8992115053752905e-05,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2190612107515335,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2842.9,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 2.9246794871794872,
|
|
"grad_norm": 0.5255305070731653,
|
|
"learning_rate": 2.8920658164278816e-05,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22007790207862854,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3013.8,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 2.9326923076923075,
|
|
"grad_norm": 0.4959332729505306,
|
|
"learning_rate": 2.884905883545373e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2223539650440216,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3726.8,
|
|
"valid_targets_min": 2423
|
|
},
|
|
{
|
|
"epoch": 2.940705128205128,
|
|
"grad_norm": 0.4602219302613781,
|
|
"learning_rate": 2.877731821052981e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20421993732452393,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3851.6,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 2.948717948717949,
|
|
"grad_norm": 0.5758106780745842,
|
|
"learning_rate": 2.8705437435015375e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22423945367336273,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2997.2,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 2.956730769230769,
|
|
"grad_norm": 0.5555841227697801,
|
|
"learning_rate": 2.8633417656656566e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24468722939491272,
|
|
"step": 1845,
|
|
"valid_targets_mean": 2970.1,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.96474358974359,
|
|
"grad_norm": 0.5519550879359697,
|
|
"learning_rate": 2.8561260025419036e-05,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539096474647522,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3302.1,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 2.97275641025641,
|
|
"grad_norm": 0.5521763069467881,
|
|
"learning_rate": 2.8488965693469583e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20984898507595062,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3273.4,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 2.980769230769231,
|
|
"grad_norm": 0.3863036704502182,
|
|
"learning_rate": 2.8416535815157763e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.183609738945961,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4792.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 2.988782051282051,
|
|
"grad_norm": 0.5142968904085405,
|
|
"learning_rate": 2.8343971546997434e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554502487182617,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4478.8,
|
|
"valid_targets_min": 2145
|
|
},
|
|
{
|
|
"epoch": 2.996794871794872,
|
|
"grad_norm": 0.4895495909278828,
|
|
"learning_rate": 2.827127404764831e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1824975162744522,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3639.9,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 3.0048076923076925,
|
|
"grad_norm": 0.5077644475380257,
|
|
"learning_rate": 2.8198444477897467e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20675477385520935,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3273.4,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 3.0128205128205128,
|
|
"grad_norm": 0.7231231356441381,
|
|
"learning_rate": 2.8125484000640787e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21694618463516235,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2460.2,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 3.0208333333333335,
|
|
"grad_norm": 0.5418323064797319,
|
|
"learning_rate": 2.8052393780864394e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2313728928565979,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3570.8,
|
|
"valid_targets_min": 2119
|
|
},
|
|
{
|
|
"epoch": 3.0288461538461537,
|
|
"grad_norm": 0.4734695604472562,
|
|
"learning_rate": 2.797917498562607e-05,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20120325684547424,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3707.3,
|
|
"valid_targets_min": 1993
|
|
},
|
|
{
|
|
"epoch": 3.0368589743589745,
|
|
"grad_norm": 0.5208029845200504,
|
|
"learning_rate": 2.7905828784036596e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20613202452659607,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3343.9,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 3.0448717948717947,
|
|
"grad_norm": 0.5636930047627939,
|
|
"learning_rate": 2.78323563472411e-05,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2063179761171341,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3101.1,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 3.0528846153846154,
|
|
"grad_norm": 0.47028704072176375,
|
|
"learning_rate": 2.7758758848400354e-05,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2136302888393402,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4080.3,
|
|
"valid_targets_min": 1671
|
|
},
|
|
{
|
|
"epoch": 3.0608974358974357,
|
|
"grad_norm": 0.6406329634245468,
|
|
"learning_rate": 2.7685037462672043e-05,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20517919957637787,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2988.4,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 3.0689102564102564,
|
|
"grad_norm": 0.5208968741487041,
|
|
"learning_rate": 2.7611193367191993e-05,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2274111807346344,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3925.6,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 3.076923076923077,
|
|
"grad_norm": 0.6213844783621272,
|
|
"learning_rate": 2.7537227741055378e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29447296261787415,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3099.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.0849358974358974,
|
|
"grad_norm": 0.6223062142576921,
|
|
"learning_rate": 2.746314176529791e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2739785313606262,
|
|
"step": 1925,
|
|
"valid_targets_mean": 2990.8,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 3.092948717948718,
|
|
"grad_norm": 0.5068820414313022,
|
|
"learning_rate": 2.7388936622876957e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2140953093767166,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3561.2,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 3.1009615384615383,
|
|
"grad_norm": 0.6073432596277979,
|
|
"learning_rate": 2.7314613498652663e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23374855518341064,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3133.8,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 3.108974358974359,
|
|
"grad_norm": 0.5401937808022614,
|
|
"learning_rate": 2.7240173579369025e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21288909018039703,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3231.6,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 3.1169871794871793,
|
|
"grad_norm": 0.5574637188150374,
|
|
"learning_rate": 2.7165618053634962e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27656134963035583,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3314.0,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 3.125,
|
|
"grad_norm": 0.49173386570489425,
|
|
"learning_rate": 2.7090948111905304e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18874695897102356,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3627.7,
|
|
"valid_targets_min": 2175
|
|
},
|
|
{
|
|
"epoch": 3.1330128205128207,
|
|
"grad_norm": 0.6238382719891059,
|
|
"learning_rate": 2.701616494646183e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20941844582557678,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3470.2,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 3.141025641025641,
|
|
"grad_norm": 0.487803607393044,
|
|
"learning_rate": 2.6941269751394174e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16428649425506592,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3103.4,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 3.1490384615384617,
|
|
"grad_norm": 0.6360432454399796,
|
|
"learning_rate": 2.686626372258081e-05,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702599763870239,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2839.3,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 3.157051282051282,
|
|
"grad_norm": 0.49950293386656874,
|
|
"learning_rate": 2.6791148057669913e-05,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20391499996185303,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3623.2,
|
|
"valid_targets_min": 1986
|
|
},
|
|
{
|
|
"epoch": 3.1650641025641026,
|
|
"grad_norm": 0.4641725143928944,
|
|
"learning_rate": 2.671592395606027e-05,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23568058013916016,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4207.4,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 3.173076923076923,
|
|
"grad_norm": 0.5731658630297737,
|
|
"learning_rate": 2.6640592618882114e-05,
|
|
"loss": 0.206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18935617804527283,
|
|
"step": 1980,
|
|
"valid_targets_mean": 2550.4,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 3.1810897435897436,
|
|
"grad_norm": 0.4182322068592183,
|
|
"learning_rate": 2.656515524897795e-05,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1807328462600708,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4199.2,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 3.189102564102564,
|
|
"grad_norm": 0.56077657394403,
|
|
"learning_rate": 2.6489613050883343e-05,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19504913687705994,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3384.6,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 3.1971153846153846,
|
|
"grad_norm": 0.48875364275581146,
|
|
"learning_rate": 2.6413967230807677e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24835677444934845,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4170.1,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 3.2051282051282053,
|
|
"grad_norm": 0.465054834927291,
|
|
"learning_rate": 2.6338218996614924e-05,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20396825671195984,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4063.9,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 3.2131410256410255,
|
|
"grad_norm": 0.5825104361550973,
|
|
"learning_rate": 2.6262369557804325e-05,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26649874448776245,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3483.6,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 3.2211538461538463,
|
|
"grad_norm": 0.5518087060055086,
|
|
"learning_rate": 2.6186420125491094e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21316856145858765,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3461.9,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 3.2291666666666665,
|
|
"grad_norm": 0.4932050175961384,
|
|
"learning_rate": 2.6110371912387083e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2211998999118805,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3540.0,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 3.2371794871794872,
|
|
"grad_norm": 0.9755740013917427,
|
|
"learning_rate": 2.6034226132781407e-05,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17934134602546692,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3694.6,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 3.2451923076923075,
|
|
"grad_norm": 0.5365875996452869,
|
|
"learning_rate": 2.5957984002521066e-05,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610040307044983,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3428.6,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 3.253205128205128,
|
|
"grad_norm": 0.5319413568742882,
|
|
"learning_rate": 2.588164673899151e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22881156206130981,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3281.9,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 3.261217948717949,
|
|
"grad_norm": 0.4728884688960481,
|
|
"learning_rate": 2.580521556109724e-05,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23082220554351807,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4267.6,
|
|
"valid_targets_min": 2654
|
|
},
|
|
{
|
|
"epoch": 3.269230769230769,
|
|
"grad_norm": 0.5954186304706786,
|
|
"learning_rate": 2.57286916892423e-05,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2065027952194214,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3015.3,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 3.27724358974359,
|
|
"grad_norm": 0.4581325903962391,
|
|
"learning_rate": 2.5652076345310822e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1993183046579361,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3884.2,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 3.28525641025641,
|
|
"grad_norm": 0.4978220491849944,
|
|
"learning_rate": 2.5575370752647507e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21730056405067444,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3592.5,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 3.293269230769231,
|
|
"grad_norm": 0.48150684905539476,
|
|
"learning_rate": 2.5498576136038077e-05,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2327374815940857,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3775.5,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 3.301282051282051,
|
|
"grad_norm": 0.6130312357702806,
|
|
"learning_rate": 2.542169372168976e-05,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21109043061733246,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3150.4,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 3.309294871794872,
|
|
"grad_norm": 0.5526478381871899,
|
|
"learning_rate": 2.5344724737211646e-05,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24314221739768982,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3232.4,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 3.3173076923076925,
|
|
"grad_norm": 0.6292693413383127,
|
|
"learning_rate": 2.5267670411595152e-05,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24798676371574402,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3371.9,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 3.3253205128205128,
|
|
"grad_norm": 0.5349155064935684,
|
|
"learning_rate": 2.5190531975194345e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24136458337306976,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3143.9,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.5778043108253703,
|
|
"learning_rate": 2.5113310659706322e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254383385181427,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3132.8,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 3.3413461538461537,
|
|
"grad_norm": 0.4561195758923805,
|
|
"learning_rate": 2.5036007698151553e-05,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16544273495674133,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3583.4,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 3.3493589743589745,
|
|
"grad_norm": 0.472371073834948,
|
|
"learning_rate": 2.4958624324854185e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1999911367893219,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3940.1,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.3573717948717947,
|
|
"grad_norm": 0.47468781614384653,
|
|
"learning_rate": 2.4881161775422303e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20175859332084656,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3614.2,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 3.3653846153846154,
|
|
"grad_norm": 0.5463743266445147,
|
|
"learning_rate": 2.480362128672824e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244294434785843,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3027.9,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 3.373397435897436,
|
|
"grad_norm": 0.5091167365835734,
|
|
"learning_rate": 2.4726004096888817e-05,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22996219992637634,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3577.7,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 3.3814102564102564,
|
|
"grad_norm": 0.5525551859037144,
|
|
"learning_rate": 2.4648311445245558e-05,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22371171414852142,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3024.2,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 3.389423076923077,
|
|
"grad_norm": 0.5245484166047455,
|
|
"learning_rate": 2.457054457234493e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585659623146057,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3375.2,
|
|
"valid_targets_min": 2277
|
|
},
|
|
{
|
|
"epoch": 3.3974358974358974,
|
|
"grad_norm": 0.5021432681126327,
|
|
"learning_rate": 2.4492704719918497e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20290249586105347,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3324.5,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 3.405448717948718,
|
|
"grad_norm": 0.4075352140717916,
|
|
"learning_rate": 2.4414793130863134e-05,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17111021280288696,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4749.3,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 3.4134615384615383,
|
|
"grad_norm": 0.4912694843388253,
|
|
"learning_rate": 2.433681104922114e-05,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19937452673912048,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3759.5,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 3.421474358974359,
|
|
"grad_norm": 0.46633524863143777,
|
|
"learning_rate": 2.4258759720160412e-05,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20374462008476257,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4000.0,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 3.4294871794871793,
|
|
"grad_norm": 0.5353379245393521,
|
|
"learning_rate": 2.4180640389954534e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19159536063671112,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3524.2,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 3.4375,
|
|
"grad_norm": 0.5106404124718604,
|
|
"learning_rate": 2.4102454305962892e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20838648080825806,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3315.6,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 3.4455128205128207,
|
|
"grad_norm": 0.5117939711713273,
|
|
"learning_rate": 2.402420271661076e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21036981046199799,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3242.2,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 3.453525641025641,
|
|
"grad_norm": 0.5489890660628814,
|
|
"learning_rate": 2.3945886871369338e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2170429229736328,
|
|
"step": 2155,
|
|
"valid_targets_mean": 2959.5,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 3.4615384615384617,
|
|
"grad_norm": 0.5220848220225659,
|
|
"learning_rate": 2.3867508020735865e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23258650302886963,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3513.6,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 3.469551282051282,
|
|
"grad_norm": 0.5177385972382765,
|
|
"learning_rate": 2.3789067416213568e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24745973944664001,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3519.9,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 3.4775641025641026,
|
|
"grad_norm": 0.5008188668113256,
|
|
"learning_rate": 2.3710566310291733e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23902934789657593,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3710.1,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 3.485576923076923,
|
|
"grad_norm": 0.47611524773645536,
|
|
"learning_rate": 2.36320059564257e-05,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20398235321044922,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3758.6,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 3.4935897435897436,
|
|
"grad_norm": 0.5366226972721881,
|
|
"learning_rate": 2.3553387609016833e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20256412029266357,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2980.2,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 3.501602564102564,
|
|
"grad_norm": 0.49018507672277717,
|
|
"learning_rate": 2.347471252339252e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20523664355278015,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3595.4,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 3.5096153846153846,
|
|
"grad_norm": 0.5651047221687497,
|
|
"learning_rate": 2.339598195578608e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22433197498321533,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3616.5,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 3.5176282051282053,
|
|
"grad_norm": 0.49048026751855617,
|
|
"learning_rate": 2.3317197163316757e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22643204033374786,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3851.8,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 3.5256410256410255,
|
|
"grad_norm": 0.48873357577242255,
|
|
"learning_rate": 2.3238359403969608e-05,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21824310719966888,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3577.9,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 3.5336538461538463,
|
|
"grad_norm": 0.48623991711741665,
|
|
"learning_rate": 2.315946993657543e-05,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19976502656936646,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3920.6,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 3.5416666666666665,
|
|
"grad_norm": 0.4839874669390242,
|
|
"learning_rate": 2.3080530020790673e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20096611976623535,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3286.7,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 3.5496794871794872,
|
|
"grad_norm": 0.4831112771937781,
|
|
"learning_rate": 2.300154091707731e-05,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.225946307182312,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3587.2,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 3.5576923076923075,
|
|
"grad_norm": 0.5885045198895541,
|
|
"learning_rate": 2.2922503886682706e-05,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22732587158679962,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3120.9,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 3.565705128205128,
|
|
"grad_norm": 0.550148887781423,
|
|
"learning_rate": 2.28434201916195e-05,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22492703795433044,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3465.1,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 3.573717948717949,
|
|
"grad_norm": 0.4938697286515837,
|
|
"learning_rate": 2.2764291094645446e-05,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1986878365278244,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3271.9,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 3.581730769230769,
|
|
"grad_norm": 0.5557254876044349,
|
|
"learning_rate": 2.2685117859243223e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2523908019065857,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3248.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.58974358974359,
|
|
"grad_norm": 0.6008327672276413,
|
|
"learning_rate": 2.2605901749600312e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25647902488708496,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3110.2,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 3.59775641025641,
|
|
"grad_norm": 0.532844954299154,
|
|
"learning_rate": 2.2526644030588764e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2435721606016159,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3189.6,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 3.605769230769231,
|
|
"grad_norm": 0.545878516940055,
|
|
"learning_rate": 2.2447345967745036e-05,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22559991478919983,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3135.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 3.613782051282051,
|
|
"grad_norm": 0.4562069435188204,
|
|
"learning_rate": 2.2368008827249756e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20217636227607727,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3706.4,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 3.621794871794872,
|
|
"grad_norm": 0.458353746283704,
|
|
"learning_rate": 2.228863387590752e-05,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18142034113407135,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3865.8,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 3.6298076923076925,
|
|
"grad_norm": 0.5216787285174662,
|
|
"learning_rate": 2.2209222381126687e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20579573512077332,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3342.1,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 3.6378205128205128,
|
|
"grad_norm": 0.5925836434071377,
|
|
"learning_rate": 2.212977561089908e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542372941970825,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2954.4,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 3.6458333333333335,
|
|
"grad_norm": 0.5313423229891275,
|
|
"learning_rate": 2.20502948337798e-05,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20688408613204956,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3146.2,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 3.6538461538461537,
|
|
"grad_norm": 0.519268139967825,
|
|
"learning_rate": 2.1970781318866953e-05,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26766157150268555,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3711.6,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 3.6618589743589745,
|
|
"grad_norm": 0.5198099851729401,
|
|
"learning_rate": 2.1891236335781363e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24527433514595032,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3303.0,
|
|
"valid_targets_min": 2143
|
|
},
|
|
{
|
|
"epoch": 3.6698717948717947,
|
|
"grad_norm": 0.4881045184124244,
|
|
"learning_rate": 2.1811661154646332e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2145967185497284,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3696.6,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 3.6778846153846154,
|
|
"grad_norm": 0.5149448739418462,
|
|
"learning_rate": 2.173205704606735e-05,
|
|
"loss": 0.2164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21173402667045593,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3381.4,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 3.685897435897436,
|
|
"grad_norm": 0.46762585987689204,
|
|
"learning_rate": 2.1652425281111785e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18129359185695648,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3209.1,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 3.6939102564102564,
|
|
"grad_norm": 0.5016573564761132,
|
|
"learning_rate": 2.1572767131288607e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1932133138179779,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3831.2,
|
|
"valid_targets_min": 2363
|
|
},
|
|
{
|
|
"epoch": 3.7019230769230766,
|
|
"grad_norm": 0.553031313219947,
|
|
"learning_rate": 2.1493083868528095e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638936936855316,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3288.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.7099358974358974,
|
|
"grad_norm": 0.5004078443376743,
|
|
"learning_rate": 2.141337676516151e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2213425636291504,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3674.1,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 3.717948717948718,
|
|
"grad_norm": 0.49828281149304193,
|
|
"learning_rate": 2.1333647093900772e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19858595728874207,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3288.1,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 3.7259615384615383,
|
|
"grad_norm": 0.516956470560625,
|
|
"learning_rate": 2.1253896127818175e-05,
|
|
"loss": 0.2002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19624871015548706,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3053.8,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 3.733974358974359,
|
|
"grad_norm": 0.6330396283922977,
|
|
"learning_rate": 2.1174125140326013e-05,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25099366903305054,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2462.1,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 3.7419871794871797,
|
|
"grad_norm": 0.4195583425046997,
|
|
"learning_rate": 2.1094335405156277e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856677234172821,
|
|
"step": 2335,
|
|
"valid_targets_mean": 4358.2,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.5579122745611423,
|
|
"learning_rate": 2.1014528196340316e-05,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21907992660999298,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2995.2,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 3.7580128205128203,
|
|
"grad_norm": 0.4755276678031606,
|
|
"learning_rate": 2.093470478818847e-05,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19798678159713745,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3438.9,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 3.766025641025641,
|
|
"grad_norm": 0.5095919410864391,
|
|
"learning_rate": 2.0854866455269756e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24966293573379517,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3300.4,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 3.7740384615384617,
|
|
"grad_norm": 0.43308007874052606,
|
|
"learning_rate": 2.0775014472391496e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1921013593673706,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4197.9,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 3.782051282051282,
|
|
"grad_norm": 0.5656612684947171,
|
|
"learning_rate": 2.0695150114578958e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663126587867737,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3496.9,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 3.7900641025641026,
|
|
"grad_norm": 0.5350045307116155,
|
|
"learning_rate": 2.061527465705502e-05,
|
|
"loss": 0.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25265276432037354,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3458.9,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 3.7980769230769234,
|
|
"grad_norm": 0.4972661860936192,
|
|
"learning_rate": 2.0535389375219773e-05,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22089067101478577,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3173.3,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 3.8060897435897436,
|
|
"grad_norm": 0.5148532675806966,
|
|
"learning_rate": 2.045549554463019e-05,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19942928850650787,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3215.6,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 3.814102564102564,
|
|
"grad_norm": 0.5911016991139373,
|
|
"learning_rate": 2.0375594440979744e-05,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21095842123031616,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3474.6,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 3.8221153846153846,
|
|
"grad_norm": 0.597719336924374,
|
|
"learning_rate": 2.0295687340078037e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2182496190071106,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2813.0,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 3.8301282051282053,
|
|
"grad_norm": 0.530072710758143,
|
|
"learning_rate": 2.0215775517830437e-05,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22930724918842316,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3318.5,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 3.8381410256410255,
|
|
"grad_norm": 0.5154279772668271,
|
|
"learning_rate": 2.013586025021769e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26545244455337524,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3903.9,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 3.8461538461538463,
|
|
"grad_norm": 0.5746132815293595,
|
|
"learning_rate": 2.0055942813275564e-05,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23045890033245087,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2854.4,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 3.8541666666666665,
|
|
"grad_norm": 0.6238290798209558,
|
|
"learning_rate": 1.9976024483074456e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23214343190193176,
|
|
"step": 2405,
|
|
"valid_targets_mean": 2401.7,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 3.8621794871794872,
|
|
"grad_norm": 0.5664988549700263,
|
|
"learning_rate": 1.9896106535699025e-05,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2216949760913849,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2997.2,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 3.8701923076923075,
|
|
"grad_norm": 0.6198688515244216,
|
|
"learning_rate": 1.9816190247227834e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24409687519073486,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2904.2,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 3.878205128205128,
|
|
"grad_norm": 0.5277766343405376,
|
|
"learning_rate": 1.9736276893712954e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2225913405418396,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3026.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.886217948717949,
|
|
"grad_norm": 0.49932532353423154,
|
|
"learning_rate": 1.9656367751159565e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21792566776275635,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3857.0,
|
|
"valid_targets_min": 2043
|
|
},
|
|
{
|
|
"epoch": 3.894230769230769,
|
|
"grad_norm": 0.5057026335085257,
|
|
"learning_rate": 1.957646409550565e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26214978098869324,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3946.2,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 3.90224358974359,
|
|
"grad_norm": 0.4619341239060796,
|
|
"learning_rate": 1.9496567202601545e-05,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20152977108955383,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3997.2,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 3.91025641025641,
|
|
"grad_norm": 0.48590936854836037,
|
|
"learning_rate": 1.9416678348189627e-05,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1917511522769928,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3163.1,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.918269230769231,
|
|
"grad_norm": 0.5804332092212046,
|
|
"learning_rate": 1.9336798807883907e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23004987835884094,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3031.5,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 3.926282051282051,
|
|
"grad_norm": 0.5355031811260637,
|
|
"learning_rate": 1.9256929857149686e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2335416078567505,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3444.2,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 3.934294871794872,
|
|
"grad_norm": 0.4376032631537377,
|
|
"learning_rate": 1.9177072771283167e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19955921173095703,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4088.3,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 3.9423076923076925,
|
|
"grad_norm": 0.4972198496907315,
|
|
"learning_rate": 1.9097228825391087e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20892062783241272,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3496.2,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 3.9503205128205128,
|
|
"grad_norm": 0.5119397009023006,
|
|
"learning_rate": 1.9017399294370413e-05,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20475465059280396,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3078.6,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 3.9583333333333335,
|
|
"grad_norm": 0.46958127089075363,
|
|
"learning_rate": 1.893758545288791e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23267512023448944,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3967.6,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 3.9663461538461537,
|
|
"grad_norm": 0.4422338855672534,
|
|
"learning_rate": 1.8857788575359847e-05,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18764463067054749,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3658.1,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 3.9743589743589745,
|
|
"grad_norm": 0.45411203405757655,
|
|
"learning_rate": 1.87780099359316e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16824612021446228,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3436.4,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 3.9823717948717947,
|
|
"grad_norm": 0.5975349252417311,
|
|
"learning_rate": 1.869825080845734e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27063214778900146,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2898.2,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 3.9903846153846154,
|
|
"grad_norm": 0.42022271538873057,
|
|
"learning_rate": 1.8618512466479686e-05,
|
|
"loss": 0.1997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16057726740837097,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4243.4,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 3.998397435897436,
|
|
"grad_norm": 0.5012809468885003,
|
|
"learning_rate": 1.8538796183209373e-05,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2236367017030716,
|
|
"step": 2495,
|
|
"valid_targets_mean": 3480.9,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 4.006410256410256,
|
|
"grad_norm": 0.4195723414242144,
|
|
"learning_rate": 1.845910323150491e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16902586817741394,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4047.4,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 4.014423076923077,
|
|
"grad_norm": 0.5145131919278905,
|
|
"learning_rate": 1.8379434883852255e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19503673911094666,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3389.7,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 4.022435897435898,
|
|
"grad_norm": 0.638446760469446,
|
|
"learning_rate": 1.8299792412344524e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2457159012556076,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3125.1,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 4.030448717948718,
|
|
"grad_norm": 0.5320510562195919,
|
|
"learning_rate": 1.8220177088661635e-05,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28126007318496704,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4158.2,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.038461538461538,
|
|
"grad_norm": 0.47272452528662334,
|
|
"learning_rate": 1.814059018405004e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2101389467716217,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4075.9,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 4.046474358974359,
|
|
"grad_norm": 0.45352211553110705,
|
|
"learning_rate": 1.806103296930243e-05,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17897215485572815,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3710.6,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 4.05448717948718,
|
|
"grad_norm": 0.5064275499739576,
|
|
"learning_rate": 1.7981506714737392e-05,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20361468195915222,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3783.2,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 4.0625,
|
|
"grad_norm": 0.49068392176466213,
|
|
"learning_rate": 1.7902012690179188e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23372966051101685,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4053.6,
|
|
"valid_targets_min": 2273
|
|
},
|
|
{
|
|
"epoch": 4.07051282051282,
|
|
"grad_norm": 0.3999280296624379,
|
|
"learning_rate": 1.7822552164937437e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15756747126579285,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4231.2,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 4.078525641025641,
|
|
"grad_norm": 0.6411212991277369,
|
|
"learning_rate": 1.7743126407786873e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18768355250358582,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3995.1,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 4.086538461538462,
|
|
"grad_norm": 0.632615400224224,
|
|
"learning_rate": 1.766373668694707e-05,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23910491168498993,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2928.5,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.094551282051282,
|
|
"grad_norm": 0.5054791393998274,
|
|
"learning_rate": 1.7584384270062195e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20476405322551727,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3803.2,
|
|
"valid_targets_min": 2087
|
|
},
|
|
{
|
|
"epoch": 4.102564102564102,
|
|
"grad_norm": 0.6073867684870838,
|
|
"learning_rate": 1.7505070424180772e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23343729972839355,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3272.6,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 4.110576923076923,
|
|
"grad_norm": 0.48654884113244695,
|
|
"learning_rate": 1.7425796415735454e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1814398467540741,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3609.6,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 4.118589743589744,
|
|
"grad_norm": 0.5334852365572529,
|
|
"learning_rate": 1.7346563510522783e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18280336260795593,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3150.2,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 4.126602564102564,
|
|
"grad_norm": 0.49068187641004996,
|
|
"learning_rate": 1.7267372973682998e-05,
|
|
"loss": 0.1989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18673810362815857,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3891.6,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 4.134615384615385,
|
|
"grad_norm": 0.5199937773523386,
|
|
"learning_rate": 1.7188226069679834e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19363847374916077,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3347.2,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 4.142628205128205,
|
|
"grad_norm": 0.5175812136446701,
|
|
"learning_rate": 1.7109124062280307e-05,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20378626883029938,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3837.9,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 4.1506410256410255,
|
|
"grad_norm": 0.6269435116455723,
|
|
"learning_rate": 1.7030068214534567e-05,
|
|
"loss": 0.1875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1834736168384552,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3715.1,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 4.158653846153846,
|
|
"grad_norm": 0.5589538239959635,
|
|
"learning_rate": 1.695105978875572e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21350257098674774,
|
|
"step": 2595,
|
|
"valid_targets_mean": 2966.5,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.503812150208315,
|
|
"learning_rate": 1.687210004649965e-05,
|
|
"loss": 0.2164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17916864156723022,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3349.9,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 4.174679487179487,
|
|
"grad_norm": 0.45804240872640667,
|
|
"learning_rate": 1.679319024854491e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17588487267494202,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3999.6,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 4.1826923076923075,
|
|
"grad_norm": 0.5806614439136704,
|
|
"learning_rate": 1.6714331654872564e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2032093107700348,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3521.7,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 4.190705128205128,
|
|
"grad_norm": 0.51365300180505,
|
|
"learning_rate": 1.663552552464609e-05,
|
|
"loss": 0.1892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22034162282943726,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3520.1,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 4.198717948717949,
|
|
"grad_norm": 0.46595977542803346,
|
|
"learning_rate": 1.6556773116191257e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16177070140838623,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3612.4,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 4.206730769230769,
|
|
"grad_norm": 0.5934619647470252,
|
|
"learning_rate": 1.647807568697603e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1621251404285431,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3973.6,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 4.214743589743589,
|
|
"grad_norm": 0.5279494545170368,
|
|
"learning_rate": 1.6399434493590524e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20115870237350464,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3740.2,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 4.222756410256411,
|
|
"grad_norm": 0.5402676551960568,
|
|
"learning_rate": 1.6320850791726884e-05,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18363741040229797,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3030.2,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 4.230769230769231,
|
|
"grad_norm": 0.5309143923757463,
|
|
"learning_rate": 1.6242325836159304e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20168320834636688,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3316.9,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 4.238782051282051,
|
|
"grad_norm": 0.5108105368110876,
|
|
"learning_rate": 1.6163860880723923e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702432930469513,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3533.9,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 4.246794871794872,
|
|
"grad_norm": 0.519843760147622,
|
|
"learning_rate": 1.6085457178298866e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18240828812122345,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3165.8,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 4.2548076923076925,
|
|
"grad_norm": 0.5077601861968751,
|
|
"learning_rate": 1.6007115980784182e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21437841653823853,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3410.4,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 4.262820512820513,
|
|
"grad_norm": 0.43324095570540344,
|
|
"learning_rate": 1.592883853908188e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16936254501342773,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4128.1,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 4.270833333333333,
|
|
"grad_norm": 0.5434487721223978,
|
|
"learning_rate": 1.585062610307599e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18581029772758484,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3207.5,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 4.278846153846154,
|
|
"grad_norm": 0.5185309476056992,
|
|
"learning_rate": 1.5772479921612543e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19693030416965485,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3433.8,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 4.2868589743589745,
|
|
"grad_norm": 0.49328604974342904,
|
|
"learning_rate": 1.5694401242479677e-05,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20844605565071106,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3896.2,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.294871794871795,
|
|
"grad_norm": 0.5315762410373522,
|
|
"learning_rate": 1.5616391312387683e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19662663340568542,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3516.6,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 4.302884615384615,
|
|
"grad_norm": 0.5634714984529963,
|
|
"learning_rate": 1.5538451376949106e-05,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21461111307144165,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3134.6,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 4.310897435897436,
|
|
"grad_norm": 0.5332640250660052,
|
|
"learning_rate": 1.5460582680658888e-05,
|
|
"loss": 0.194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2080698311328888,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3422.9,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 4.318910256410256,
|
|
"grad_norm": 0.5187079147122462,
|
|
"learning_rate": 1.5382786466874446e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17928150296211243,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2967.9,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 4.326923076923077,
|
|
"grad_norm": 0.5327115710533992,
|
|
"learning_rate": 1.5305063977795856e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20295903086662292,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3376.3,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 4.334935897435898,
|
|
"grad_norm": 0.4895315499876015,
|
|
"learning_rate": 1.5227416454445995e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18504248559474945,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3932.8,
|
|
"valid_targets_min": 2011
|
|
},
|
|
{
|
|
"epoch": 4.342948717948718,
|
|
"grad_norm": 0.5046175280672212,
|
|
"learning_rate": 1.5149845136650748e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1886163204908371,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3494.1,
|
|
"valid_targets_min": 1993
|
|
},
|
|
{
|
|
"epoch": 4.350961538461538,
|
|
"grad_norm": 0.4259723146821667,
|
|
"learning_rate": 1.5072351263019177e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15069478750228882,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3969.4,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 4.358974358974359,
|
|
"grad_norm": 0.5165494690418552,
|
|
"learning_rate": 1.4994936070923784e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21728010475635529,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3352.1,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 4.36698717948718,
|
|
"grad_norm": 0.5150257406953364,
|
|
"learning_rate": 1.4917600796480745e-05,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16228248178958893,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2981.2,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 4.375,
|
|
"grad_norm": 0.6145951677631153,
|
|
"learning_rate": 1.4840346674530122e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21405372023582458,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2825.2,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 4.38301282051282,
|
|
"grad_norm": 0.5918044868667718,
|
|
"learning_rate": 1.4763174938616232e-05,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22870035469532013,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2888.4,
|
|
"valid_targets_min": 1671
|
|
},
|
|
{
|
|
"epoch": 4.391025641025641,
|
|
"grad_norm": 0.5192510906056325,
|
|
"learning_rate": 1.4686086820967865e-05,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20790967345237732,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3779.0,
|
|
"valid_targets_min": 1993
|
|
},
|
|
{
|
|
"epoch": 4.399038461538462,
|
|
"grad_norm": 0.6197195127482825,
|
|
"learning_rate": 1.460908355247868e-05,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23618865013122559,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2836.0,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.407051282051282,
|
|
"grad_norm": 0.4760638930188835,
|
|
"learning_rate": 1.4532166362687507e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725659966468811,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3864.0,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 4.415064102564102,
|
|
"grad_norm": 0.511002075402515,
|
|
"learning_rate": 1.445533647975871e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19956639409065247,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3626.1,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 4.423076923076923,
|
|
"grad_norm": 0.5438811518715806,
|
|
"learning_rate": 1.437859513046263e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23376289010047913,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3961.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 4.431089743589744,
|
|
"grad_norm": 0.4898545257870175,
|
|
"learning_rate": 1.4301943540155914e-05,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17090612649917603,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3729.2,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 4.439102564102564,
|
|
"grad_norm": 0.48058927205208335,
|
|
"learning_rate": 1.4225382932762033e-05,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15635713934898376,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3476.9,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 4.447115384615385,
|
|
"grad_norm": 0.5442851597081118,
|
|
"learning_rate": 1.4148914530751681e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19521600008010864,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3143.2,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 4.455128205128205,
|
|
"grad_norm": 0.5770583514083514,
|
|
"learning_rate": 1.4072539555123292e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20591652393341064,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3650.2,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.4631410256410255,
|
|
"grad_norm": 0.7251203058744868,
|
|
"learning_rate": 1.3996259225383514e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1771497130393982,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3405.9,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 4.471153846153846,
|
|
"grad_norm": 0.6076887775424848,
|
|
"learning_rate": 1.3920074759527737e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22005143761634827,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3131.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 4.479166666666667,
|
|
"grad_norm": 0.524736934106875,
|
|
"learning_rate": 1.3843987374020689e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2200172245502472,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3403.7,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 4.487179487179487,
|
|
"grad_norm": 0.5476458833911162,
|
|
"learning_rate": 1.376799828377696e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19838351011276245,
|
|
"step": 2800,
|
|
"valid_targets_mean": 2975.4,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 4.4951923076923075,
|
|
"grad_norm": 0.49700654071688327,
|
|
"learning_rate": 1.3692108702141642e-05,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21074606478214264,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3779.0,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 4.503205128205128,
|
|
"grad_norm": 0.5685422355069307,
|
|
"learning_rate": 1.361631984087091e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2041737586259842,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2877.1,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 4.511217948717949,
|
|
"grad_norm": 0.5542806088102038,
|
|
"learning_rate": 1.354063291011273e-05,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19276300072669983,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3169.6,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 4.519230769230769,
|
|
"grad_norm": 0.5355827919010402,
|
|
"learning_rate": 1.3465049118387486e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19092710316181183,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3060.0,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 4.527243589743589,
|
|
"grad_norm": 0.5027999913792417,
|
|
"learning_rate": 1.3389569672568707e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19048264622688293,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3426.3,
|
|
"valid_targets_min": 2489
|
|
},
|
|
{
|
|
"epoch": 4.535256410256411,
|
|
"grad_norm": 0.5431332317741009,
|
|
"learning_rate": 1.331419577786381e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22947734594345093,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3533.0,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 4.543269230769231,
|
|
"grad_norm": 0.49143858468037405,
|
|
"learning_rate": 1.3238928637794816e-05,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725982129573822,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3654.7,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 4.551282051282051,
|
|
"grad_norm": 0.5466219539220681,
|
|
"learning_rate": 1.3163769454179183e-05,
|
|
"loss": 0.1993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22573739290237427,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3480.4,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 4.559294871794872,
|
|
"grad_norm": 0.4499086516520062,
|
|
"learning_rate": 1.3088719427110552e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15970298647880554,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3769.4,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 4.5673076923076925,
|
|
"grad_norm": 0.5194647525232524,
|
|
"learning_rate": 1.3013779754939666e-05,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17854247987270355,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3513.4,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 4.575320512820513,
|
|
"grad_norm": 0.46776306719156063,
|
|
"learning_rate": 1.2938951634255164e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18298465013504028,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3878.9,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 4.583333333333333,
|
|
"grad_norm": 0.5576953176753099,
|
|
"learning_rate": 1.2864236259864495e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19287380576133728,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2957.9,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 4.591346153846154,
|
|
"grad_norm": 0.4951258213397684,
|
|
"learning_rate": 1.2789634824774887e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17422738671302795,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3260.9,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 4.5993589743589745,
|
|
"grad_norm": 0.46464947946698526,
|
|
"learning_rate": 1.2715148520174206e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15368060767650604,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3799.4,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 4.607371794871795,
|
|
"grad_norm": 0.4061095560086419,
|
|
"learning_rate": 1.2640778535412036e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1623319685459137,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4657.2,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 4.615384615384615,
|
|
"grad_norm": 0.4838245534820288,
|
|
"learning_rate": 1.2566526057980608e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18071702122688293,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3507.5,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 4.623397435897436,
|
|
"grad_norm": 0.5243111054303644,
|
|
"learning_rate": 1.2492392273495879e-05,
|
|
"loss": 0.2015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18476027250289917,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3678.5,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 4.631410256410256,
|
|
"grad_norm": 0.553281734097847,
|
|
"learning_rate": 1.2418378365678612e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20735764503479004,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3342.6,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 4.639423076923077,
|
|
"grad_norm": 0.41114325272563546,
|
|
"learning_rate": 1.234448551633542e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18882155418395996,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4904.7,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 4.647435897435898,
|
|
"grad_norm": 0.5606770649530616,
|
|
"learning_rate": 1.2270714905339969e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21375831961631775,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3524.6,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 4.655448717948718,
|
|
"grad_norm": 0.5162069270799858,
|
|
"learning_rate": 1.2197067710614075e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18576446175575256,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3381.0,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 4.663461538461538,
|
|
"grad_norm": 0.5597459395036315,
|
|
"learning_rate": 1.2123545108108943e-05,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22167593240737915,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3257.6,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 4.671474358974359,
|
|
"grad_norm": 0.4781708796025318,
|
|
"learning_rate": 1.2050148271786348e-05,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17129084467887878,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3561.2,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 4.67948717948718,
|
|
"grad_norm": 0.4997511060015055,
|
|
"learning_rate": 1.1976878373599928e-05,
|
|
"loss": 0.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16873495280742645,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3391.8,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 4.6875,
|
|
"grad_norm": 0.4902845974414694,
|
|
"learning_rate": 1.1903736583476441e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2030390202999115,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3732.4,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 4.69551282051282,
|
|
"grad_norm": 0.5193517614088814,
|
|
"learning_rate": 1.1830724069297106e-05,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19543379545211792,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3282.8,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 4.703525641025641,
|
|
"grad_norm": 0.5704804701796626,
|
|
"learning_rate": 1.1757841996878957e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19900594651699066,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3075.9,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 4.711538461538462,
|
|
"grad_norm": 0.5826653409132639,
|
|
"learning_rate": 1.1685091529956187e-05,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2115277647972107,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3320.6,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 4.719551282051282,
|
|
"grad_norm": 0.5021845306839792,
|
|
"learning_rate": 1.161247383016163e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20887619256973267,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3432.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 4.727564102564102,
|
|
"grad_norm": 0.6114150346499213,
|
|
"learning_rate": 1.1539990057008166e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18386265635490417,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3268.9,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 4.735576923076923,
|
|
"grad_norm": 0.5512196387593564,
|
|
"learning_rate": 1.1467641367870198e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22034752368927002,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3414.5,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 4.743589743589744,
|
|
"grad_norm": 0.5647291311418439,
|
|
"learning_rate": 1.1395428917965239e-05,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19606733322143555,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3166.5,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 4.751602564102564,
|
|
"grad_norm": 0.6519634295021054,
|
|
"learning_rate": 1.1323353860335385e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246025651693344,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2595.8,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 4.759615384615385,
|
|
"grad_norm": 0.4555433892994592,
|
|
"learning_rate": 1.1251417345828962e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18713593482971191,
|
|
"step": 2970,
|
|
"valid_targets_mean": 3909.8,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 4.767628205128205,
|
|
"grad_norm": 0.5112213787584956,
|
|
"learning_rate": 1.1179620523082107e-05,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18104520440101624,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3262.2,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 4.7756410256410255,
|
|
"grad_norm": 0.5695788763309223,
|
|
"learning_rate": 1.110796453850047e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21264362335205078,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3146.4,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 4.783653846153846,
|
|
"grad_norm": 0.4658028855567424,
|
|
"learning_rate": 1.1036450536240877e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16871154308319092,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4409.3,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 4.791666666666667,
|
|
"grad_norm": 0.4841046230771778,
|
|
"learning_rate": 1.0965079658193068e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2128428816795349,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3797.4,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 4.799679487179487,
|
|
"grad_norm": 0.5488706583686694,
|
|
"learning_rate": 1.0893853043961475e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20432452857494354,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3163.5,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 4.8076923076923075,
|
|
"grad_norm": 0.3993765560711606,
|
|
"learning_rate": 1.0822771830847011e-05,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18088233470916748,
|
|
"step": 3000,
|
|
"valid_targets_mean": 5041.3,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 4.815705128205128,
|
|
"grad_norm": 0.5478814229443099,
|
|
"learning_rate": 1.0751837153828926e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19549459218978882,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3135.2,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 4.823717948717949,
|
|
"grad_norm": 0.5340723906442628,
|
|
"learning_rate": 1.0681050145546666e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22188079357147217,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3478.6,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 4.831730769230769,
|
|
"grad_norm": 0.5305728395941428,
|
|
"learning_rate": 1.0610411936281801e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21828171610832214,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3555.6,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 4.839743589743589,
|
|
"grad_norm": 0.5786196807883349,
|
|
"learning_rate": 1.0539923653939978e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20064550638198853,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2935.4,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 4.847756410256411,
|
|
"grad_norm": 0.6004313342633323,
|
|
"learning_rate": 1.0469586424032903e-05,
|
|
"loss": 0.1965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16980455815792084,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2810.4,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 4.855769230769231,
|
|
"grad_norm": 0.6131691098928032,
|
|
"learning_rate": 1.0399401369660369e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19341379404067993,
|
|
"step": 3030,
|
|
"valid_targets_mean": 2728.5,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 4.863782051282051,
|
|
"grad_norm": 0.5544720364395579,
|
|
"learning_rate": 1.0329369611492334e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2462378740310669,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3388.8,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 4.871794871794872,
|
|
"grad_norm": 0.5733544279629245,
|
|
"learning_rate": 1.0259492267751022e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20790205895900726,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2837.6,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 4.8798076923076925,
|
|
"grad_norm": 0.531980939673929,
|
|
"learning_rate": 1.0189770454193052e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2089274674654007,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3406.5,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 4.887820512820513,
|
|
"grad_norm": 0.5854718977302726,
|
|
"learning_rate": 1.0120205284091673e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23034754395484924,
|
|
"step": 3050,
|
|
"valid_targets_mean": 2694.4,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 4.895833333333333,
|
|
"grad_norm": 0.5806071455397431,
|
|
"learning_rate": 1.0050797868218907e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18953976035118103,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2736.9,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 4.903846153846154,
|
|
"grad_norm": 0.4758714435534047,
|
|
"learning_rate": 9.981549314827876e-06,
|
|
"loss": 0.1938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17658931016921997,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3758.4,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 4.9118589743589745,
|
|
"grad_norm": 0.5151085606255328,
|
|
"learning_rate": 9.912460729635097e-06,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856827735900879,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3886.4,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 4.919871794871795,
|
|
"grad_norm": 0.5600078970119667,
|
|
"learning_rate": 9.843533215802796e-06,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22082971036434174,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3223.6,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 4.927884615384615,
|
|
"grad_norm": 0.5605700075398913,
|
|
"learning_rate": 9.774767873921357e-06,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2197868525981903,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3292.1,
|
|
"valid_targets_min": 1981
|
|
},
|
|
{
|
|
"epoch": 4.935897435897436,
|
|
"grad_norm": 0.5388453900657821,
|
|
"learning_rate": 9.706165801991651e-06,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20840944349765778,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3646.8,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 4.943910256410256,
|
|
"grad_norm": 0.5337480293840772,
|
|
"learning_rate": 9.637728095407593e-06,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22162067890167236,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3837.4,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 4.951923076923077,
|
|
"grad_norm": 0.5315055333333628,
|
|
"learning_rate": 9.56945584693861e-06,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1910790205001831,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2969.9,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 4.959935897435898,
|
|
"grad_norm": 0.531450262082612,
|
|
"learning_rate": 9.501350146712193e-06,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2249545156955719,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3650.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 4.967948717948718,
|
|
"grad_norm": 0.514593402922467,
|
|
"learning_rate": 9.433412082196527e-06,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1740938276052475,
|
|
"step": 3100,
|
|
"valid_targets_mean": 3389.1,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 4.975961538461538,
|
|
"grad_norm": 0.479202568267713,
|
|
"learning_rate": 9.365642738183044e-06,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18655627965927124,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3681.2,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 4.983974358974359,
|
|
"grad_norm": 0.5530192556043516,
|
|
"learning_rate": 9.298043196769217e-06,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2281743586063385,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3384.4,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 4.99198717948718,
|
|
"grad_norm": 0.44807416141894424,
|
|
"learning_rate": 9.230614537341167e-06,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1691162884235382,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3748.1,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5409391376747347,
|
|
"learning_rate": 9.163357836556498e-06,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19238653779029846,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3047.0,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 5.00801282051282,
|
|
"grad_norm": 0.5113284257073623,
|
|
"learning_rate": 9.096274168327122e-06,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752883791923523,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3437.4,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 5.016025641025641,
|
|
"grad_norm": 0.5447988674027889,
|
|
"learning_rate": 9.029364603802017e-06,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16521689295768738,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3108.8,
|
|
"valid_targets_min": 1906
|
|
},
|
|
{
|
|
"epoch": 5.024038461538462,
|
|
"grad_norm": 0.648454130718421,
|
|
"learning_rate": 8.962630211350248e-06,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18997415900230408,
|
|
"step": 3135,
|
|
"valid_targets_mean": 2555.8,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 5.032051282051282,
|
|
"grad_norm": 0.5763204739253073,
|
|
"learning_rate": 8.89607205654378e-06,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17350471019744873,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3285.9,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 5.040064102564102,
|
|
"grad_norm": 0.5162424230093436,
|
|
"learning_rate": 8.829691202140591e-06,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1592182070016861,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3308.9,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 5.048076923076923,
|
|
"grad_norm": 0.6082522256664168,
|
|
"learning_rate": 8.763488708067604e-06,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2013545036315918,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3266.9,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 5.056089743589744,
|
|
"grad_norm": 0.6021413534717205,
|
|
"learning_rate": 8.69746563140379e-06,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21238872408866882,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3137.6,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.064102564102564,
|
|
"grad_norm": 0.46975098631980283,
|
|
"learning_rate": 8.631623026363331e-06,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13161732256412506,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3321.1,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 5.072115384615385,
|
|
"grad_norm": 0.5887772251561811,
|
|
"learning_rate": 8.56596194427873e-06,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20781591534614563,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3307.9,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 5.080128205128205,
|
|
"grad_norm": 0.5390332643634987,
|
|
"learning_rate": 8.500483433584054e-06,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16811692714691162,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3268.9,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 5.0881410256410255,
|
|
"grad_norm": 0.5460202693884373,
|
|
"learning_rate": 8.435188539798187e-06,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23228251934051514,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3874.6,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 5.096153846153846,
|
|
"grad_norm": 0.5859478298591612,
|
|
"learning_rate": 8.370078305508136e-06,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21061252057552338,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3122.8,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 5.104166666666667,
|
|
"grad_norm": 0.5059924010279726,
|
|
"learning_rate": 8.305153770352384e-06,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2043697088956833,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4493.2,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 5.112179487179487,
|
|
"grad_norm": 0.5253391559679185,
|
|
"learning_rate": 8.240415971004285e-06,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19598671793937683,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3796.3,
|
|
"valid_targets_min": 2472
|
|
},
|
|
{
|
|
"epoch": 5.1201923076923075,
|
|
"grad_norm": 0.5296821750414983,
|
|
"learning_rate": 8.175865941155525e-06,
|
|
"loss": 0.2053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1722840517759323,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3151.8,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 5.128205128205128,
|
|
"grad_norm": 0.453908424287244,
|
|
"learning_rate": 8.111504711499598e-06,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18074356019496918,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4311.1,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 5.136217948717949,
|
|
"grad_norm": 0.48570840686600897,
|
|
"learning_rate": 8.04733330971536e-06,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18450653553009033,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4052.2,
|
|
"valid_targets_min": 2120
|
|
},
|
|
{
|
|
"epoch": 5.144230769230769,
|
|
"grad_norm": 0.6526366021581173,
|
|
"learning_rate": 7.983352760450618e-06,
|
|
"loss": 0.2022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19894084334373474,
|
|
"step": 3210,
|
|
"valid_targets_mean": 2804.1,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 5.152243589743589,
|
|
"grad_norm": 0.44063183279798923,
|
|
"learning_rate": 7.919564085305768e-06,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15730887651443481,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4473.3,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 5.160256410256411,
|
|
"grad_norm": 0.6712742300870267,
|
|
"learning_rate": 7.855968302817487e-06,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181408405303955,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3240.4,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 5.168269230769231,
|
|
"grad_norm": 0.5614510327024675,
|
|
"learning_rate": 7.792566428442456e-06,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17663878202438354,
|
|
"step": 3225,
|
|
"valid_targets_mean": 3257.2,
|
|
"valid_targets_min": 1846
|
|
},
|
|
{
|
|
"epoch": 5.176282051282051,
|
|
"grad_norm": 0.5163523322212359,
|
|
"learning_rate": 7.729359474541168e-06,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2008240520954132,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3838.0,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 5.184294871794872,
|
|
"grad_norm": 0.5690798020020454,
|
|
"learning_rate": 7.666348450361737e-06,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2337350696325302,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3405.2,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 5.1923076923076925,
|
|
"grad_norm": 0.5742136826338571,
|
|
"learning_rate": 7.60353436202381e-06,
|
|
"loss": 0.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1858733743429184,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3966.5,
|
|
"valid_targets_min": 1872
|
|
},
|
|
{
|
|
"epoch": 5.200320512820513,
|
|
"grad_norm": 0.5122166596394158,
|
|
"learning_rate": 7.540918212502479e-06,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1741316318511963,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3365.6,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 5.208333333333333,
|
|
"grad_norm": 0.49354163626246467,
|
|
"learning_rate": 7.478501001612281e-06,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17873799800872803,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3825.0,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 5.216346153846154,
|
|
"grad_norm": 0.5443684108332746,
|
|
"learning_rate": 7.416283725991229e-06,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19958025217056274,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3123.3,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 5.2243589743589745,
|
|
"grad_norm": 0.518143554694147,
|
|
"learning_rate": 7.354267379084896e-06,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18749485909938812,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3432.8,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 5.232371794871795,
|
|
"grad_norm": 0.5727610082043441,
|
|
"learning_rate": 7.292452951130548e-06,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22807061672210693,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3272.8,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 5.240384615384615,
|
|
"grad_norm": 0.5171761185493424,
|
|
"learning_rate": 7.230841429141347e-06,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1712077558040619,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3486.1,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 5.248397435897436,
|
|
"grad_norm": 0.5753726653347081,
|
|
"learning_rate": 7.169433796890595e-06,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18391674757003784,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3149.1,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 5.256410256410256,
|
|
"grad_norm": 0.5827401383446655,
|
|
"learning_rate": 7.108231034895976e-06,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22402942180633545,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3546.9,
|
|
"valid_targets_min": 2170
|
|
},
|
|
{
|
|
"epoch": 5.264423076923077,
|
|
"grad_norm": 0.543075705148801,
|
|
"learning_rate": 7.047234120403972e-06,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19148463010787964,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3322.8,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 5.272435897435898,
|
|
"grad_norm": 0.5626066912048262,
|
|
"learning_rate": 6.986444027374211e-06,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19853419065475464,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3568.6,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 5.280448717948718,
|
|
"grad_norm": 0.572779952262963,
|
|
"learning_rate": 6.925861726463919e-06,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21987566351890564,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3446.1,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 5.288461538461538,
|
|
"grad_norm": 0.577913018345779,
|
|
"learning_rate": 6.865488185012464e-06,
|
|
"loss": 0.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18918846547603607,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3336.0,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 5.296474358974359,
|
|
"grad_norm": 0.5075057087612974,
|
|
"learning_rate": 6.805324367025825e-06,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1787465363740921,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3670.3,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 5.30448717948718,
|
|
"grad_norm": 0.5001681486648626,
|
|
"learning_rate": 6.745371233161309e-06,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19627542793750763,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3670.1,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 5.3125,
|
|
"grad_norm": 0.48278736188583865,
|
|
"learning_rate": 6.685629740712103e-06,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16993483901023865,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3727.1,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 5.32051282051282,
|
|
"grad_norm": 0.5575051576217561,
|
|
"learning_rate": 6.6261008435920605e-06,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20006653666496277,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3215.5,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 5.328525641025641,
|
|
"grad_norm": 0.5388816043955157,
|
|
"learning_rate": 6.566785492320471e-06,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1908368021249771,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3455.5,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 5.336538461538462,
|
|
"grad_norm": 0.5400123521185616,
|
|
"learning_rate": 6.507684634006815e-06,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16674238443374634,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3334.6,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 5.344551282051282,
|
|
"grad_norm": 0.5270342093008171,
|
|
"learning_rate": 6.448799212335734e-06,
|
|
"loss": 0.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1765822470188141,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3522.7,
|
|
"valid_targets_min": 1690
|
|
},
|
|
{
|
|
"epoch": 5.352564102564102,
|
|
"grad_norm": 0.4636706571549095,
|
|
"learning_rate": 6.390130167551869e-06,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14373044669628143,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3593.2,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 5.360576923076923,
|
|
"grad_norm": 0.5805185030734262,
|
|
"learning_rate": 6.331678436444939e-06,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17474153637886047,
|
|
"step": 3345,
|
|
"valid_targets_mean": 2900.4,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 5.368589743589744,
|
|
"grad_norm": 0.5396103910816336,
|
|
"learning_rate": 6.273444952334713e-06,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19289200007915497,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3375.8,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 5.376602564102564,
|
|
"grad_norm": 0.5447076740652468,
|
|
"learning_rate": 6.2154306450561175e-06,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780100166797638,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3710.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 5.384615384615385,
|
|
"grad_norm": 0.5516827352859298,
|
|
"learning_rate": 6.157636440944445e-06,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.191841721534729,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3302.8,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 5.392628205128205,
|
|
"grad_norm": 0.8580701767767885,
|
|
"learning_rate": 6.100063262820474e-06,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.187397301197052,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3643.5,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 5.4006410256410255,
|
|
"grad_norm": 0.506561834066886,
|
|
"learning_rate": 6.0427120299758236e-06,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1966952383518219,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3982.6,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 5.408653846153846,
|
|
"grad_norm": 0.5289407099998846,
|
|
"learning_rate": 5.985583658158212e-06,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20928660035133362,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3634.4,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 5.416666666666667,
|
|
"grad_norm": 0.47593762647730675,
|
|
"learning_rate": 5.928679059556852e-06,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15979638695716858,
|
|
"step": 3380,
|
|
"valid_targets_mean": 4029.1,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 5.424679487179487,
|
|
"grad_norm": 0.54615681581572,
|
|
"learning_rate": 5.871999142787908e-06,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18546456098556519,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3214.1,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 5.4326923076923075,
|
|
"grad_norm": 0.5240694658371231,
|
|
"learning_rate": 5.815544812879936e-06,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20221960544586182,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3662.1,
|
|
"valid_targets_min": 2329
|
|
},
|
|
{
|
|
"epoch": 5.440705128205128,
|
|
"grad_norm": 0.6164173562320571,
|
|
"learning_rate": 5.759316971259503e-06,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24456317722797394,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3020.1,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 5.448717948717949,
|
|
"grad_norm": 0.5726966590420226,
|
|
"learning_rate": 5.703316515736734e-06,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823807656764984,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2954.6,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 5.456730769230769,
|
|
"grad_norm": 0.514223543564771,
|
|
"learning_rate": 5.647544340491007e-06,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1850132793188095,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3505.2,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 5.464743589743589,
|
|
"grad_norm": 0.6189202501534701,
|
|
"learning_rate": 5.592001336056659e-06,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1900269091129303,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3419.9,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 5.472756410256411,
|
|
"grad_norm": 0.6094758033614434,
|
|
"learning_rate": 5.536688389308782e-06,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20669332146644592,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2933.1,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 5.480769230769231,
|
|
"grad_norm": 0.437352007826735,
|
|
"learning_rate": 5.4816063834490496e-06,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18805278837680817,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4861.1,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 5.488782051282051,
|
|
"grad_norm": 0.5937375893287633,
|
|
"learning_rate": 5.426756197991625e-06,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19615274667739868,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3083.9,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 5.496794871794872,
|
|
"grad_norm": 0.5292374325348774,
|
|
"learning_rate": 5.372138708749104e-06,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17313215136528015,
|
|
"step": 3430,
|
|
"valid_targets_mean": 3637.6,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 5.5048076923076925,
|
|
"grad_norm": 0.5904116952834751,
|
|
"learning_rate": 5.3177547878185436e-06,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18827790021896362,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3208.8,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 5.512820512820513,
|
|
"grad_norm": 0.5009765474233788,
|
|
"learning_rate": 5.263605303567532e-06,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15040534734725952,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3404.1,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 5.520833333333333,
|
|
"grad_norm": 0.46199036273708716,
|
|
"learning_rate": 5.20969112062032e-06,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.155937060713768,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3935.8,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 5.528846153846154,
|
|
"grad_norm": 0.5119196894630544,
|
|
"learning_rate": 5.156013099844017e-06,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872662901878357,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3880.6,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 5.5368589743589745,
|
|
"grad_norm": 0.566007537463004,
|
|
"learning_rate": 5.1025720983348544e-06,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19776442646980286,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3433.2,
|
|
"valid_targets_min": 1809
|
|
},
|
|
{
|
|
"epoch": 5.544871794871795,
|
|
"grad_norm": 0.5514081766003055,
|
|
"learning_rate": 5.049368969404484e-06,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1966758370399475,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3726.0,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 5.552884615384615,
|
|
"grad_norm": 0.5654081475081372,
|
|
"learning_rate": 4.99640456256636e-06,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18677841126918793,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3471.7,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 5.560897435897436,
|
|
"grad_norm": 0.5163881738271517,
|
|
"learning_rate": 4.9436797235221814e-06,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18354731798171997,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3611.6,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 5.568910256410256,
|
|
"grad_norm": 0.5434683494215142,
|
|
"learning_rate": 4.891195294148376e-06,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1964164674282074,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3558.6,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 5.576923076923077,
|
|
"grad_norm": 0.5336897538504671,
|
|
"learning_rate": 4.838952112482671e-06,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16893991827964783,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3513.5,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 5.584935897435898,
|
|
"grad_norm": 0.42933770422501677,
|
|
"learning_rate": 4.786951012710699e-06,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15886548161506653,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4568.1,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 5.592948717948718,
|
|
"grad_norm": 0.4188860911448219,
|
|
"learning_rate": 4.735192825152686e-06,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16202595829963684,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4717.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 5.600961538461538,
|
|
"grad_norm": 0.4593864099417896,
|
|
"learning_rate": 4.683678376250189e-06,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17314445972442627,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3925.8,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 5.608974358974359,
|
|
"grad_norm": 0.48198100944237676,
|
|
"learning_rate": 4.6324084885529086e-06,
|
|
"loss": 0.1793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16028594970703125,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3850.2,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 5.61698717948718,
|
|
"grad_norm": 0.6326650398575101,
|
|
"learning_rate": 4.581383980705538e-06,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744472324848175,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3211.5,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 5.625,
|
|
"grad_norm": 0.492480738098872,
|
|
"learning_rate": 4.530605667434727e-06,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18184933066368103,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3921.9,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 5.63301282051282,
|
|
"grad_norm": 0.5627002261210652,
|
|
"learning_rate": 4.480074359536013e-06,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19673556089401245,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3383.2,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 5.641025641025641,
|
|
"grad_norm": 0.5402686277972082,
|
|
"learning_rate": 4.429790863860934e-06,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19620636105537415,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3515.4,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 5.649038461538462,
|
|
"grad_norm": 0.455833870213205,
|
|
"learning_rate": 4.3797559833041146e-06,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15487775206565857,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3998.6,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 5.657051282051282,
|
|
"grad_norm": 0.5107407870906246,
|
|
"learning_rate": 4.329970516790447e-06,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705532819032669,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3715.2,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 5.665064102564102,
|
|
"grad_norm": 0.46992914278548026,
|
|
"learning_rate": 4.280435259262363e-06,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14910975098609924,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4047.2,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 5.673076923076923,
|
|
"grad_norm": 0.4964648069391279,
|
|
"learning_rate": 4.231151001667077e-06,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2373436838388443,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4289.2,
|
|
"valid_targets_min": 1797
|
|
},
|
|
{
|
|
"epoch": 5.681089743589744,
|
|
"grad_norm": 0.5259660873921462,
|
|
"learning_rate": 4.182118530944044e-06,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18008257448673248,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3665.4,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 5.689102564102564,
|
|
"grad_norm": 0.5056551983974236,
|
|
"learning_rate": 4.133338630012307e-06,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19457197189331055,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3579.0,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.697115384615385,
|
|
"grad_norm": 0.6123253119930623,
|
|
"learning_rate": 4.0848120777580554e-06,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18530994653701782,
|
|
"step": 3555,
|
|
"valid_targets_mean": 2964.7,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 5.705128205128205,
|
|
"grad_norm": 0.5139721617285482,
|
|
"learning_rate": 4.036539649022182e-06,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20200860500335693,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3852.3,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 5.7131410256410255,
|
|
"grad_norm": 0.5399140803806365,
|
|
"learning_rate": 3.988522114587865e-06,
|
|
"loss": 0.1877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1806945502758026,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3429.0,
|
|
"valid_targets_min": 1667
|
|
},
|
|
{
|
|
"epoch": 5.721153846153846,
|
|
"grad_norm": 0.5501809465020228,
|
|
"learning_rate": 3.940760241168331e-06,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1809982806444168,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2989.8,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 5.729166666666667,
|
|
"grad_norm": 0.5844901005312456,
|
|
"learning_rate": 3.893254791394541e-06,
|
|
"loss": 0.1938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19920454919338226,
|
|
"step": 3575,
|
|
"valid_targets_mean": 3151.3,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 5.737179487179487,
|
|
"grad_norm": 0.501643292559648,
|
|
"learning_rate": 3.846006523803074e-06,
|
|
"loss": 0.1781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1691002994775772,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3593.8,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 5.7451923076923075,
|
|
"grad_norm": 0.5671413233547391,
|
|
"learning_rate": 3.799016192823981e-06,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17352180182933807,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3414.9,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 5.753205128205128,
|
|
"grad_norm": 0.5666972330482487,
|
|
"learning_rate": 3.7522845487687276e-06,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1914469301700592,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3221.8,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 5.761217948717949,
|
|
"grad_norm": 0.5351797553861585,
|
|
"learning_rate": 3.7058123378182664e-06,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2058011293411255,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3487.9,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.4599129294891559,
|
|
"learning_rate": 3.6596003020110636e-06,
|
|
"loss": 0.1853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18410655856132507,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4461.4,
|
|
"valid_targets_min": 1898
|
|
},
|
|
{
|
|
"epoch": 5.777243589743589,
|
|
"grad_norm": 0.4957453124553632,
|
|
"learning_rate": 3.613649179231287e-06,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17034442722797394,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3607.4,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 5.785256410256411,
|
|
"grad_norm": 0.47090246675664355,
|
|
"learning_rate": 3.5679597031970017e-06,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1710040122270584,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4074.2,
|
|
"valid_targets_min": 2139
|
|
},
|
|
{
|
|
"epoch": 5.793269230769231,
|
|
"grad_norm": 0.5611368048797609,
|
|
"learning_rate": 3.5225326034484764e-06,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19601497054100037,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3592.8,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 5.801282051282051,
|
|
"grad_norm": 0.5429508267587924,
|
|
"learning_rate": 3.4773686053365197e-06,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25647762417793274,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3899.1,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 5.809294871794872,
|
|
"grad_norm": 0.6267818504699711,
|
|
"learning_rate": 3.4324684300109003e-06,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22126325964927673,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3169.4,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 5.8173076923076925,
|
|
"grad_norm": 0.6003395101225485,
|
|
"learning_rate": 3.387832794408832e-06,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19179658591747284,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3175.5,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 5.825320512820513,
|
|
"grad_norm": 0.8231448274682673,
|
|
"learning_rate": 3.3434624112435342e-06,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15372401475906372,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3813.9,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.5695754219453818,
|
|
"learning_rate": 3.2993579889928397e-06,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23370450735092163,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3412.4,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 5.841346153846154,
|
|
"grad_norm": 0.6502128293688998,
|
|
"learning_rate": 3.25552023188789e-06,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21419233083724976,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3037.8,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 5.8493589743589745,
|
|
"grad_norm": 0.5359902686377935,
|
|
"learning_rate": 3.211949839901889e-06,
|
|
"loss": 0.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2461708039045334,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3926.8,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 5.857371794871795,
|
|
"grad_norm": 0.5659386559768532,
|
|
"learning_rate": 3.168647508738927e-06,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24257324635982513,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3724.2,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 5.865384615384615,
|
|
"grad_norm": 0.37056662744745467,
|
|
"learning_rate": 3.125613929822866e-06,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12163887917995453,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5052.7,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 5.873397435897436,
|
|
"grad_norm": 0.5838541068133365,
|
|
"learning_rate": 3.0828497902863106e-06,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22443059086799622,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3060.1,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 5.881410256410256,
|
|
"grad_norm": 0.5682074734014336,
|
|
"learning_rate": 3.0403557729596267e-06,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19512076675891876,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3185.4,
|
|
"valid_targets_min": 1948
|
|
},
|
|
{
|
|
"epoch": 5.889423076923077,
|
|
"grad_norm": 0.6022044204063187,
|
|
"learning_rate": 2.998132556360038e-06,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22192490100860596,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3249.8,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 5.897435897435898,
|
|
"grad_norm": 0.6195066221518493,
|
|
"learning_rate": 2.9561808146808068e-06,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19596882164478302,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2721.9,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 5.905448717948718,
|
|
"grad_norm": 0.574156043742674,
|
|
"learning_rate": 2.9145012177804476e-06,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21469567716121674,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3452.7,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 5.913461538461538,
|
|
"grad_norm": 0.5087952282766907,
|
|
"learning_rate": 2.8730944311720454e-06,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2278899997472763,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4572.1,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 5.921474358974359,
|
|
"grad_norm": 0.47323882313623933,
|
|
"learning_rate": 2.8319611160126226e-06,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16829034686088562,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4650.4,
|
|
"valid_targets_min": 1963
|
|
},
|
|
{
|
|
"epoch": 5.92948717948718,
|
|
"grad_norm": 0.4862546032433573,
|
|
"learning_rate": 2.791101929092592e-06,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17378859221935272,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3729.2,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 5.9375,
|
|
"grad_norm": 0.5851260153472438,
|
|
"learning_rate": 2.750517522825251e-06,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2061779797077179,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3207.8,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 5.94551282051282,
|
|
"grad_norm": 0.5347583134407086,
|
|
"learning_rate": 2.710208545236397e-06,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17754028737545013,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3264.6,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 5.953525641025641,
|
|
"grad_norm": 0.5584631349323056,
|
|
"learning_rate": 2.670175639953929e-06,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18711164593696594,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3365.9,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 5.961538461538462,
|
|
"grad_norm": 0.5689525852014578,
|
|
"learning_rate": 2.6304194461976207e-06,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17805981636047363,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2968.3,
|
|
"valid_targets_min": 1918
|
|
},
|
|
{
|
|
"epoch": 5.969551282051282,
|
|
"grad_norm": 0.5742462138075193,
|
|
"learning_rate": 2.5909405987688896e-06,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18538996577262878,
|
|
"step": 3725,
|
|
"valid_targets_mean": 2999.2,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 5.977564102564102,
|
|
"grad_norm": 0.5474544354626358,
|
|
"learning_rate": 2.5517397280406565e-06,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18040937185287476,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3083.9,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 5.985576923076923,
|
|
"grad_norm": 0.5370344222680715,
|
|
"learning_rate": 2.512817459947312e-06,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1840437948703766,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3041.1,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 5.993589743589744,
|
|
"grad_norm": 0.557742421962461,
|
|
"learning_rate": 2.4741744159746618e-06,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20614948868751526,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3577.1,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 6.001602564102564,
|
|
"grad_norm": 0.5492539692183419,
|
|
"learning_rate": 2.435811213150079e-06,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20480048656463623,
|
|
"step": 3745,
|
|
"valid_targets_mean": 3380.0,
|
|
"valid_targets_min": 2101
|
|
},
|
|
{
|
|
"epoch": 6.009615384615385,
|
|
"grad_norm": 0.5012174932189678,
|
|
"learning_rate": 2.3977284640325805e-06,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16375727951526642,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3257.8,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 6.017628205128205,
|
|
"grad_norm": 0.5150033985791577,
|
|
"learning_rate": 2.359926776703092e-06,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17800338566303253,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3647.5,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 6.0256410256410255,
|
|
"grad_norm": 0.6125072628848407,
|
|
"learning_rate": 2.3224067547547357e-06,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24423670768737793,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3147.4,
|
|
"valid_targets_min": 1749
|
|
},
|
|
{
|
|
"epoch": 6.033653846153846,
|
|
"grad_norm": 0.5290504030535882,
|
|
"learning_rate": 2.2851689972831536e-06,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1971917450428009,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3512.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 6.041666666666667,
|
|
"grad_norm": 0.5074131034125838,
|
|
"learning_rate": 2.248214098877002e-06,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17116963863372803,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3892.2,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 6.049679487179487,
|
|
"grad_norm": 0.6370750582627046,
|
|
"learning_rate": 2.2115426496083958e-06,
|
|
"loss": 0.1997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24153323471546173,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3046.4,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 6.0576923076923075,
|
|
"grad_norm": 0.5857968742273449,
|
|
"learning_rate": 2.175155235023536e-06,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22333982586860657,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3206.4,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 6.065705128205129,
|
|
"grad_norm": 0.5720754742458641,
|
|
"learning_rate": 2.1390524361333355e-06,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1934468150138855,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3093.9,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 6.073717948717949,
|
|
"grad_norm": 0.6235776979123787,
|
|
"learning_rate": 2.1032348294041305e-06,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17414945363998413,
|
|
"step": 3790,
|
|
"valid_targets_mean": 2934.5,
|
|
"valid_targets_min": 1846
|
|
},
|
|
{
|
|
"epoch": 6.081730769230769,
|
|
"grad_norm": 0.5800575852208971,
|
|
"learning_rate": 2.067702986748521e-06,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21823963522911072,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3321.6,
|
|
"valid_targets_min": 1872
|
|
},
|
|
{
|
|
"epoch": 6.089743589743589,
|
|
"grad_norm": 0.5380548846853326,
|
|
"learning_rate": 2.0324574755161764e-06,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1871141493320465,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3578.1,
|
|
"valid_targets_min": 1717
|
|
},
|
|
{
|
|
"epoch": 6.097756410256411,
|
|
"grad_norm": 0.6216251637611282,
|
|
"learning_rate": 1.9974988584848385e-06,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21641653776168823,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3018.4,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 6.105769230769231,
|
|
"grad_norm": 0.4499232995661724,
|
|
"learning_rate": 1.96282769385129e-06,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13558490574359894,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3978.8,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 6.113782051282051,
|
|
"grad_norm": 0.5440572437785779,
|
|
"learning_rate": 1.9284445352224625e-06,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1783181130886078,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3168.1,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 6.121794871794871,
|
|
"grad_norm": 0.5104998731463487,
|
|
"learning_rate": 1.894349931606596e-06,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18172745406627655,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3574.4,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 6.1298076923076925,
|
|
"grad_norm": 0.4896220667407711,
|
|
"learning_rate": 1.8605444274044493e-06,
|
|
"loss": 0.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18478181958198547,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3944.8,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 6.137820512820513,
|
|
"grad_norm": 0.48989400822262036,
|
|
"learning_rate": 1.827028562400659e-06,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16057899594306946,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3592.7,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 6.145833333333333,
|
|
"grad_norm": 0.47494365061846483,
|
|
"learning_rate": 1.793802871755066e-06,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14633449912071228,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3542.4,
|
|
"valid_targets_min": 2396
|
|
},
|
|
{
|
|
"epoch": 6.153846153846154,
|
|
"grad_norm": 0.5904921538812401,
|
|
"learning_rate": 1.760867885994202e-06,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2000691294670105,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3720.2,
|
|
"valid_targets_min": 1964
|
|
},
|
|
{
|
|
"epoch": 6.1618589743589745,
|
|
"grad_norm": 0.5080439984155285,
|
|
"learning_rate": 1.7282241310028047e-06,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16287338733673096,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3678.0,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 6.169871794871795,
|
|
"grad_norm": 0.6316969144820093,
|
|
"learning_rate": 1.6958721280154232e-06,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20093393325805664,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2975.8,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 6.177884615384615,
|
|
"grad_norm": 0.5629266503674685,
|
|
"learning_rate": 1.6638123936081085e-06,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1980648934841156,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3402.4,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 6.185897435897436,
|
|
"grad_norm": 0.4901786276304485,
|
|
"learning_rate": 1.6320454396901463e-06,
|
|
"loss": 0.1875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593923568725586,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3699.1,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 6.193910256410256,
|
|
"grad_norm": 0.496727862672396,
|
|
"learning_rate": 1.6005717734958914e-06,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14378324151039124,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3391.4,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 6.201923076923077,
|
|
"grad_norm": 0.5874727303234155,
|
|
"learning_rate": 1.569391897576671e-06,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17528510093688965,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3422.4,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 6.209935897435898,
|
|
"grad_norm": 0.49680224237276693,
|
|
"learning_rate": 1.5385063097927533e-06,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18529458343982697,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3745.9,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 6.217948717948718,
|
|
"grad_norm": 0.498633204184822,
|
|
"learning_rate": 1.5079155033054104e-06,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16825734078884125,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4082.2,
|
|
"valid_targets_min": 1906
|
|
},
|
|
{
|
|
"epoch": 6.225961538461538,
|
|
"grad_norm": 0.4756372354476479,
|
|
"learning_rate": 1.4776199665690239e-06,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15564313530921936,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3845.8,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 6.233974358974359,
|
|
"grad_norm": 0.5104917457868668,
|
|
"learning_rate": 1.4476201833233084e-06,
|
|
"loss": 0.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515727937221527,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3294.8,
|
|
"valid_targets_min": 2030
|
|
},
|
|
{
|
|
"epoch": 6.24198717948718,
|
|
"grad_norm": 0.5320951676861468,
|
|
"learning_rate": 1.4179166325855676e-06,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17016607522964478,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3325.2,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"grad_norm": 0.5226640302399762,
|
|
"learning_rate": 1.3885097886430599e-06,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17012283205986023,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3378.5,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 6.25801282051282,
|
|
"grad_norm": 0.4885361914877402,
|
|
"learning_rate": 1.35940012104542e-06,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1809297651052475,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4060.0,
|
|
"valid_targets_min": 1970
|
|
},
|
|
{
|
|
"epoch": 6.266025641025641,
|
|
"grad_norm": 0.5500088608225795,
|
|
"learning_rate": 1.3305880945971583e-06,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20557723939418793,
|
|
"step": 3910,
|
|
"valid_targets_mean": 3452.0,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 6.274038461538462,
|
|
"grad_norm": 0.5343047377776239,
|
|
"learning_rate": 1.3020741693502403e-06,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1847967505455017,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3522.4,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 6.282051282051282,
|
|
"grad_norm": 0.5724110582086094,
|
|
"learning_rate": 1.27385880059675e-06,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15530461072921753,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3021.8,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 6.290064102564102,
|
|
"grad_norm": 0.5189903179373517,
|
|
"learning_rate": 1.245942438861607e-06,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16924820840358734,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3413.4,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 6.298076923076923,
|
|
"grad_norm": 0.5487751899432807,
|
|
"learning_rate": 1.2183255298953788e-06,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17502455413341522,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3519.8,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 6.306089743589744,
|
|
"grad_norm": 0.5903657333858096,
|
|
"learning_rate": 1.1910085146671645e-06,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18399037420749664,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3115.6,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 6.314102564102564,
|
|
"grad_norm": 0.5420035653385537,
|
|
"learning_rate": 1.1639918293575492e-06,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21313560009002686,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3612.5,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 6.322115384615385,
|
|
"grad_norm": 0.5477870467404146,
|
|
"learning_rate": 1.1372759053516536e-06,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1668134331703186,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3054.5,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 6.330128205128205,
|
|
"grad_norm": 0.407118652221098,
|
|
"learning_rate": 1.1108611692322157e-06,
|
|
"loss": 0.16,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13269789516925812,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4318.4,
|
|
"valid_targets_min": 1846
|
|
},
|
|
{
|
|
"epoch": 6.3381410256410255,
|
|
"grad_norm": 0.6041896354253443,
|
|
"learning_rate": 1.0847480427728142e-06,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19352270662784576,
|
|
"step": 3955,
|
|
"valid_targets_mean": 2949.6,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 6.346153846153846,
|
|
"grad_norm": 0.44110066264683917,
|
|
"learning_rate": 1.0589369429311125e-06,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455502212047577,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4418.0,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 6.354166666666667,
|
|
"grad_norm": 0.5549047133023548,
|
|
"learning_rate": 1.0334282818422037e-06,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17327052354812622,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3370.1,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 6.362179487179487,
|
|
"grad_norm": 0.5740600633298263,
|
|
"learning_rate": 1.008222466812041e-06,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2384001463651657,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3610.7,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 6.3701923076923075,
|
|
"grad_norm": 0.5797903414700959,
|
|
"learning_rate": 9.83319900310915e-07,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2142302691936493,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3315.9,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 6.378205128205128,
|
|
"grad_norm": 0.48995819790666606,
|
|
"learning_rate": 9.587209799670495e-07,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17958799004554749,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4395.9,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 6.386217948717949,
|
|
"grad_norm": 0.5696379795450798,
|
|
"learning_rate": 9.344260985602327e-07,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16386675834655762,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3073.3,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 6.394230769230769,
|
|
"grad_norm": 0.534668018184061,
|
|
"learning_rate": 9.104356440155526e-07,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17810484766960144,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3377.9,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 6.402243589743589,
|
|
"grad_norm": 0.4734615929959692,
|
|
"learning_rate": 8.867499993972162e-07,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16918233036994934,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4161.4,
|
|
"valid_targets_min": 2309
|
|
},
|
|
{
|
|
"epoch": 6.410256410256411,
|
|
"grad_norm": 0.5439903809679225,
|
|
"learning_rate": 8.633695429024058e-07,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18606598675251007,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3215.1,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 6.418269230769231,
|
|
"grad_norm": 0.5942152209139376,
|
|
"learning_rate": 8.402946478552732e-07,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18717849254608154,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2919.1,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 6.426282051282051,
|
|
"grad_norm": 0.5775374131849361,
|
|
"learning_rate": 8.175256827009392e-07,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2013743817806244,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3049.1,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 6.434294871794872,
|
|
"grad_norm": 0.5969180238661153,
|
|
"learning_rate": 7.95063010999646e-07,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18375027179718018,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3105.9,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 6.4423076923076925,
|
|
"grad_norm": 0.6564343007785355,
|
|
"learning_rate": 7.729069914209409e-07,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2291739583015442,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3057.9,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 6.450320512820513,
|
|
"grad_norm": 0.5128523308822036,
|
|
"learning_rate": 7.510579777379345e-07,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16211318969726562,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3331.9,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 6.458333333333333,
|
|
"grad_norm": 0.6207413303214334,
|
|
"learning_rate": 7.295163188216792e-07,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21440479159355164,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3360.0,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 6.466346153846154,
|
|
"grad_norm": 0.46974688232532347,
|
|
"learning_rate": 7.08282358635568e-07,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16598930954933167,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4054.6,
|
|
"valid_targets_min": 1918
|
|
},
|
|
{
|
|
"epoch": 6.4743589743589745,
|
|
"grad_norm": 0.5760375802259009,
|
|
"learning_rate": 6.87356436229869e-07,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1884935200214386,
|
|
"step": 4040,
|
|
"valid_targets_mean": 2843.3,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 6.482371794871795,
|
|
"grad_norm": 0.4862966080647505,
|
|
"learning_rate": 6.667388857362977e-07,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20953023433685303,
|
|
"step": 4045,
|
|
"valid_targets_mean": 4378.8,
|
|
"valid_targets_min": 2179
|
|
},
|
|
{
|
|
"epoch": 6.490384615384615,
|
|
"grad_norm": 0.48237548817545783,
|
|
"learning_rate": 6.464300363626797e-07,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20548981428146362,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4769.5,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 6.498397435897436,
|
|
"grad_norm": 0.5504436514800234,
|
|
"learning_rate": 6.264302123877053e-07,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17251989245414734,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3314.0,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 6.506410256410256,
|
|
"grad_norm": 0.6176400060173131,
|
|
"learning_rate": 6.067397331557412e-07,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21772697567939758,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3025.1,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 6.514423076923077,
|
|
"grad_norm": 0.6128183784325526,
|
|
"learning_rate": 5.873589130717405e-07,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18539603054523468,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3641.0,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 6.522435897435898,
|
|
"grad_norm": 0.5869844615039573,
|
|
"learning_rate": 5.682880615962116e-07,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18697796761989594,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3250.5,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 6.530448717948718,
|
|
"grad_norm": 0.45395568910065465,
|
|
"learning_rate": 5.495274832402841e-07,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18877571821212769,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4260.9,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 6.538461538461538,
|
|
"grad_norm": 0.5811382204791522,
|
|
"learning_rate": 5.310774775608529e-07,
|
|
"loss": 0.1987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20900984108448029,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3326.0,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 6.546474358974359,
|
|
"grad_norm": 0.5821174706289566,
|
|
"learning_rate": 5.129383391557751e-07,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21676848828792572,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3183.1,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 6.55448717948718,
|
|
"grad_norm": 0.6460932248991629,
|
|
"learning_rate": 4.951103576591876e-07,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18827717006206512,
|
|
"step": 4090,
|
|
"valid_targets_mean": 2421.1,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 6.5625,
|
|
"grad_norm": 0.49037830926652504,
|
|
"learning_rate": 4.7759381773687e-07,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21589890122413635,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4651.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.57051282051282,
|
|
"grad_norm": 0.5084810119478969,
|
|
"learning_rate": 4.6038899908170234e-07,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15555456280708313,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3502.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.578525641025641,
|
|
"grad_norm": 0.5431185847935183,
|
|
"learning_rate": 4.4349617640920164e-07,
|
|
"loss": 0.2007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21165643632411957,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3681.2,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 6.586538461538462,
|
|
"grad_norm": 0.5911359764831849,
|
|
"learning_rate": 4.2691561945312764e-07,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18048742413520813,
|
|
"step": 4110,
|
|
"valid_targets_mean": 2909.3,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 6.594551282051282,
|
|
"grad_norm": 0.6130091429742942,
|
|
"learning_rate": 4.106475929611886e-07,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19334766268730164,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3284.5,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 6.602564102564102,
|
|
"grad_norm": 0.6142827483556337,
|
|
"learning_rate": 3.9469235669080007e-07,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20154422521591187,
|
|
"step": 4120,
|
|
"valid_targets_mean": 2889.7,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 6.610576923076923,
|
|
"grad_norm": 0.6990866949863356,
|
|
"learning_rate": 3.7905016540495053e-07,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24647264182567596,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2540.8,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 6.618589743589744,
|
|
"grad_norm": 0.46263509014103377,
|
|
"learning_rate": 3.63721268868118e-07,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158363938331604,
|
|
"step": 4130,
|
|
"valid_targets_mean": 4078.2,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 6.626602564102564,
|
|
"grad_norm": 0.5032941205599963,
|
|
"learning_rate": 3.487059118422997e-07,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15640424191951752,
|
|
"step": 4135,
|
|
"valid_targets_mean": 3854.6,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 6.634615384615385,
|
|
"grad_norm": 0.4887526857319112,
|
|
"learning_rate": 3.3400433408308895e-07,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16593563556671143,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3915.2,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 6.642628205128205,
|
|
"grad_norm": 0.5614911252396141,
|
|
"learning_rate": 3.196167703358577e-07,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18777693808078766,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3050.1,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 6.6506410256410255,
|
|
"grad_norm": 0.5358038811905682,
|
|
"learning_rate": 3.0554345033199985e-07,
|
|
"loss": 0.1877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18092574179172516,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3627.1,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 6.658653846153846,
|
|
"grad_norm": 0.5716438360594936,
|
|
"learning_rate": 2.917845987852652e-07,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17830929160118103,
|
|
"step": 4155,
|
|
"valid_targets_mean": 3280.9,
|
|
"valid_targets_min": 1690
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.5430177303430579,
|
|
"learning_rate": 2.783404353881758e-07,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16124941408634186,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3723.2,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 6.674679487179487,
|
|
"grad_norm": 0.5549550586423444,
|
|
"learning_rate": 2.652111748085151e-07,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1879281997680664,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3579.1,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 6.6826923076923075,
|
|
"grad_norm": 0.5422698331625211,
|
|
"learning_rate": 2.523970266859044e-07,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18413355946540833,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3527.7,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 6.690705128205128,
|
|
"grad_norm": 0.5692580709391031,
|
|
"learning_rate": 2.398981956284363e-07,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24329707026481628,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3685.5,
|
|
"valid_targets_min": 1907
|
|
},
|
|
{
|
|
"epoch": 6.698717948717949,
|
|
"grad_norm": 0.4258839271286024,
|
|
"learning_rate": 2.2771488120944207e-07,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15105578303337097,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4621.4,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 6.706730769230769,
|
|
"grad_norm": 0.5449932476686346,
|
|
"learning_rate": 2.1584727796427174e-07,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16315647959709167,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3350.1,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 6.714743589743589,
|
|
"grad_norm": 0.4646657622725054,
|
|
"learning_rate": 2.0429557538720556e-07,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14010201394557953,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3919.2,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 6.722756410256411,
|
|
"grad_norm": 0.5246048209942255,
|
|
"learning_rate": 1.930599579284298e-07,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1596519947052002,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3963.7,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 6.730769230769231,
|
|
"grad_norm": 0.5620833700746424,
|
|
"learning_rate": 1.8214060499107679e-07,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17860868573188782,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3272.2,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 6.738782051282051,
|
|
"grad_norm": 0.5754656693227905,
|
|
"learning_rate": 1.7153769092837614e-07,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19689905643463135,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3276.7,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 6.746794871794872,
|
|
"grad_norm": 0.6155174380330322,
|
|
"learning_rate": 1.6125138504086146e-07,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2084151804447174,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3323.5,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 6.7548076923076925,
|
|
"grad_norm": 0.5014874603446833,
|
|
"learning_rate": 1.5128185157367247e-07,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17992736399173737,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3556.7,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 6.762820512820513,
|
|
"grad_norm": 0.5670108496497538,
|
|
"learning_rate": 1.4162924971393044e-07,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17421139776706696,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3347.7,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 6.770833333333333,
|
|
"grad_norm": 0.45413689172272814,
|
|
"learning_rate": 1.322937335881891e-07,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1506701111793518,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3958.9,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 6.778846153846154,
|
|
"grad_norm": 0.5306931473704816,
|
|
"learning_rate": 1.2327545225999215e-07,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16012093424797058,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3542.8,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 6.7868589743589745,
|
|
"grad_norm": 0.4597430728876461,
|
|
"learning_rate": 1.145745497274664e-07,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1277555525302887,
|
|
"step": 4235,
|
|
"valid_targets_mean": 4133.0,
|
|
"valid_targets_min": 2238
|
|
},
|
|
{
|
|
"epoch": 6.794871794871795,
|
|
"grad_norm": 0.5289155251909863,
|
|
"learning_rate": 1.061911649210523e-07,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734294891357422,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3650.6,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 6.802884615384615,
|
|
"grad_norm": 0.8185197904986652,
|
|
"learning_rate": 9.812543170126365e-08,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18272753059864044,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3363.8,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 6.810897435897436,
|
|
"grad_norm": 0.6406656101310069,
|
|
"learning_rate": 9.03774788565559e-08,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21644482016563416,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3481.6,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 6.818910256410256,
|
|
"grad_norm": 0.5737761481772319,
|
|
"learning_rate": 8.294743010127448e-08,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17666317522525787,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3081.4,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 6.826923076923077,
|
|
"grad_norm": 0.5417175250426073,
|
|
"learning_rate": 7.583540407367418e-08,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564117670059204,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3170.7,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 6.834935897435898,
|
|
"grad_norm": 0.6257507553189467,
|
|
"learning_rate": 6.904151433402728e-08,
|
|
"loss": 0.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22169390320777893,
|
|
"step": 4265,
|
|
"valid_targets_mean": 2890.9,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 6.842948717948718,
|
|
"grad_norm": 0.5956634707447183,
|
|
"learning_rate": 6.256586936281172e-08,
|
|
"loss": 0.1814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17920449376106262,
|
|
"step": 4270,
|
|
"valid_targets_mean": 2817.1,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 6.850961538461538,
|
|
"grad_norm": 0.5901129809534669,
|
|
"learning_rate": 5.6408572558972475e-08,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18381944298744202,
|
|
"step": 4275,
|
|
"valid_targets_mean": 3248.1,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 6.858974358974359,
|
|
"grad_norm": 0.5375203922856862,
|
|
"learning_rate": 5.0569722238280605e-08,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16048292815685272,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3052.4,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 6.86698717948718,
|
|
"grad_norm": 0.4429837357031785,
|
|
"learning_rate": 4.504941163175236e-08,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303357481956482,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4013.2,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 6.875,
|
|
"grad_norm": 0.46755542056895144,
|
|
"learning_rate": 3.984772888417032e-08,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15034985542297363,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3739.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 6.88301282051282,
|
|
"grad_norm": 0.5225117765429849,
|
|
"learning_rate": 3.4964757052671216e-08,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16861560940742493,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3642.2,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 6.891025641025641,
|
|
"grad_norm": 0.5708858038464354,
|
|
"learning_rate": 3.0400574105415856e-08,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16104033589363098,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3452.1,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 6.899038461538462,
|
|
"grad_norm": 0.5338479864413854,
|
|
"learning_rate": 2.615525292035459e-08,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19287021458148956,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3645.0,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 6.907051282051282,
|
|
"grad_norm": 0.5011813250662843,
|
|
"learning_rate": 2.222886128405266e-08,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17692403495311737,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3748.9,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 6.915064102564102,
|
|
"grad_norm": 0.626908983487979,
|
|
"learning_rate": 1.8621461890617752e-08,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17852284014225006,
|
|
"step": 4315,
|
|
"valid_targets_mean": 2626.8,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 6.923076923076923,
|
|
"grad_norm": 0.5543654144483077,
|
|
"learning_rate": 1.5333112340687463e-08,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16991391777992249,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3477.0,
|
|
"valid_targets_min": 2240
|
|
},
|
|
{
|
|
"epoch": 6.931089743589744,
|
|
"grad_norm": 0.518706878581187,
|
|
"learning_rate": 1.2363865140518905e-08,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17875301837921143,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3591.9,
|
|
"valid_targets_min": 1830
|
|
},
|
|
{
|
|
"epoch": 6.939102564102564,
|
|
"grad_norm": 0.49639795431937256,
|
|
"learning_rate": 9.713767701151621e-09,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16456924378871918,
|
|
"step": 4330,
|
|
"valid_targets_mean": 3846.6,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 6.947115384615385,
|
|
"grad_norm": 0.561660195109291,
|
|
"learning_rate": 7.382862337641516e-09,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20046007633209229,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3275.1,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 6.955128205128205,
|
|
"grad_norm": 0.5421335045005533,
|
|
"learning_rate": 5.371186268390283e-09,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522393673658371,
|
|
"step": 4340,
|
|
"valid_targets_mean": 3741.4,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 6.9631410256410255,
|
|
"grad_norm": 0.5325986102299718,
|
|
"learning_rate": 3.678771614550325e-09,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18922027945518494,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3629.2,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 6.971153846153846,
|
|
"grad_norm": 0.4855368562058737,
|
|
"learning_rate": 2.3056453995162763e-09,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17648158967494965,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3896.1,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 6.979166666666667,
|
|
"grad_norm": 0.5169470715238487,
|
|
"learning_rate": 1.2518295484875708e-09,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1792326271533966,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3725.2,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 6.987179487179487,
|
|
"grad_norm": 0.5051351410792214,
|
|
"learning_rate": 5.173408881198328e-10,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18018975853919983,
|
|
"step": 4360,
|
|
"valid_targets_mean": 3765.9,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 6.9951923076923075,
|
|
"grad_norm": 0.4793212594197615,
|
|
"learning_rate": 1.0219114625398263e-10,
|
|
"loss": 0.1811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14409731328487396,
|
|
"step": 4365,
|
|
"valid_targets_mean": 3652.4,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1759420782327652,
|
|
"step": 4368,
|
|
"total_flos": 875542416195584.0,
|
|
"train_loss": 0.13199154654647405,
|
|
"train_runtime": 8877.0231,
|
|
"train_samples_per_second": 7.872,
|
|
"train_steps_per_second": 0.492,
|
|
"valid_targets_mean": 3122.6,
|
|
"valid_targets_min": 2000
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4368,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 875542416195584.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|