Model: laion/exp-syh-tezos-askllm-hardened_glm_4_7_traces_jupiter_cleaned Source: Original Platform
9255 lines
257 KiB
JSON
9255 lines
257 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4186,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008361204013377926,
|
|
"grad_norm": 9.704340069837663,
|
|
"learning_rate": 3.8186157517899765e-07,
|
|
"loss": 0.5569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854292392730713,
|
|
"step": 5,
|
|
"valid_targets_mean": 9895.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 0.016722408026755852,
|
|
"grad_norm": 9.506600624267989,
|
|
"learning_rate": 8.591885441527446e-07,
|
|
"loss": 0.5851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20839756727218628,
|
|
"step": 10,
|
|
"valid_targets_mean": 8030.5,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 0.02508361204013378,
|
|
"grad_norm": 8.732075327421436,
|
|
"learning_rate": 1.3365155131264918e-06,
|
|
"loss": 0.5567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969813346862793,
|
|
"step": 15,
|
|
"valid_targets_mean": 9460.6,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 0.033444816053511704,
|
|
"grad_norm": 6.105369937365189,
|
|
"learning_rate": 1.8138424821002388e-06,
|
|
"loss": 0.5159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21955657005310059,
|
|
"step": 20,
|
|
"valid_targets_mean": 6872.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 0.04180602006688963,
|
|
"grad_norm": 4.121866788708091,
|
|
"learning_rate": 2.291169451073986e-06,
|
|
"loss": 0.4899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.336142361164093,
|
|
"step": 25,
|
|
"valid_targets_mean": 12226.1,
|
|
"valid_targets_min": 4858
|
|
},
|
|
{
|
|
"epoch": 0.05016722408026756,
|
|
"grad_norm": 2.4335982462543195,
|
|
"learning_rate": 2.768496420047733e-06,
|
|
"loss": 0.4884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1771148294210434,
|
|
"step": 30,
|
|
"valid_targets_mean": 12275.4,
|
|
"valid_targets_min": 3463
|
|
},
|
|
{
|
|
"epoch": 0.05852842809364549,
|
|
"grad_norm": 1.7378294112691788,
|
|
"learning_rate": 3.2458233890214803e-06,
|
|
"loss": 0.4961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2400568723678589,
|
|
"step": 35,
|
|
"valid_targets_mean": 9735.0,
|
|
"valid_targets_min": 2534
|
|
},
|
|
{
|
|
"epoch": 0.06688963210702341,
|
|
"grad_norm": 1.008013068910711,
|
|
"learning_rate": 3.7231503579952273e-06,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24642127752304077,
|
|
"step": 40,
|
|
"valid_targets_mean": 11687.8,
|
|
"valid_targets_min": 4346
|
|
},
|
|
{
|
|
"epoch": 0.07525083612040134,
|
|
"grad_norm": 0.7240789446446454,
|
|
"learning_rate": 4.200477326968974e-06,
|
|
"loss": 0.444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20315223932266235,
|
|
"step": 45,
|
|
"valid_targets_mean": 8640.0,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 0.08361204013377926,
|
|
"grad_norm": 1.285733069217113,
|
|
"learning_rate": 4.677804295942721e-06,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18670576810836792,
|
|
"step": 50,
|
|
"valid_targets_mean": 7167.9,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 0.09197324414715718,
|
|
"grad_norm": 0.46491479303387045,
|
|
"learning_rate": 5.155131264916468e-06,
|
|
"loss": 0.4212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20199692249298096,
|
|
"step": 55,
|
|
"valid_targets_mean": 16380.4,
|
|
"valid_targets_min": 7945
|
|
},
|
|
{
|
|
"epoch": 0.10033444816053512,
|
|
"grad_norm": 0.521369820272511,
|
|
"learning_rate": 5.632458233890216e-06,
|
|
"loss": 0.4435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2025570124387741,
|
|
"step": 60,
|
|
"valid_targets_mean": 8674.1,
|
|
"valid_targets_min": 2659
|
|
},
|
|
{
|
|
"epoch": 0.10869565217391304,
|
|
"grad_norm": 0.5014046901245928,
|
|
"learning_rate": 6.109785202863962e-06,
|
|
"loss": 0.3899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16892071068286896,
|
|
"step": 65,
|
|
"valid_targets_mean": 8458.6,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 0.11705685618729098,
|
|
"grad_norm": 0.513856577355547,
|
|
"learning_rate": 6.58711217183771e-06,
|
|
"loss": 0.4187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18453490734100342,
|
|
"step": 70,
|
|
"valid_targets_mean": 6080.1,
|
|
"valid_targets_min": 2256
|
|
},
|
|
{
|
|
"epoch": 0.1254180602006689,
|
|
"grad_norm": 0.3907061919305754,
|
|
"learning_rate": 7.0644391408114565e-06,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20158548653125763,
|
|
"step": 75,
|
|
"valid_targets_mean": 13529.4,
|
|
"valid_targets_min": 8718
|
|
},
|
|
{
|
|
"epoch": 0.13377926421404682,
|
|
"grad_norm": 0.44136771689675725,
|
|
"learning_rate": 7.541766109785204e-06,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19762465357780457,
|
|
"step": 80,
|
|
"valid_targets_mean": 10343.8,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 0.14214046822742474,
|
|
"grad_norm": 0.40468562324441065,
|
|
"learning_rate": 8.01909307875895e-06,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15114924311637878,
|
|
"step": 85,
|
|
"valid_targets_mean": 8601.0,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 0.1505016722408027,
|
|
"grad_norm": 0.40246299010134245,
|
|
"learning_rate": 8.496420047732697e-06,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2025594413280487,
|
|
"step": 90,
|
|
"valid_targets_mean": 9578.6,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 0.1588628762541806,
|
|
"grad_norm": 0.40067800056354774,
|
|
"learning_rate": 8.973747016706445e-06,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14829301834106445,
|
|
"step": 95,
|
|
"valid_targets_mean": 7018.5,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 0.16722408026755853,
|
|
"grad_norm": 0.40871207179537683,
|
|
"learning_rate": 9.451073985680192e-06,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1789432018995285,
|
|
"step": 100,
|
|
"valid_targets_mean": 9051.8,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 0.17558528428093645,
|
|
"grad_norm": 0.45343223535698823,
|
|
"learning_rate": 9.928400954653938e-06,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20758679509162903,
|
|
"step": 105,
|
|
"valid_targets_mean": 8236.8,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.18394648829431437,
|
|
"grad_norm": 0.3950877891932281,
|
|
"learning_rate": 1.0405727923627687e-05,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20216166973114014,
|
|
"step": 110,
|
|
"valid_targets_mean": 9768.6,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 0.19230769230769232,
|
|
"grad_norm": 0.41079767484612434,
|
|
"learning_rate": 1.0883054892601433e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14861980080604553,
|
|
"step": 115,
|
|
"valid_targets_mean": 7944.8,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 0.20066889632107024,
|
|
"grad_norm": 0.4294997550463128,
|
|
"learning_rate": 1.136038186157518e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16971172392368317,
|
|
"step": 120,
|
|
"valid_targets_mean": 10083.2,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 0.20903010033444816,
|
|
"grad_norm": 0.3283477881534068,
|
|
"learning_rate": 1.1837708830548926e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12869468331336975,
|
|
"step": 125,
|
|
"valid_targets_mean": 11373.4,
|
|
"valid_targets_min": 3816
|
|
},
|
|
{
|
|
"epoch": 0.21739130434782608,
|
|
"grad_norm": 0.42834590129271377,
|
|
"learning_rate": 1.2315035799522675e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17562639713287354,
|
|
"step": 130,
|
|
"valid_targets_mean": 12009.6,
|
|
"valid_targets_min": 4006
|
|
},
|
|
{
|
|
"epoch": 0.225752508361204,
|
|
"grad_norm": 0.40328428606882416,
|
|
"learning_rate": 1.279236276849642e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2039901465177536,
|
|
"step": 135,
|
|
"valid_targets_mean": 9897.2,
|
|
"valid_targets_min": 2633
|
|
},
|
|
{
|
|
"epoch": 0.23411371237458195,
|
|
"grad_norm": 0.3788950250922431,
|
|
"learning_rate": 1.3269689737470168e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1703433245420456,
|
|
"step": 140,
|
|
"valid_targets_mean": 11593.9,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 0.24247491638795987,
|
|
"grad_norm": 0.4581603492527259,
|
|
"learning_rate": 1.3747016706443914e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1755390763282776,
|
|
"step": 145,
|
|
"valid_targets_mean": 9646.8,
|
|
"valid_targets_min": 4261
|
|
},
|
|
{
|
|
"epoch": 0.2508361204013378,
|
|
"grad_norm": 0.37843854676940997,
|
|
"learning_rate": 1.4224343675417661e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19455718994140625,
|
|
"step": 150,
|
|
"valid_targets_mean": 13260.2,
|
|
"valid_targets_min": 3323
|
|
},
|
|
{
|
|
"epoch": 0.2591973244147157,
|
|
"grad_norm": 0.43271154048822974,
|
|
"learning_rate": 1.470167064439141e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16561198234558105,
|
|
"step": 155,
|
|
"valid_targets_mean": 7743.2,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 0.26755852842809363,
|
|
"grad_norm": 0.4322457449655984,
|
|
"learning_rate": 1.5178997613365156e-05,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23260743916034698,
|
|
"step": 160,
|
|
"valid_targets_mean": 10150.2,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.27591973244147155,
|
|
"grad_norm": 0.433582974461392,
|
|
"learning_rate": 1.5656324582338905e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17154215276241302,
|
|
"step": 165,
|
|
"valid_targets_mean": 9766.1,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 0.2842809364548495,
|
|
"grad_norm": 0.41989670588650063,
|
|
"learning_rate": 1.613365155131265e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1661354899406433,
|
|
"step": 170,
|
|
"valid_targets_mean": 8886.9,
|
|
"valid_targets_min": 3534
|
|
},
|
|
{
|
|
"epoch": 0.29264214046822745,
|
|
"grad_norm": 0.5300334532382173,
|
|
"learning_rate": 1.6610978520286397e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1783657670021057,
|
|
"step": 175,
|
|
"valid_targets_mean": 6207.1,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 0.3010033444816054,
|
|
"grad_norm": 0.38191544763776747,
|
|
"learning_rate": 1.7088305489260143e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15386468172073364,
|
|
"step": 180,
|
|
"valid_targets_mean": 9094.2,
|
|
"valid_targets_min": 3399
|
|
},
|
|
{
|
|
"epoch": 0.3093645484949833,
|
|
"grad_norm": 0.4177328465822325,
|
|
"learning_rate": 1.7565632458233892e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1772809624671936,
|
|
"step": 185,
|
|
"valid_targets_mean": 9946.0,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.3177257525083612,
|
|
"grad_norm": 0.3916557624209172,
|
|
"learning_rate": 1.8042959427207638e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17139336466789246,
|
|
"step": 190,
|
|
"valid_targets_mean": 11076.0,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 0.32608695652173914,
|
|
"grad_norm": 0.7413380847599481,
|
|
"learning_rate": 1.8520286396181387e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1589488387107849,
|
|
"step": 195,
|
|
"valid_targets_mean": 8130.6,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 0.33444816053511706,
|
|
"grad_norm": 0.5941116609754149,
|
|
"learning_rate": 1.8997613365155133e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12648816406726837,
|
|
"step": 200,
|
|
"valid_targets_mean": 6539.0,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 0.342809364548495,
|
|
"grad_norm": 0.45110228594558766,
|
|
"learning_rate": 1.947494033412888e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17383962869644165,
|
|
"step": 205,
|
|
"valid_targets_mean": 12104.6,
|
|
"valid_targets_min": 5195
|
|
},
|
|
{
|
|
"epoch": 0.3511705685618729,
|
|
"grad_norm": 0.5033379653424239,
|
|
"learning_rate": 1.9952267303102627e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13696937263011932,
|
|
"step": 210,
|
|
"valid_targets_mean": 7175.1,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 0.3595317725752508,
|
|
"grad_norm": 0.3472355315435835,
|
|
"learning_rate": 2.0429594272076373e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12576332688331604,
|
|
"step": 215,
|
|
"valid_targets_mean": 11372.0,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 0.36789297658862874,
|
|
"grad_norm": 0.5168717076471069,
|
|
"learning_rate": 2.090692124105012e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1560574620962143,
|
|
"step": 220,
|
|
"valid_targets_mean": 11204.4,
|
|
"valid_targets_min": 4827
|
|
},
|
|
{
|
|
"epoch": 0.3762541806020067,
|
|
"grad_norm": 0.5875230124371389,
|
|
"learning_rate": 2.1384248210023868e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17846718430519104,
|
|
"step": 225,
|
|
"valid_targets_mean": 10496.1,
|
|
"valid_targets_min": 2676
|
|
},
|
|
{
|
|
"epoch": 0.38461538461538464,
|
|
"grad_norm": 0.3725427034125673,
|
|
"learning_rate": 2.1861575178997617e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1652049720287323,
|
|
"step": 230,
|
|
"valid_targets_mean": 12805.9,
|
|
"valid_targets_min": 3516
|
|
},
|
|
{
|
|
"epoch": 0.39297658862876256,
|
|
"grad_norm": 0.46904720788750665,
|
|
"learning_rate": 2.2338902147971363e-05,
|
|
"loss": 0.338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16661886870861053,
|
|
"step": 235,
|
|
"valid_targets_mean": 6685.5,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 0.4013377926421405,
|
|
"grad_norm": 0.39364437446033823,
|
|
"learning_rate": 2.281622911694511e-05,
|
|
"loss": 0.3335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20515011250972748,
|
|
"step": 240,
|
|
"valid_targets_mean": 11119.1,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 0.4096989966555184,
|
|
"grad_norm": 0.47060126518184453,
|
|
"learning_rate": 2.3293556085918854e-05,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09363420307636261,
|
|
"step": 245,
|
|
"valid_targets_mean": 5870.4,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 0.4180602006688963,
|
|
"grad_norm": 0.4220297290706089,
|
|
"learning_rate": 2.3770883054892604e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17007336020469666,
|
|
"step": 250,
|
|
"valid_targets_mean": 13040.5,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 0.42642140468227424,
|
|
"grad_norm": 0.38335930491640424,
|
|
"learning_rate": 2.4248210023866353e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15440049767494202,
|
|
"step": 255,
|
|
"valid_targets_mean": 12792.5,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 0.43478260869565216,
|
|
"grad_norm": 0.39258032259986825,
|
|
"learning_rate": 2.4725536992840095e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15133775770664215,
|
|
"step": 260,
|
|
"valid_targets_mean": 11296.4,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 0.4431438127090301,
|
|
"grad_norm": 0.3687647761886547,
|
|
"learning_rate": 2.5202863961813844e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14956405758857727,
|
|
"step": 265,
|
|
"valid_targets_mean": 10385.2,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 0.451505016722408,
|
|
"grad_norm": 0.43840715413280207,
|
|
"learning_rate": 2.5680190930787593e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23394551873207092,
|
|
"step": 270,
|
|
"valid_targets_mean": 12659.9,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 0.459866220735786,
|
|
"grad_norm": 0.38965158382575593,
|
|
"learning_rate": 2.615751789976134e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16852575540542603,
|
|
"step": 275,
|
|
"valid_targets_mean": 11741.4,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 0.4682274247491639,
|
|
"grad_norm": 0.400692625615972,
|
|
"learning_rate": 2.6634844868735085e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13496558368206024,
|
|
"step": 280,
|
|
"valid_targets_mean": 8287.6,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 0.4765886287625418,
|
|
"grad_norm": 0.38946712835281516,
|
|
"learning_rate": 2.711217183770883e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13237497210502625,
|
|
"step": 285,
|
|
"valid_targets_mean": 9373.2,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 0.48494983277591974,
|
|
"grad_norm": 0.4964416259532856,
|
|
"learning_rate": 2.758949880668258e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1885404884815216,
|
|
"step": 290,
|
|
"valid_targets_mean": 10225.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.49331103678929766,
|
|
"grad_norm": 0.6379324828416614,
|
|
"learning_rate": 2.806682577565633e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15571287274360657,
|
|
"step": 295,
|
|
"valid_targets_mean": 9324.4,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 0.5016722408026756,
|
|
"grad_norm": 0.47552233837655283,
|
|
"learning_rate": 2.8544152744630075e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20169281959533691,
|
|
"step": 300,
|
|
"valid_targets_mean": 12377.4,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 0.5100334448160535,
|
|
"grad_norm": 0.47979392160139867,
|
|
"learning_rate": 2.902147971360382e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11661270260810852,
|
|
"step": 305,
|
|
"valid_targets_mean": 9605.4,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 0.5183946488294314,
|
|
"grad_norm": 0.383778002210665,
|
|
"learning_rate": 2.9498806682577566e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20912659168243408,
|
|
"step": 310,
|
|
"valid_targets_mean": 12691.2,
|
|
"valid_targets_min": 4078
|
|
},
|
|
{
|
|
"epoch": 0.5267558528428093,
|
|
"grad_norm": 0.48347619117658297,
|
|
"learning_rate": 2.9976133651551315e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15700465440750122,
|
|
"step": 315,
|
|
"valid_targets_mean": 7212.4,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 0.5351170568561873,
|
|
"grad_norm": 0.5579731538317956,
|
|
"learning_rate": 3.0453460620525064e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1959255039691925,
|
|
"step": 320,
|
|
"valid_targets_mean": 7471.1,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 0.5434782608695652,
|
|
"grad_norm": 0.36395086228404944,
|
|
"learning_rate": 3.093078758949881e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14121195673942566,
|
|
"step": 325,
|
|
"valid_targets_mean": 9249.9,
|
|
"valid_targets_min": 4157
|
|
},
|
|
{
|
|
"epoch": 0.5518394648829431,
|
|
"grad_norm": 0.45098529137309534,
|
|
"learning_rate": 3.140811455847255e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18188264966011047,
|
|
"step": 330,
|
|
"valid_targets_mean": 12850.2,
|
|
"valid_targets_min": 3546
|
|
},
|
|
{
|
|
"epoch": 0.560200668896321,
|
|
"grad_norm": 0.3885761324542193,
|
|
"learning_rate": 3.18854415274463e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12168922275304794,
|
|
"step": 335,
|
|
"valid_targets_mean": 10407.9,
|
|
"valid_targets_min": 2364
|
|
},
|
|
{
|
|
"epoch": 0.568561872909699,
|
|
"grad_norm": 0.4421596227641516,
|
|
"learning_rate": 3.236276849642005e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13702897727489471,
|
|
"step": 340,
|
|
"valid_targets_mean": 7520.0,
|
|
"valid_targets_min": 2643
|
|
},
|
|
{
|
|
"epoch": 0.5769230769230769,
|
|
"grad_norm": 0.536695617996766,
|
|
"learning_rate": 3.284009546539379e-05,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21040737628936768,
|
|
"step": 345,
|
|
"valid_targets_mean": 10221.4,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 0.5852842809364549,
|
|
"grad_norm": 0.3900374361322349,
|
|
"learning_rate": 3.331742243436754e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16920633614063263,
|
|
"step": 350,
|
|
"valid_targets_mean": 10571.5,
|
|
"valid_targets_min": 2418
|
|
},
|
|
{
|
|
"epoch": 0.5936454849498328,
|
|
"grad_norm": 0.42879814620431844,
|
|
"learning_rate": 3.379474940334129e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12136104702949524,
|
|
"step": 355,
|
|
"valid_targets_mean": 7590.8,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 0.6020066889632107,
|
|
"grad_norm": 0.3815752484700554,
|
|
"learning_rate": 3.427207637231504e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14645403623580933,
|
|
"step": 360,
|
|
"valid_targets_mean": 10975.4,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.6103678929765887,
|
|
"grad_norm": 0.4236111918341472,
|
|
"learning_rate": 3.474940334128879e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19070662558078766,
|
|
"step": 365,
|
|
"valid_targets_mean": 13051.4,
|
|
"valid_targets_min": 3279
|
|
},
|
|
{
|
|
"epoch": 0.6187290969899666,
|
|
"grad_norm": 0.4387151345363497,
|
|
"learning_rate": 3.522673031026253e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1827457994222641,
|
|
"step": 370,
|
|
"valid_targets_mean": 10704.5,
|
|
"valid_targets_min": 4000
|
|
},
|
|
{
|
|
"epoch": 0.6270903010033445,
|
|
"grad_norm": 0.6490054010629769,
|
|
"learning_rate": 3.570405727923628e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10865180194377899,
|
|
"step": 375,
|
|
"valid_targets_mean": 3634.4,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.6354515050167224,
|
|
"grad_norm": 0.3611149395234458,
|
|
"learning_rate": 3.618138424821003e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13920778036117554,
|
|
"step": 380,
|
|
"valid_targets_mean": 14059.8,
|
|
"valid_targets_min": 9962
|
|
},
|
|
{
|
|
"epoch": 0.6438127090301003,
|
|
"grad_norm": 0.486299918141066,
|
|
"learning_rate": 3.665871121718377e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2080010324716568,
|
|
"step": 385,
|
|
"valid_targets_mean": 9047.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.6521739130434783,
|
|
"grad_norm": 0.5207474395106452,
|
|
"learning_rate": 3.713603818615752e-05,
|
|
"loss": 0.3321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17088118195533752,
|
|
"step": 390,
|
|
"valid_targets_mean": 9675.4,
|
|
"valid_targets_min": 2603
|
|
},
|
|
{
|
|
"epoch": 0.6605351170568562,
|
|
"grad_norm": 0.34878729449260554,
|
|
"learning_rate": 3.7613365155131264e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12294599413871765,
|
|
"step": 395,
|
|
"valid_targets_mean": 11294.9,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 0.6688963210702341,
|
|
"grad_norm": 0.479799973744041,
|
|
"learning_rate": 3.8090692124105013e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10345245897769928,
|
|
"step": 400,
|
|
"valid_targets_mean": 6875.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.677257525083612,
|
|
"grad_norm": 0.43670569394078723,
|
|
"learning_rate": 3.856801909307876e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1845102161169052,
|
|
"step": 405,
|
|
"valid_targets_mean": 10869.1,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 0.68561872909699,
|
|
"grad_norm": 0.43329094218603403,
|
|
"learning_rate": 3.9045346062052505e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14793120324611664,
|
|
"step": 410,
|
|
"valid_targets_mean": 7325.5,
|
|
"valid_targets_min": 4323
|
|
},
|
|
{
|
|
"epoch": 0.6939799331103679,
|
|
"grad_norm": 0.6039811227810687,
|
|
"learning_rate": 3.9522673031026254e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14811517298221588,
|
|
"step": 415,
|
|
"valid_targets_mean": 7638.4,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.7023411371237458,
|
|
"grad_norm": 0.48852384277190175,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18178792297840118,
|
|
"step": 420,
|
|
"valid_targets_mean": 8873.4,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 0.7107023411371237,
|
|
"grad_norm": 0.4010926179154007,
|
|
"learning_rate": 3.999982612069952e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12944182753562927,
|
|
"step": 425,
|
|
"valid_targets_mean": 9162.0,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 0.7190635451505016,
|
|
"grad_norm": 0.3912235994053385,
|
|
"learning_rate": 3.999930448582146e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19051608443260193,
|
|
"step": 430,
|
|
"valid_targets_mean": 11398.8,
|
|
"valid_targets_min": 5282
|
|
},
|
|
{
|
|
"epoch": 0.7274247491638796,
|
|
"grad_norm": 0.36884316079661383,
|
|
"learning_rate": 3.999843510443598e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12911425530910492,
|
|
"step": 435,
|
|
"valid_targets_mean": 8962.9,
|
|
"valid_targets_min": 3089
|
|
},
|
|
{
|
|
"epoch": 0.7357859531772575,
|
|
"grad_norm": 0.4362049904975079,
|
|
"learning_rate": 3.999721799165982e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15078511834144592,
|
|
"step": 440,
|
|
"valid_targets_mean": 10268.6,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 0.7441471571906354,
|
|
"grad_norm": 0.45748473700083303,
|
|
"learning_rate": 3.9995653168656056e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14863935112953186,
|
|
"step": 445,
|
|
"valid_targets_mean": 8205.0,
|
|
"valid_targets_min": 4399
|
|
},
|
|
{
|
|
"epoch": 0.7525083612040134,
|
|
"grad_norm": 0.4197026157794245,
|
|
"learning_rate": 3.9993740662633725e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11096012592315674,
|
|
"step": 450,
|
|
"valid_targets_mean": 9252.2,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 0.7608695652173914,
|
|
"grad_norm": 0.39145797859300513,
|
|
"learning_rate": 3.999148050684733e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14486055076122284,
|
|
"step": 455,
|
|
"valid_targets_mean": 8927.5,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.4224885552149601,
|
|
"learning_rate": 3.998887274059632e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17703157663345337,
|
|
"step": 460,
|
|
"valid_targets_mean": 9058.4,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 0.7775919732441472,
|
|
"grad_norm": 0.37722924780437955,
|
|
"learning_rate": 3.998591740922435e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12123087048530579,
|
|
"step": 465,
|
|
"valid_targets_mean": 8386.5,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 0.7859531772575251,
|
|
"grad_norm": 0.6642141403149759,
|
|
"learning_rate": 3.9982614564118506e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15513786673545837,
|
|
"step": 470,
|
|
"valid_targets_mean": 9863.6,
|
|
"valid_targets_min": 3112
|
|
},
|
|
{
|
|
"epoch": 0.794314381270903,
|
|
"grad_norm": 0.3989521375431216,
|
|
"learning_rate": 3.997896426270843e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15655697882175446,
|
|
"step": 475,
|
|
"valid_targets_mean": 11828.0,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 0.802675585284281,
|
|
"grad_norm": 0.4923679994587432,
|
|
"learning_rate": 3.9974966568465315e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20147736370563507,
|
|
"step": 480,
|
|
"valid_targets_mean": 8945.1,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 0.8110367892976589,
|
|
"grad_norm": 0.38743342717296536,
|
|
"learning_rate": 3.997062155090078e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10722160339355469,
|
|
"step": 485,
|
|
"valid_targets_mean": 7325.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 0.8193979933110368,
|
|
"grad_norm": 0.7181033042801787,
|
|
"learning_rate": 3.996592928556569e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1670161485671997,
|
|
"step": 490,
|
|
"valid_targets_mean": 9772.0,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 0.8277591973244147,
|
|
"grad_norm": 0.43285808874328063,
|
|
"learning_rate": 3.996088985404882e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1052534431219101,
|
|
"step": 495,
|
|
"valid_targets_mean": 6232.9,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 0.8361204013377926,
|
|
"grad_norm": 0.39525360131240145,
|
|
"learning_rate": 3.995550334397547e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15355992317199707,
|
|
"step": 500,
|
|
"valid_targets_mean": 10627.1,
|
|
"valid_targets_min": 3429
|
|
},
|
|
{
|
|
"epoch": 0.8444816053511706,
|
|
"grad_norm": 0.4502773061331724,
|
|
"learning_rate": 3.994976984900589e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19775904715061188,
|
|
"step": 505,
|
|
"valid_targets_mean": 13477.9,
|
|
"valid_targets_min": 8403
|
|
},
|
|
{
|
|
"epoch": 0.8528428093645485,
|
|
"grad_norm": 0.4962123141021862,
|
|
"learning_rate": 3.994368946883367e-05,
|
|
"loss": 0.3174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11220596730709076,
|
|
"step": 510,
|
|
"valid_targets_mean": 5743.1,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 0.8612040133779264,
|
|
"grad_norm": 0.36409438510022263,
|
|
"learning_rate": 3.993726230918407e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15582388639450073,
|
|
"step": 515,
|
|
"valid_targets_mean": 11274.0,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 0.8695652173913043,
|
|
"grad_norm": 0.4217597391519935,
|
|
"learning_rate": 3.993048848181208e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14167800545692444,
|
|
"step": 520,
|
|
"valid_targets_mean": 9329.6,
|
|
"valid_targets_min": 3300
|
|
},
|
|
{
|
|
"epoch": 0.8779264214046822,
|
|
"grad_norm": 0.46377620524245133,
|
|
"learning_rate": 3.992336810450053e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1527860462665558,
|
|
"step": 525,
|
|
"valid_targets_mean": 8937.6,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 0.8862876254180602,
|
|
"grad_norm": 0.3755533862247,
|
|
"learning_rate": 3.9915901301058045e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13247841596603394,
|
|
"step": 530,
|
|
"valid_targets_mean": 11316.2,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 0.8946488294314381,
|
|
"grad_norm": 0.41285746938155377,
|
|
"learning_rate": 3.9908088201316874e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12979161739349365,
|
|
"step": 535,
|
|
"valid_targets_mean": 9788.4,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 0.903010033444816,
|
|
"grad_norm": 0.46831315558184283,
|
|
"learning_rate": 3.989992894113067e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1559295803308487,
|
|
"step": 540,
|
|
"valid_targets_mean": 8083.8,
|
|
"valid_targets_min": 2989
|
|
},
|
|
{
|
|
"epoch": 0.9113712374581939,
|
|
"grad_norm": 0.4626117800501663,
|
|
"learning_rate": 3.989142366237205e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17271871864795685,
|
|
"step": 545,
|
|
"valid_targets_mean": 9725.6,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 0.919732441471572,
|
|
"grad_norm": 0.5334452506178405,
|
|
"learning_rate": 3.9882572512930234e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17225036025047302,
|
|
"step": 550,
|
|
"valid_targets_mean": 12995.4,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 0.9280936454849499,
|
|
"grad_norm": 0.4721409952390065,
|
|
"learning_rate": 3.987337564670837e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17159369587898254,
|
|
"step": 555,
|
|
"valid_targets_mean": 8161.1,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.9364548494983278,
|
|
"grad_norm": 0.37051791413233026,
|
|
"learning_rate": 3.9863833223620926e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16717952489852905,
|
|
"step": 560,
|
|
"valid_targets_mean": 11993.6,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 0.9448160535117057,
|
|
"grad_norm": 0.41649307949890185,
|
|
"learning_rate": 3.9853945409590904e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1164616271853447,
|
|
"step": 565,
|
|
"valid_targets_mean": 9005.4,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 0.9531772575250836,
|
|
"grad_norm": 0.4083172076816169,
|
|
"learning_rate": 3.98437123765469e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383233368396759,
|
|
"step": 570,
|
|
"valid_targets_mean": 9895.6,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.9615384615384616,
|
|
"grad_norm": 0.5016033638307947,
|
|
"learning_rate": 3.9833134302420194e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1754838526248932,
|
|
"step": 575,
|
|
"valid_targets_mean": 10179.2,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.9698996655518395,
|
|
"grad_norm": 0.3589660839126059,
|
|
"learning_rate": 3.982221137114159e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13110172748565674,
|
|
"step": 580,
|
|
"valid_targets_mean": 9929.5,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 0.9782608695652174,
|
|
"grad_norm": 0.4371446766479744,
|
|
"learning_rate": 3.981094377263825e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2029157131910324,
|
|
"step": 585,
|
|
"valid_targets_mean": 8479.9,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 0.9866220735785953,
|
|
"grad_norm": 0.3940564894220142,
|
|
"learning_rate": 3.97993317028304e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12427155673503876,
|
|
"step": 590,
|
|
"valid_targets_mean": 8194.5,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 0.9949832775919732,
|
|
"grad_norm": 0.4223569394782657,
|
|
"learning_rate": 3.978737536362789e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11656618118286133,
|
|
"step": 595,
|
|
"valid_targets_mean": 10453.9,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 1.0033444816053512,
|
|
"grad_norm": 0.40478168098429507,
|
|
"learning_rate": 3.977507496292671e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09155194461345673,
|
|
"step": 600,
|
|
"valid_targets_mean": 6334.8,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 1.0117056856187292,
|
|
"grad_norm": 0.411947335044531,
|
|
"learning_rate": 3.9762430714605376e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13678449392318726,
|
|
"step": 605,
|
|
"valid_targets_mean": 8789.5,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 1.020066889632107,
|
|
"grad_norm": 0.5420593375401783,
|
|
"learning_rate": 3.974944283852118e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10591815412044525,
|
|
"step": 610,
|
|
"valid_targets_mean": 11123.1,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 1.028428093645485,
|
|
"grad_norm": 0.46148941750133415,
|
|
"learning_rate": 3.9736111560506405e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308574080467224,
|
|
"step": 615,
|
|
"valid_targets_mean": 6831.1,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 1.0367892976588629,
|
|
"grad_norm": 0.4958063847476237,
|
|
"learning_rate": 3.972243711236439e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13027632236480713,
|
|
"step": 620,
|
|
"valid_targets_mean": 8247.5,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 1.0451505016722409,
|
|
"grad_norm": 0.4840012373641688,
|
|
"learning_rate": 3.970841973186547e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1612352430820465,
|
|
"step": 625,
|
|
"valid_targets_mean": 7705.2,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 1.0535117056856187,
|
|
"grad_norm": 0.4843851784862402,
|
|
"learning_rate": 3.969405966274288e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15847352147102356,
|
|
"step": 630,
|
|
"valid_targets_mean": 6745.0,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 1.0618729096989967,
|
|
"grad_norm": 0.45047639305532894,
|
|
"learning_rate": 3.967935715468851e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16200175881385803,
|
|
"step": 635,
|
|
"valid_targets_mean": 10218.6,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 1.0702341137123745,
|
|
"grad_norm": 0.44012913114074015,
|
|
"learning_rate": 3.966431246334853e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14589586853981018,
|
|
"step": 640,
|
|
"valid_targets_mean": 10005.0,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 1.0785953177257526,
|
|
"grad_norm": 0.3967284969495125,
|
|
"learning_rate": 3.9648925850318975e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12235724925994873,
|
|
"step": 645,
|
|
"valid_targets_mean": 10540.2,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 1.0869565217391304,
|
|
"grad_norm": 0.31534258965031803,
|
|
"learning_rate": 3.963319758314121e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1321730613708496,
|
|
"step": 650,
|
|
"valid_targets_mean": 13079.9,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 1.0953177257525084,
|
|
"grad_norm": 0.42610079698910897,
|
|
"learning_rate": 3.961712793529724e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1453770101070404,
|
|
"step": 655,
|
|
"valid_targets_mean": 7605.5,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.1036789297658862,
|
|
"grad_norm": 0.41100072854912634,
|
|
"learning_rate": 3.9600717186204976e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12499198317527771,
|
|
"step": 660,
|
|
"valid_targets_mean": 9410.8,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.1120401337792643,
|
|
"grad_norm": 0.3833564620485981,
|
|
"learning_rate": 3.958396562121337e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13141199946403503,
|
|
"step": 665,
|
|
"valid_targets_mean": 11358.4,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.120401337792642,
|
|
"grad_norm": 0.5254446468499826,
|
|
"learning_rate": 3.956687353159747e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21214735507965088,
|
|
"step": 670,
|
|
"valid_targets_mean": 10213.1,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.12876254180602,
|
|
"grad_norm": 0.39511181151361435,
|
|
"learning_rate": 3.954944121455334e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569204479455948,
|
|
"step": 675,
|
|
"valid_targets_mean": 9108.5,
|
|
"valid_targets_min": 2107
|
|
},
|
|
{
|
|
"epoch": 1.137123745819398,
|
|
"grad_norm": 0.47810267746133095,
|
|
"learning_rate": 3.953166897319287e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13496772944927216,
|
|
"step": 680,
|
|
"valid_targets_mean": 10577.2,
|
|
"valid_targets_min": 3087
|
|
},
|
|
{
|
|
"epoch": 1.145484949832776,
|
|
"grad_norm": 0.38523905961021543,
|
|
"learning_rate": 3.9513557116538564e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16221073269844055,
|
|
"step": 685,
|
|
"valid_targets_mean": 11697.5,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 1.1538461538461537,
|
|
"grad_norm": 0.5419139611483641,
|
|
"learning_rate": 3.949510595951812e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15650245547294617,
|
|
"step": 690,
|
|
"valid_targets_mean": 12005.0,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 1.1622073578595318,
|
|
"grad_norm": 0.47950919059669495,
|
|
"learning_rate": 3.947631582295896e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13776713609695435,
|
|
"step": 695,
|
|
"valid_targets_mean": 8358.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.1705685618729098,
|
|
"grad_norm": 0.48746344923547136,
|
|
"learning_rate": 3.945718703358266e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12062118947505951,
|
|
"step": 700,
|
|
"valid_targets_mean": 6302.5,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 1.1789297658862876,
|
|
"grad_norm": 0.4295932021023469,
|
|
"learning_rate": 3.9437719923999276e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14405778050422668,
|
|
"step": 705,
|
|
"valid_targets_mean": 11508.1,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 1.1872909698996654,
|
|
"grad_norm": 0.4038202598671233,
|
|
"learning_rate": 3.9417914832701555e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12962910532951355,
|
|
"step": 710,
|
|
"valid_targets_mean": 9681.9,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 1.1956521739130435,
|
|
"grad_norm": 0.4821408506929246,
|
|
"learning_rate": 3.939777210405903e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138853058218956,
|
|
"step": 715,
|
|
"valid_targets_mean": 7581.8,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.2040133779264215,
|
|
"grad_norm": 0.4100971062420982,
|
|
"learning_rate": 3.937729208831206e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19093003869056702,
|
|
"step": 720,
|
|
"valid_targets_mean": 12959.6,
|
|
"valid_targets_min": 6610
|
|
},
|
|
{
|
|
"epoch": 1.2123745819397993,
|
|
"grad_norm": 0.4253978458805654,
|
|
"learning_rate": 3.935647514156573e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1463736891746521,
|
|
"step": 725,
|
|
"valid_targets_mean": 11251.2,
|
|
"valid_targets_min": 3928
|
|
},
|
|
{
|
|
"epoch": 1.2207357859531773,
|
|
"grad_norm": 0.6859725905438455,
|
|
"learning_rate": 3.933532162578365e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1098906397819519,
|
|
"step": 730,
|
|
"valid_targets_mean": 7433.1,
|
|
"valid_targets_min": 4140
|
|
},
|
|
{
|
|
"epoch": 1.2290969899665551,
|
|
"grad_norm": 0.4063242072290706,
|
|
"learning_rate": 3.9313831908781664e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13985112309455872,
|
|
"step": 735,
|
|
"valid_targets_mean": 7449.5,
|
|
"valid_targets_min": 2658
|
|
},
|
|
{
|
|
"epoch": 1.2374581939799332,
|
|
"grad_norm": 0.4360468158157744,
|
|
"learning_rate": 3.929200636422148e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10015248507261276,
|
|
"step": 740,
|
|
"valid_targets_mean": 5890.0,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 1.245819397993311,
|
|
"grad_norm": 0.37708467045631155,
|
|
"learning_rate": 3.926984537160414e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12702609598636627,
|
|
"step": 745,
|
|
"valid_targets_mean": 9021.0,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.254180602006689,
|
|
"grad_norm": 0.4706380701818226,
|
|
"learning_rate": 3.9247349316263434e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14217805862426758,
|
|
"step": 750,
|
|
"valid_targets_mean": 8083.9,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 1.2625418060200668,
|
|
"grad_norm": 0.5643557520818183,
|
|
"learning_rate": 3.9224518589359195e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17439980804920197,
|
|
"step": 755,
|
|
"valid_targets_mean": 10298.6,
|
|
"valid_targets_min": 3395
|
|
},
|
|
{
|
|
"epoch": 1.2709030100334449,
|
|
"grad_norm": 0.8111418487328552,
|
|
"learning_rate": 3.9201353587870497e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11481772363185883,
|
|
"step": 760,
|
|
"valid_targets_mean": 9276.4,
|
|
"valid_targets_min": 2860
|
|
},
|
|
{
|
|
"epoch": 1.2792642140468227,
|
|
"grad_norm": 0.5743635328925323,
|
|
"learning_rate": 3.917785471458878e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11590395122766495,
|
|
"step": 765,
|
|
"valid_targets_mean": 8298.4,
|
|
"valid_targets_min": 2443
|
|
},
|
|
{
|
|
"epoch": 1.2876254180602007,
|
|
"grad_norm": 0.4892943658362636,
|
|
"learning_rate": 3.91540223781108e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134665384888649,
|
|
"step": 770,
|
|
"valid_targets_mean": 8011.5,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 1.2959866220735785,
|
|
"grad_norm": 0.38872399023412524,
|
|
"learning_rate": 3.912985699283156e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13240590691566467,
|
|
"step": 775,
|
|
"valid_targets_mean": 9582.4,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 1.3043478260869565,
|
|
"grad_norm": 0.5244459925511248,
|
|
"learning_rate": 3.910535897893709e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15077753365039825,
|
|
"step": 780,
|
|
"valid_targets_mean": 5301.8,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.3127090301003346,
|
|
"grad_norm": 0.5045201463639583,
|
|
"learning_rate": 3.908052876239715e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17268146574497223,
|
|
"step": 785,
|
|
"valid_targets_mean": 10566.1,
|
|
"valid_targets_min": 416
|
|
},
|
|
{
|
|
"epoch": 1.3210702341137124,
|
|
"grad_norm": 0.4022911020858761,
|
|
"learning_rate": 3.905536677495778e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13626717031002045,
|
|
"step": 790,
|
|
"valid_targets_mean": 9332.4,
|
|
"valid_targets_min": 3302
|
|
},
|
|
{
|
|
"epoch": 1.3294314381270902,
|
|
"grad_norm": 0.632226616981315,
|
|
"learning_rate": 3.9029873454133886e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19885721802711487,
|
|
"step": 795,
|
|
"valid_targets_mean": 9566.8,
|
|
"valid_targets_min": 2826
|
|
},
|
|
{
|
|
"epoch": 1.3377926421404682,
|
|
"grad_norm": 0.703008866608872,
|
|
"learning_rate": 3.900404924320153e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11076630651950836,
|
|
"step": 800,
|
|
"valid_targets_mean": 9555.5,
|
|
"valid_targets_min": 3385
|
|
},
|
|
{
|
|
"epoch": 1.3461538461538463,
|
|
"grad_norm": 0.4056344527044363,
|
|
"learning_rate": 3.89778945911903e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14071762561798096,
|
|
"step": 805,
|
|
"valid_targets_mean": 9279.8,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 1.354515050167224,
|
|
"grad_norm": 0.3830568525311727,
|
|
"learning_rate": 3.8951409952875446e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10976162552833557,
|
|
"step": 810,
|
|
"valid_targets_mean": 9123.6,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 1.3628762541806019,
|
|
"grad_norm": 0.39027219464950086,
|
|
"learning_rate": 3.8924595788770006e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1033591628074646,
|
|
"step": 815,
|
|
"valid_targets_mean": 7517.6,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 1.37123745819398,
|
|
"grad_norm": 0.40172101087714385,
|
|
"learning_rate": 3.8897452565116794e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1491539031267166,
|
|
"step": 820,
|
|
"valid_targets_mean": 12076.4,
|
|
"valid_targets_min": 6234
|
|
},
|
|
{
|
|
"epoch": 1.379598662207358,
|
|
"grad_norm": 0.41062989124772176,
|
|
"learning_rate": 3.8869980753880286e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11813704669475555,
|
|
"step": 825,
|
|
"valid_targets_mean": 10399.2,
|
|
"valid_targets_min": 5064
|
|
},
|
|
{
|
|
"epoch": 1.3879598662207357,
|
|
"grad_norm": 0.43168087630357316,
|
|
"learning_rate": 3.88421808327384e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13514377176761627,
|
|
"step": 830,
|
|
"valid_targets_mean": 8658.8,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 1.3963210702341138,
|
|
"grad_norm": 0.38146712301820296,
|
|
"learning_rate": 3.881405328507424e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14654771983623505,
|
|
"step": 835,
|
|
"valid_targets_mean": 12017.9,
|
|
"valid_targets_min": 4305
|
|
},
|
|
{
|
|
"epoch": 1.4046822742474916,
|
|
"grad_norm": 0.5167086005795993,
|
|
"learning_rate": 3.8785598599967624e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13879069685935974,
|
|
"step": 840,
|
|
"valid_targets_mean": 11071.5,
|
|
"valid_targets_min": 5267
|
|
},
|
|
{
|
|
"epoch": 1.4130434782608696,
|
|
"grad_norm": 0.40612416400140194,
|
|
"learning_rate": 3.875681727218663e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15729768574237823,
|
|
"step": 845,
|
|
"valid_targets_mean": 11396.8,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 1.4214046822742474,
|
|
"grad_norm": 0.37165516978156,
|
|
"learning_rate": 3.872770980217897e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11588621139526367,
|
|
"step": 850,
|
|
"valid_targets_mean": 9441.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.4297658862876255,
|
|
"grad_norm": 0.389802076703296,
|
|
"learning_rate": 3.869827669606331e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13532260060310364,
|
|
"step": 855,
|
|
"valid_targets_mean": 10265.9,
|
|
"valid_targets_min": 2898
|
|
},
|
|
{
|
|
"epoch": 1.4381270903010033,
|
|
"grad_norm": 0.36528797601478735,
|
|
"learning_rate": 3.8668518465620415e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129550963640213,
|
|
"step": 860,
|
|
"valid_targets_mean": 12669.5,
|
|
"valid_targets_min": 3633
|
|
},
|
|
{
|
|
"epoch": 1.4464882943143813,
|
|
"grad_norm": 0.41610869252646643,
|
|
"learning_rate": 3.863843562828434e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280188411474228,
|
|
"step": 865,
|
|
"valid_targets_mean": 9398.0,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 1.4548494983277591,
|
|
"grad_norm": 0.4521863599653053,
|
|
"learning_rate": 3.860802870713334e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12920519709587097,
|
|
"step": 870,
|
|
"valid_targets_mean": 7544.9,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 1.4632107023411371,
|
|
"grad_norm": 0.40131971089720997,
|
|
"learning_rate": 3.8577298230880834e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12073080986738205,
|
|
"step": 875,
|
|
"valid_targets_mean": 5980.9,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 1.471571906354515,
|
|
"grad_norm": 0.4535449990592638,
|
|
"learning_rate": 3.8546244733866195e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14370909333229065,
|
|
"step": 880,
|
|
"valid_targets_mean": 10963.9,
|
|
"valid_targets_min": 4088
|
|
},
|
|
{
|
|
"epoch": 1.479933110367893,
|
|
"grad_norm": 0.3751164768492792,
|
|
"learning_rate": 3.851486875604546e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10713209956884384,
|
|
"step": 885,
|
|
"valid_targets_mean": 9306.5,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 1.488294314381271,
|
|
"grad_norm": 0.4694248784315001,
|
|
"learning_rate": 3.848317084298194e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13058897852897644,
|
|
"step": 890,
|
|
"valid_targets_mean": 6703.0,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 1.4966555183946488,
|
|
"grad_norm": 0.3894296203352359,
|
|
"learning_rate": 3.845115154583672e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1557808220386505,
|
|
"step": 895,
|
|
"valid_targets_mean": 10680.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.5050167224080266,
|
|
"grad_norm": 0.356986656311927,
|
|
"learning_rate": 3.84188114213591e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11349885165691376,
|
|
"step": 900,
|
|
"valid_targets_mean": 8614.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 1.5133779264214047,
|
|
"grad_norm": 0.3589900642699686,
|
|
"learning_rate": 3.838615103187692e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13691379129886627,
|
|
"step": 905,
|
|
"valid_targets_mean": 12746.0,
|
|
"valid_targets_min": 1901
|
|
},
|
|
{
|
|
"epoch": 1.5217391304347827,
|
|
"grad_norm": 0.407791277242483,
|
|
"learning_rate": 3.8353170945286725e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1411113440990448,
|
|
"step": 910,
|
|
"valid_targets_mean": 9456.8,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 1.5301003344481605,
|
|
"grad_norm": 0.3988577049633315,
|
|
"learning_rate": 3.831987173504397e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12818259000778198,
|
|
"step": 915,
|
|
"valid_targets_mean": 12068.9,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.5384615384615383,
|
|
"grad_norm": 0.3848553659787436,
|
|
"learning_rate": 3.8286253980152975e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12293549627065659,
|
|
"step": 920,
|
|
"valid_targets_mean": 9655.9,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 1.5468227424749164,
|
|
"grad_norm": 0.4166364461075105,
|
|
"learning_rate": 3.8252318265156924e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18103176355361938,
|
|
"step": 925,
|
|
"valid_targets_mean": 10852.2,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 1.5551839464882944,
|
|
"grad_norm": 0.5351766505618932,
|
|
"learning_rate": 3.821806518012766e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1146191880106926,
|
|
"step": 930,
|
|
"valid_targets_mean": 5178.4,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.5635451505016722,
|
|
"grad_norm": 0.47627537755381066,
|
|
"learning_rate": 3.818349532065542e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.139126718044281,
|
|
"step": 935,
|
|
"valid_targets_mean": 8949.8,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 1.57190635451505,
|
|
"grad_norm": 0.49331046401067635,
|
|
"learning_rate": 3.81486092878385e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13329848647117615,
|
|
"step": 940,
|
|
"valid_targets_mean": 9879.0,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 1.580267558528428,
|
|
"grad_norm": 0.4608728338817722,
|
|
"learning_rate": 3.811340768827281e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12158765643835068,
|
|
"step": 945,
|
|
"valid_targets_mean": 7022.4,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 1.588628762541806,
|
|
"grad_norm": 0.48930082226976235,
|
|
"learning_rate": 3.807789113404129e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13771185278892517,
|
|
"step": 950,
|
|
"valid_targets_mean": 6990.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.596989966555184,
|
|
"grad_norm": 0.48428585153424064,
|
|
"learning_rate": 3.8042060242703295e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11612194776535034,
|
|
"step": 955,
|
|
"valid_targets_mean": 5316.8,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 1.605351170568562,
|
|
"grad_norm": 0.4194791308271857,
|
|
"learning_rate": 3.800591563728388e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332085281610489,
|
|
"step": 960,
|
|
"valid_targets_mean": 10749.2,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 1.6137123745819397,
|
|
"grad_norm": 0.5181410839775149,
|
|
"learning_rate": 3.79694579462629e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1287267953157425,
|
|
"step": 965,
|
|
"valid_targets_mean": 10934.1,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.6220735785953178,
|
|
"grad_norm": 0.3807128294455452,
|
|
"learning_rate": 3.793268780356414e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1525614857673645,
|
|
"step": 970,
|
|
"valid_targets_mean": 9240.5,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 1.6304347826086958,
|
|
"grad_norm": 0.5103087586515478,
|
|
"learning_rate": 3.789560584854426e-05,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10928568243980408,
|
|
"step": 975,
|
|
"valid_targets_mean": 7974.0,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 1.6387959866220736,
|
|
"grad_norm": 0.4006508423850524,
|
|
"learning_rate": 3.7858212725981716e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13162365555763245,
|
|
"step": 980,
|
|
"valid_targets_mean": 10448.4,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.6471571906354514,
|
|
"grad_norm": 0.4714300894271912,
|
|
"learning_rate": 3.782050908606549e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12257863581180573,
|
|
"step": 985,
|
|
"valid_targets_mean": 7058.1,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 1.6555183946488294,
|
|
"grad_norm": 0.349182513386174,
|
|
"learning_rate": 3.778249558438385e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14602884650230408,
|
|
"step": 990,
|
|
"valid_targets_mean": 11488.8,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 1.6638795986622075,
|
|
"grad_norm": 0.482942446506444,
|
|
"learning_rate": 3.77441728819129e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17961736023426056,
|
|
"step": 995,
|
|
"valid_targets_mean": 8656.1,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 1.6722408026755853,
|
|
"grad_norm": 0.39956066151386244,
|
|
"learning_rate": 3.77055416450051e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391647905111313,
|
|
"step": 1000,
|
|
"valid_targets_mean": 8267.4,
|
|
"valid_targets_min": 2887
|
|
},
|
|
{
|
|
"epoch": 1.680602006688963,
|
|
"grad_norm": 0.36909587810677325,
|
|
"learning_rate": 3.7666602545377716e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10799924284219742,
|
|
"step": 1005,
|
|
"valid_targets_mean": 9897.4,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 1.6889632107023411,
|
|
"grad_norm": 0.3494039020482254,
|
|
"learning_rate": 3.762735626010106e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13580293953418732,
|
|
"step": 1010,
|
|
"valid_targets_mean": 11577.5,
|
|
"valid_targets_min": 2594
|
|
},
|
|
{
|
|
"epoch": 1.6973244147157192,
|
|
"grad_norm": 0.5211649750499263,
|
|
"learning_rate": 3.758780347158683e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16248250007629395,
|
|
"step": 1015,
|
|
"valid_targets_mean": 11016.0,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 1.705685618729097,
|
|
"grad_norm": 0.38802663500649154,
|
|
"learning_rate": 3.754794486757611e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11914372444152832,
|
|
"step": 1020,
|
|
"valid_targets_mean": 8271.5,
|
|
"valid_targets_min": 2802
|
|
},
|
|
{
|
|
"epoch": 1.7140468227424748,
|
|
"grad_norm": 0.3964455481868351,
|
|
"learning_rate": 3.750778114112755e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15059375762939453,
|
|
"step": 1025,
|
|
"valid_targets_mean": 11345.6,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 1.7224080267558528,
|
|
"grad_norm": 0.43773870811313176,
|
|
"learning_rate": 3.7467312990605193e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12576225399971008,
|
|
"step": 1030,
|
|
"valid_targets_mean": 10739.8,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 1.7307692307692308,
|
|
"grad_norm": 0.5574558796152358,
|
|
"learning_rate": 3.742654111966641e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13974109292030334,
|
|
"step": 1035,
|
|
"valid_targets_mean": 10682.1,
|
|
"valid_targets_min": 5738
|
|
},
|
|
{
|
|
"epoch": 1.7391304347826086,
|
|
"grad_norm": 0.411042250821021,
|
|
"learning_rate": 3.738546623724966e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15702217817306519,
|
|
"step": 1040,
|
|
"valid_targets_mean": 9560.5,
|
|
"valid_targets_min": 2700
|
|
},
|
|
{
|
|
"epoch": 1.7474916387959865,
|
|
"grad_norm": 0.39857549486678273,
|
|
"learning_rate": 3.734408905756211e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282406598329544,
|
|
"step": 1045,
|
|
"valid_targets_mean": 9344.1,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 1.7558528428093645,
|
|
"grad_norm": 0.4083501215862186,
|
|
"learning_rate": 3.7302410300067274e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1146625503897667,
|
|
"step": 1050,
|
|
"valid_targets_mean": 11630.4,
|
|
"valid_targets_min": 1750
|
|
},
|
|
{
|
|
"epoch": 1.7642140468227425,
|
|
"grad_norm": 0.3297959454998514,
|
|
"learning_rate": 3.726043068947246e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12236960232257843,
|
|
"step": 1055,
|
|
"valid_targets_mean": 11533.9,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 1.7725752508361206,
|
|
"grad_norm": 0.3525885607201756,
|
|
"learning_rate": 3.721815095571622e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16883739829063416,
|
|
"step": 1060,
|
|
"valid_targets_mean": 12842.2,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 1.7809364548494984,
|
|
"grad_norm": 0.5187748258599745,
|
|
"learning_rate": 3.717557183395558e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10777710378170013,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5056.2,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.7892976588628762,
|
|
"grad_norm": 0.33236166623898217,
|
|
"learning_rate": 3.713269406455336e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11712735891342163,
|
|
"step": 1070,
|
|
"valid_targets_mean": 11309.9,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.7976588628762542,
|
|
"grad_norm": 0.46941515343970064,
|
|
"learning_rate": 3.708951839306519e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12613900005817413,
|
|
"step": 1075,
|
|
"valid_targets_mean": 7798.6,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.8060200668896322,
|
|
"grad_norm": 0.3848310181432469,
|
|
"learning_rate": 3.704604557022664e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17903822660446167,
|
|
"step": 1080,
|
|
"valid_targets_mean": 13289.0,
|
|
"valid_targets_min": 9764
|
|
},
|
|
{
|
|
"epoch": 1.81438127090301,
|
|
"grad_norm": 0.48069613483170576,
|
|
"learning_rate": 3.7002276351940113e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13812394440174103,
|
|
"step": 1085,
|
|
"valid_targets_mean": 7100.6,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 1.8227424749163879,
|
|
"grad_norm": 0.44109380242180307,
|
|
"learning_rate": 3.695821149926171e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1374289095401764,
|
|
"step": 1090,
|
|
"valid_targets_mean": 10994.6,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 1.8311036789297659,
|
|
"grad_norm": 0.4348902119707691,
|
|
"learning_rate": 3.6913851778388006e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14676082134246826,
|
|
"step": 1095,
|
|
"valid_targets_mean": 7295.6,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 1.839464882943144,
|
|
"grad_norm": 0.35534490507352234,
|
|
"learning_rate": 3.686919796064272e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1070433259010315,
|
|
"step": 1100,
|
|
"valid_targets_mean": 9732.0,
|
|
"valid_targets_min": 2839
|
|
},
|
|
{
|
|
"epoch": 1.8478260869565217,
|
|
"grad_norm": 0.5009418006788257,
|
|
"learning_rate": 3.682425082246332e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16170530021190643,
|
|
"step": 1105,
|
|
"valid_targets_mean": 8118.5,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 1.8561872909698995,
|
|
"grad_norm": 0.4217777133160996,
|
|
"learning_rate": 3.6779011145387505e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1677548587322235,
|
|
"step": 1110,
|
|
"valid_targets_mean": 9530.8,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 1.8645484949832776,
|
|
"grad_norm": 0.8222453034613255,
|
|
"learning_rate": 3.6733479716039606e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12629453837871552,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4151.4,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.8729096989966556,
|
|
"grad_norm": 0.445036909496868,
|
|
"learning_rate": 3.668765732611693e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13955241441726685,
|
|
"step": 1120,
|
|
"valid_targets_mean": 8832.5,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 1.8812709030100334,
|
|
"grad_norm": 0.40139726769082185,
|
|
"learning_rate": 3.6641544772376e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11238652467727661,
|
|
"step": 1125,
|
|
"valid_targets_mean": 8379.1,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 1.8896321070234112,
|
|
"grad_norm": 0.43656378571085613,
|
|
"learning_rate": 3.6595142856618656e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13526199758052826,
|
|
"step": 1130,
|
|
"valid_targets_mean": 7998.4,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.8979933110367893,
|
|
"grad_norm": 0.43064278739239414,
|
|
"learning_rate": 3.654845238567818e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09807489067316055,
|
|
"step": 1135,
|
|
"valid_targets_mean": 6772.1,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 1.9063545150501673,
|
|
"grad_norm": 0.41572601882475946,
|
|
"learning_rate": 3.650147417140521e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09826701879501343,
|
|
"step": 1140,
|
|
"valid_targets_mean": 7714.9,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 1.914715719063545,
|
|
"grad_norm": 0.43210191067993636,
|
|
"learning_rate": 3.645420903065365e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1295890212059021,
|
|
"step": 1145,
|
|
"valid_targets_mean": 8090.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.9230769230769231,
|
|
"grad_norm": 0.3505036128473746,
|
|
"learning_rate": 3.640665778526645e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17909875512123108,
|
|
"step": 1150,
|
|
"valid_targets_mean": 14454.8,
|
|
"valid_targets_min": 10078
|
|
},
|
|
{
|
|
"epoch": 1.931438127090301,
|
|
"grad_norm": 0.379696221330355,
|
|
"learning_rate": 3.635882126206136e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12140147387981415,
|
|
"step": 1155,
|
|
"valid_targets_mean": 10601.0,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 1.939799331103679,
|
|
"grad_norm": 0.3839165393691012,
|
|
"learning_rate": 3.6310700292816476e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13042044639587402,
|
|
"step": 1160,
|
|
"valid_targets_mean": 11957.0,
|
|
"valid_targets_min": 3358
|
|
},
|
|
{
|
|
"epoch": 1.948160535117057,
|
|
"grad_norm": 0.43571232911035057,
|
|
"learning_rate": 3.6262295714255864e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376996487379074,
|
|
"step": 1165,
|
|
"valid_targets_mean": 9888.2,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 1.9565217391304348,
|
|
"grad_norm": 0.4233212110176071,
|
|
"learning_rate": 3.6213608368034945e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17015866935253143,
|
|
"step": 1170,
|
|
"valid_targets_mean": 10513.4,
|
|
"valid_targets_min": 4198
|
|
},
|
|
{
|
|
"epoch": 1.9648829431438126,
|
|
"grad_norm": 0.3233847334110875,
|
|
"learning_rate": 3.616463910072588e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13996142148971558,
|
|
"step": 1175,
|
|
"valid_targets_mean": 12746.8,
|
|
"valid_targets_min": 2741
|
|
},
|
|
{
|
|
"epoch": 1.9732441471571907,
|
|
"grad_norm": 0.3965658838366059,
|
|
"learning_rate": 3.611538876380287e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12838321924209595,
|
|
"step": 1180,
|
|
"valid_targets_mean": 11440.0,
|
|
"valid_targets_min": 5258
|
|
},
|
|
{
|
|
"epoch": 1.9816053511705687,
|
|
"grad_norm": 0.34048356445182115,
|
|
"learning_rate": 3.606585821362733e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1261366307735443,
|
|
"step": 1185,
|
|
"valid_targets_mean": 12529.1,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 1.9899665551839465,
|
|
"grad_norm": 0.36023031466592254,
|
|
"learning_rate": 3.6016048311433e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1421847641468048,
|
|
"step": 1190,
|
|
"valid_targets_mean": 10592.2,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 1.9983277591973243,
|
|
"grad_norm": 0.36581878374039645,
|
|
"learning_rate": 3.596595992331098e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1497029960155487,
|
|
"step": 1195,
|
|
"valid_targets_mean": 12548.8,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 2.0066889632107023,
|
|
"grad_norm": 0.4805137542455699,
|
|
"learning_rate": 3.5915593920194645e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390073597431183,
|
|
"step": 1200,
|
|
"valid_targets_mean": 9461.1,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 2.0150501672240804,
|
|
"grad_norm": 0.41067285845744156,
|
|
"learning_rate": 3.5864951177844544e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1373160183429718,
|
|
"step": 1205,
|
|
"valid_targets_mean": 11978.9,
|
|
"valid_targets_min": 8043
|
|
},
|
|
{
|
|
"epoch": 2.0234113712374584,
|
|
"grad_norm": 0.3927805940360465,
|
|
"learning_rate": 3.5814032576833144e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13885581493377686,
|
|
"step": 1210,
|
|
"valid_targets_mean": 12121.5,
|
|
"valid_targets_min": 5071
|
|
},
|
|
{
|
|
"epoch": 2.031772575250836,
|
|
"grad_norm": 0.5095744211920563,
|
|
"learning_rate": 3.576283900252951e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12556342780590057,
|
|
"step": 1215,
|
|
"valid_targets_mean": 7156.0,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 2.040133779264214,
|
|
"grad_norm": 0.4642171859407589,
|
|
"learning_rate": 3.571137134508393e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11935026943683624,
|
|
"step": 1220,
|
|
"valid_targets_mean": 7961.1,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 2.048494983277592,
|
|
"grad_norm": 0.37064976288113766,
|
|
"learning_rate": 3.565963049941244e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14762020111083984,
|
|
"step": 1225,
|
|
"valid_targets_mean": 11188.9,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 2.05685618729097,
|
|
"grad_norm": 0.6466201874067796,
|
|
"learning_rate": 3.560761736518123e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16191944479942322,
|
|
"step": 1230,
|
|
"valid_targets_mean": 8810.9,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 2.0652173913043477,
|
|
"grad_norm": 0.40485200516877057,
|
|
"learning_rate": 3.5555332846791044e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17403456568717957,
|
|
"step": 1235,
|
|
"valid_targets_mean": 13767.0,
|
|
"valid_targets_min": 4819
|
|
},
|
|
{
|
|
"epoch": 2.0735785953177257,
|
|
"grad_norm": 0.3770867133297513,
|
|
"learning_rate": 3.550277785336144e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12024596333503723,
|
|
"step": 1240,
|
|
"valid_targets_mean": 13128.1,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 2.0819397993311037,
|
|
"grad_norm": 0.3489661234860803,
|
|
"learning_rate": 3.544995329871497e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0948907732963562,
|
|
"step": 1245,
|
|
"valid_targets_mean": 10262.1,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 2.0903010033444818,
|
|
"grad_norm": 0.4555586726425082,
|
|
"learning_rate": 3.539686010136128e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13215045630931854,
|
|
"step": 1250,
|
|
"valid_targets_mean": 11631.5,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 2.0986622073578594,
|
|
"grad_norm": 0.4522057769154765,
|
|
"learning_rate": 3.5343499184481176e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12133727967739105,
|
|
"step": 1255,
|
|
"valid_targets_mean": 8204.2,
|
|
"valid_targets_min": 2839
|
|
},
|
|
{
|
|
"epoch": 2.1070234113712374,
|
|
"grad_norm": 0.3848238739011188,
|
|
"learning_rate": 3.528987147591056e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12333213537931442,
|
|
"step": 1260,
|
|
"valid_targets_mean": 12413.8,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 2.1153846153846154,
|
|
"grad_norm": 0.39213641291733276,
|
|
"learning_rate": 3.523597790812426e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10923528671264648,
|
|
"step": 1265,
|
|
"valid_targets_mean": 7545.9,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 2.1237458193979935,
|
|
"grad_norm": 0.3597133300436031,
|
|
"learning_rate": 3.5181819418219876e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10679396986961365,
|
|
"step": 1270,
|
|
"valid_targets_mean": 9424.8,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 2.132107023411371,
|
|
"grad_norm": 0.36242500361780255,
|
|
"learning_rate": 3.512739694790143e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16019397974014282,
|
|
"step": 1275,
|
|
"valid_targets_mean": 11775.4,
|
|
"valid_targets_min": 4525
|
|
},
|
|
{
|
|
"epoch": 2.140468227424749,
|
|
"grad_norm": 0.39853958086319385,
|
|
"learning_rate": 3.5072711443463046e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12158852815628052,
|
|
"step": 1280,
|
|
"valid_targets_mean": 10731.2,
|
|
"valid_targets_min": 4240
|
|
},
|
|
{
|
|
"epoch": 2.148829431438127,
|
|
"grad_norm": 0.36145453281345763,
|
|
"learning_rate": 3.501776385577244e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12097007781267166,
|
|
"step": 1285,
|
|
"valid_targets_mean": 9797.6,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 2.157190635451505,
|
|
"grad_norm": 0.4000620097375523,
|
|
"learning_rate": 3.4962555140254405e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11302991211414337,
|
|
"step": 1290,
|
|
"valid_targets_mean": 9020.4,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 2.165551839464883,
|
|
"grad_norm": 0.37175090238590447,
|
|
"learning_rate": 3.4907086256874266e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12309005111455917,
|
|
"step": 1295,
|
|
"valid_targets_mean": 13143.4,
|
|
"valid_targets_min": 8091
|
|
},
|
|
{
|
|
"epoch": 2.1739130434782608,
|
|
"grad_norm": 0.33107473164768875,
|
|
"learning_rate": 3.485135817012105e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14390119910240173,
|
|
"step": 1300,
|
|
"valid_targets_mean": 13870.8,
|
|
"valid_targets_min": 2832
|
|
},
|
|
{
|
|
"epoch": 2.182274247491639,
|
|
"grad_norm": 0.582837436613109,
|
|
"learning_rate": 3.479537184899084e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12507672607898712,
|
|
"step": 1305,
|
|
"valid_targets_mean": 9559.8,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 2.190635451505017,
|
|
"grad_norm": 0.43525574781632165,
|
|
"learning_rate": 3.473912826696989e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12049269676208496,
|
|
"step": 1310,
|
|
"valid_targets_mean": 11079.2,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 2.198996655518395,
|
|
"grad_norm": 0.4398418217860631,
|
|
"learning_rate": 3.468262840201765e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10127022117376328,
|
|
"step": 1315,
|
|
"valid_targets_mean": 6868.5,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 2.2073578595317724,
|
|
"grad_norm": 0.4187709086090712,
|
|
"learning_rate": 3.462587323654982e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10398909449577332,
|
|
"step": 1320,
|
|
"valid_targets_mean": 8405.2,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 2.2157190635451505,
|
|
"grad_norm": 0.3731200453497962,
|
|
"learning_rate": 3.456886375742126e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12388882040977478,
|
|
"step": 1325,
|
|
"valid_targets_mean": 11453.4,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 2.2240802675585285,
|
|
"grad_norm": 0.36488689362331017,
|
|
"learning_rate": 3.451160095590879e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11108485609292984,
|
|
"step": 1330,
|
|
"valid_targets_mean": 10867.0,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 2.2324414715719065,
|
|
"grad_norm": 0.4169602000691548,
|
|
"learning_rate": 3.445408582769402e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1160370409488678,
|
|
"step": 1335,
|
|
"valid_targets_mean": 10743.4,
|
|
"valid_targets_min": 5485
|
|
},
|
|
{
|
|
"epoch": 2.240802675585284,
|
|
"grad_norm": 0.3923471233039714,
|
|
"learning_rate": 3.4396319372845946e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06723027676343918,
|
|
"step": 1340,
|
|
"valid_targets_mean": 7810.5,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 2.249163879598662,
|
|
"grad_norm": 0.41264807143539883,
|
|
"learning_rate": 3.433830259580367e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08843734860420227,
|
|
"step": 1345,
|
|
"valid_targets_mean": 7681.5,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 2.25752508361204,
|
|
"grad_norm": 0.5048601676998985,
|
|
"learning_rate": 3.428003650535883e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657047271728516,
|
|
"step": 1350,
|
|
"valid_targets_mean": 9356.5,
|
|
"valid_targets_min": 3558
|
|
},
|
|
{
|
|
"epoch": 2.265886287625418,
|
|
"grad_norm": 0.39465803715639153,
|
|
"learning_rate": 3.4221522114638155e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13059288263320923,
|
|
"step": 1355,
|
|
"valid_targets_mean": 10105.2,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 2.274247491638796,
|
|
"grad_norm": 0.4015742291650399,
|
|
"learning_rate": 3.4162760441085755e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11941999942064285,
|
|
"step": 1360,
|
|
"valid_targets_mean": 7364.4,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 2.282608695652174,
|
|
"grad_norm": 0.3972629034383108,
|
|
"learning_rate": 3.410375250644552e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14698155224323273,
|
|
"step": 1365,
|
|
"valid_targets_mean": 13904.5,
|
|
"valid_targets_min": 5858
|
|
},
|
|
{
|
|
"epoch": 2.290969899665552,
|
|
"grad_norm": 0.46062323834907454,
|
|
"learning_rate": 3.4044499336743274e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1461297571659088,
|
|
"step": 1370,
|
|
"valid_targets_mean": 11134.4,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.29933110367893,
|
|
"grad_norm": 0.40210951792443517,
|
|
"learning_rate": 3.398500196226899e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12976032495498657,
|
|
"step": 1375,
|
|
"valid_targets_mean": 9786.8,
|
|
"valid_targets_min": 4053
|
|
},
|
|
{
|
|
"epoch": 2.3076923076923075,
|
|
"grad_norm": 0.50328447706808,
|
|
"learning_rate": 3.3925261417558856e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15925633907318115,
|
|
"step": 1380,
|
|
"valid_targets_mean": 7419.5,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 2.3160535117056855,
|
|
"grad_norm": 0.3868366503745488,
|
|
"learning_rate": 3.3865278741377285e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14687833189964294,
|
|
"step": 1385,
|
|
"valid_targets_mean": 10005.5,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 2.3244147157190636,
|
|
"grad_norm": 0.4062605127021732,
|
|
"learning_rate": 3.3805054976698865e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293834149837494,
|
|
"step": 1390,
|
|
"valid_targets_mean": 9932.6,
|
|
"valid_targets_min": 3587
|
|
},
|
|
{
|
|
"epoch": 2.3327759197324416,
|
|
"grad_norm": 0.3876911288944701,
|
|
"learning_rate": 3.374459117069018e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1149972602725029,
|
|
"step": 1395,
|
|
"valid_targets_mean": 9900.9,
|
|
"valid_targets_min": 4712
|
|
},
|
|
{
|
|
"epoch": 2.3411371237458196,
|
|
"grad_norm": 0.37889618626458854,
|
|
"learning_rate": 3.368388837469168e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09130813181400299,
|
|
"step": 1400,
|
|
"valid_targets_mean": 8142.9,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 2.349498327759197,
|
|
"grad_norm": 0.45116614748085065,
|
|
"learning_rate": 3.362294764419932e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15559333562850952,
|
|
"step": 1405,
|
|
"valid_targets_mean": 10341.5,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 2.3578595317725752,
|
|
"grad_norm": 0.4244071075073278,
|
|
"learning_rate": 3.356177003884627e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11688801646232605,
|
|
"step": 1410,
|
|
"valid_targets_mean": 8688.5,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 2.3662207357859533,
|
|
"grad_norm": 0.3723103346455452,
|
|
"learning_rate": 3.350035662238445e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13734327256679535,
|
|
"step": 1415,
|
|
"valid_targets_mean": 10992.6,
|
|
"valid_targets_min": 5318
|
|
},
|
|
{
|
|
"epoch": 2.374581939799331,
|
|
"grad_norm": 0.634598571847355,
|
|
"learning_rate": 3.343870846266604e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10113831609487534,
|
|
"step": 1420,
|
|
"valid_targets_mean": 8526.6,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 2.382943143812709,
|
|
"grad_norm": 0.4028379867495188,
|
|
"learning_rate": 3.337682663162495e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1888931393623352,
|
|
"step": 1425,
|
|
"valid_targets_mean": 11279.4,
|
|
"valid_targets_min": 2455
|
|
},
|
|
{
|
|
"epoch": 2.391304347826087,
|
|
"grad_norm": 0.43217351124887177,
|
|
"learning_rate": 3.331471220525811e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0831364095211029,
|
|
"step": 1430,
|
|
"valid_targets_mean": 7056.5,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 2.399665551839465,
|
|
"grad_norm": 0.5187461957888965,
|
|
"learning_rate": 3.325236626360683e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14488182961940765,
|
|
"step": 1435,
|
|
"valid_targets_mean": 7048.8,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 2.408026755852843,
|
|
"grad_norm": 0.46152094366549873,
|
|
"learning_rate": 3.318978989073798e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1229025274515152,
|
|
"step": 1440,
|
|
"valid_targets_mean": 7423.6,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 2.4163879598662206,
|
|
"grad_norm": 0.348815392090178,
|
|
"learning_rate": 3.312698417472515e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11790160834789276,
|
|
"step": 1445,
|
|
"valid_targets_mean": 12255.2,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 2.4247491638795986,
|
|
"grad_norm": 0.5274241881981896,
|
|
"learning_rate": 3.306395020762974e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10795015096664429,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5474.2,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 2.4331103678929766,
|
|
"grad_norm": 0.35256578682311857,
|
|
"learning_rate": 3.300068908548196e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13000312447547913,
|
|
"step": 1455,
|
|
"valid_targets_mean": 11836.4,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 2.4414715719063547,
|
|
"grad_norm": 0.382236558806328,
|
|
"learning_rate": 3.2937201908261784e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13290023803710938,
|
|
"step": 1460,
|
|
"valid_targets_mean": 10683.1,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 2.4498327759197323,
|
|
"grad_norm": 0.8256653291286032,
|
|
"learning_rate": 3.2873489779879795e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12021457403898239,
|
|
"step": 1465,
|
|
"valid_targets_mean": 14890.0,
|
|
"valid_targets_min": 9140
|
|
},
|
|
{
|
|
"epoch": 2.4581939799331103,
|
|
"grad_norm": 0.35599734126731225,
|
|
"learning_rate": 3.2809553808158035e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137654647231102,
|
|
"step": 1470,
|
|
"valid_targets_mean": 11306.8,
|
|
"valid_targets_min": 4341
|
|
},
|
|
{
|
|
"epoch": 2.4665551839464883,
|
|
"grad_norm": 0.3533230009335228,
|
|
"learning_rate": 3.27453951048107e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10107231140136719,
|
|
"step": 1475,
|
|
"valid_targets_mean": 9662.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 2.4749163879598663,
|
|
"grad_norm": 0.35147964312865615,
|
|
"learning_rate": 3.2681014785424845e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13592997193336487,
|
|
"step": 1480,
|
|
"valid_targets_mean": 13559.6,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 2.483277591973244,
|
|
"grad_norm": 0.4316225571830276,
|
|
"learning_rate": 3.2616413969440955e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11397585272789001,
|
|
"step": 1485,
|
|
"valid_targets_mean": 11116.8,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 2.491638795986622,
|
|
"grad_norm": 0.4075369403477426,
|
|
"learning_rate": 3.2551593780133495e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08631586283445358,
|
|
"step": 1490,
|
|
"valid_targets_mean": 7340.8,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.42075919577922355,
|
|
"learning_rate": 3.24865553445914e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12953878939151764,
|
|
"step": 1495,
|
|
"valid_targets_mean": 11266.0,
|
|
"valid_targets_min": 3379
|
|
},
|
|
{
|
|
"epoch": 2.508361204013378,
|
|
"grad_norm": 0.4244414740559001,
|
|
"learning_rate": 3.242129979369842e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10770672559738159,
|
|
"step": 1500,
|
|
"valid_targets_mean": 6170.9,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 2.516722408026756,
|
|
"grad_norm": 0.37042367506917234,
|
|
"learning_rate": 3.23558282621135e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09419460594654083,
|
|
"step": 1505,
|
|
"valid_targets_mean": 9486.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 2.5250836120401337,
|
|
"grad_norm": 0.4046590659237067,
|
|
"learning_rate": 3.229014188825108e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1241365373134613,
|
|
"step": 1510,
|
|
"valid_targets_mean": 9934.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.5334448160535117,
|
|
"grad_norm": 0.3276841182130964,
|
|
"learning_rate": 3.2224241814261216e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13655270636081696,
|
|
"step": 1515,
|
|
"valid_targets_mean": 12852.0,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 2.5418060200668897,
|
|
"grad_norm": 0.3060370921851602,
|
|
"learning_rate": 3.215812918600978e-05,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10720054060220718,
|
|
"step": 1520,
|
|
"valid_targets_mean": 13576.0,
|
|
"valid_targets_min": 2525
|
|
},
|
|
{
|
|
"epoch": 2.5501672240802673,
|
|
"grad_norm": 0.5552878150180105,
|
|
"learning_rate": 3.209180515305855e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11084957420825958,
|
|
"step": 1525,
|
|
"valid_targets_mean": 8949.2,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 2.5585284280936453,
|
|
"grad_norm": 0.41237327403748636,
|
|
"learning_rate": 3.2025270868645146e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11768274009227753,
|
|
"step": 1530,
|
|
"valid_targets_mean": 8567.6,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 2.5668896321070234,
|
|
"grad_norm": 0.37473753266210497,
|
|
"learning_rate": 3.195852748966306e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12515437602996826,
|
|
"step": 1535,
|
|
"valid_targets_mean": 11350.5,
|
|
"valid_targets_min": 2951
|
|
},
|
|
{
|
|
"epoch": 2.5752508361204014,
|
|
"grad_norm": 0.36327856324321856,
|
|
"learning_rate": 3.189157617664151e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10544034838676453,
|
|
"step": 1540,
|
|
"valid_targets_mean": 11614.4,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 2.5836120401337794,
|
|
"grad_norm": 0.4258593138002462,
|
|
"learning_rate": 3.182441809372523e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15608590841293335,
|
|
"step": 1545,
|
|
"valid_targets_mean": 11053.5,
|
|
"valid_targets_min": 6451
|
|
},
|
|
{
|
|
"epoch": 2.591973244147157,
|
|
"grad_norm": 0.3195893331565725,
|
|
"learning_rate": 3.1757054408654266e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08730299770832062,
|
|
"step": 1550,
|
|
"valid_targets_mean": 9104.0,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 2.600334448160535,
|
|
"grad_norm": 0.3546063540874118,
|
|
"learning_rate": 3.168948629274367e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11701709032058716,
|
|
"step": 1555,
|
|
"valid_targets_mean": 12393.5,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.3404617229864709,
|
|
"learning_rate": 3.1621714920863104e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1024714782834053,
|
|
"step": 1560,
|
|
"valid_targets_mean": 10885.8,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 2.617056856187291,
|
|
"grad_norm": 0.40427112931051473,
|
|
"learning_rate": 3.155374147141646e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09823710471391678,
|
|
"step": 1565,
|
|
"valid_targets_mean": 6433.6,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 2.625418060200669,
|
|
"grad_norm": 0.5324825842046998,
|
|
"learning_rate": 3.1485567126321295e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1073603704571724,
|
|
"step": 1570,
|
|
"valid_targets_mean": 8536.5,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 2.6337792642140467,
|
|
"grad_norm": 0.3736940724900844,
|
|
"learning_rate": 3.1417193070988383e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12857000529766083,
|
|
"step": 1575,
|
|
"valid_targets_mean": 11360.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 2.6421404682274248,
|
|
"grad_norm": 0.4609044115722531,
|
|
"learning_rate": 3.134862049430099e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0988566055893898,
|
|
"step": 1580,
|
|
"valid_targets_mean": 7219.0,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 2.650501672240803,
|
|
"grad_norm": 0.42767784827242716,
|
|
"learning_rate": 3.12798505885943e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14818593859672546,
|
|
"step": 1585,
|
|
"valid_targets_mean": 9206.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 2.6588628762541804,
|
|
"grad_norm": 0.42092834747736024,
|
|
"learning_rate": 3.1210884549634624e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14055564999580383,
|
|
"step": 1590,
|
|
"valid_targets_mean": 8730.6,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 2.6672240802675584,
|
|
"grad_norm": 0.3912191158129179,
|
|
"learning_rate": 3.114172357659861e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1726861298084259,
|
|
"step": 1595,
|
|
"valid_targets_mean": 12213.6,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 2.6755852842809364,
|
|
"grad_norm": 0.348202787922886,
|
|
"learning_rate": 3.107236887205242e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11289996653795242,
|
|
"step": 1600,
|
|
"valid_targets_mean": 12974.2,
|
|
"valid_targets_min": 3839
|
|
},
|
|
{
|
|
"epoch": 2.6839464882943145,
|
|
"grad_norm": 0.46956155227530566,
|
|
"learning_rate": 3.1002821641930815e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18240371346473694,
|
|
"step": 1605,
|
|
"valid_targets_mean": 12677.1,
|
|
"valid_targets_min": 5844
|
|
},
|
|
{
|
|
"epoch": 2.6923076923076925,
|
|
"grad_norm": 0.4099578008441195,
|
|
"learning_rate": 3.093308309551616e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10199138522148132,
|
|
"step": 1610,
|
|
"valid_targets_mean": 8624.0,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.70066889632107,
|
|
"grad_norm": 0.4847805185901784,
|
|
"learning_rate": 3.0863154445417426e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14616549015045166,
|
|
"step": 1615,
|
|
"valid_targets_mean": 8944.2,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 2.709030100334448,
|
|
"grad_norm": 0.37766525267558826,
|
|
"learning_rate": 3.079303690754908e-05,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10516074299812317,
|
|
"step": 1620,
|
|
"valid_targets_mean": 9050.8,
|
|
"valid_targets_min": 2206
|
|
},
|
|
{
|
|
"epoch": 2.717391304347826,
|
|
"grad_norm": 0.38796599273749677,
|
|
"learning_rate": 3.072273170110998e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08544375002384186,
|
|
"step": 1625,
|
|
"valid_targets_mean": 7840.2,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 2.7257525083612038,
|
|
"grad_norm": 0.4825759324842755,
|
|
"learning_rate": 3.0652240048562134e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11281964182853699,
|
|
"step": 1630,
|
|
"valid_targets_mean": 7965.0,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 2.734113712374582,
|
|
"grad_norm": 0.362373942167484,
|
|
"learning_rate": 3.058156317560945e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10064201056957245,
|
|
"step": 1635,
|
|
"valid_targets_mean": 7810.1,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 2.74247491638796,
|
|
"grad_norm": 0.39194670939165716,
|
|
"learning_rate": 3.0510702311176477e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09394676238298416,
|
|
"step": 1640,
|
|
"valid_targets_mean": 8219.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 2.750836120401338,
|
|
"grad_norm": 0.42175694132023195,
|
|
"learning_rate": 3.043965868738695e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1097821295261383,
|
|
"step": 1645,
|
|
"valid_targets_mean": 8239.8,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 2.759197324414716,
|
|
"grad_norm": 0.4091069862541864,
|
|
"learning_rate": 3.0368433539542433e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08766161650419235,
|
|
"step": 1650,
|
|
"valid_targets_mean": 9026.2,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 2.7675585284280935,
|
|
"grad_norm": 0.5286110429961899,
|
|
"learning_rate": 3.029702810610082e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303786337375641,
|
|
"step": 1655,
|
|
"valid_targets_mean": 13092.1,
|
|
"valid_targets_min": 6516
|
|
},
|
|
{
|
|
"epoch": 2.7759197324414715,
|
|
"grad_norm": 0.43085458276729555,
|
|
"learning_rate": 3.0225443628654787e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1763029396533966,
|
|
"step": 1660,
|
|
"valid_targets_mean": 13117.9,
|
|
"valid_targets_min": 5748
|
|
},
|
|
{
|
|
"epoch": 2.7842809364548495,
|
|
"grad_norm": 0.3989671274835127,
|
|
"learning_rate": 3.0153681351910226e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11267106235027313,
|
|
"step": 1665,
|
|
"valid_targets_mean": 7551.0,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 2.7926421404682276,
|
|
"grad_norm": 0.4182243683976934,
|
|
"learning_rate": 3.0081742523664576e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16769808530807495,
|
|
"step": 1670,
|
|
"valid_targets_mean": 8917.6,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.8010033444816056,
|
|
"grad_norm": 0.4627992586726246,
|
|
"learning_rate": 3.0009628394785158e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12237042188644409,
|
|
"step": 1675,
|
|
"valid_targets_mean": 7989.4,
|
|
"valid_targets_min": 3300
|
|
},
|
|
{
|
|
"epoch": 2.809364548494983,
|
|
"grad_norm": 0.38169498772899313,
|
|
"learning_rate": 2.9937340219187402e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0910203754901886,
|
|
"step": 1680,
|
|
"valid_targets_mean": 8034.8,
|
|
"valid_targets_min": 2301
|
|
},
|
|
{
|
|
"epoch": 2.817725752508361,
|
|
"grad_norm": 0.37945073419605485,
|
|
"learning_rate": 2.986487925381304e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08010489493608475,
|
|
"step": 1685,
|
|
"valid_targets_mean": 6127.2,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 2.8260869565217392,
|
|
"grad_norm": 0.38288343487509324,
|
|
"learning_rate": 2.9792246758608283e-05,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12219588458538055,
|
|
"step": 1690,
|
|
"valid_targets_mean": 9243.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 2.834448160535117,
|
|
"grad_norm": 0.39971956805195763,
|
|
"learning_rate": 2.9719443996501858e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16646254062652588,
|
|
"step": 1695,
|
|
"valid_targets_mean": 12299.8,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 2.842809364548495,
|
|
"grad_norm": 0.3547583133766198,
|
|
"learning_rate": 2.9646472233383118e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284923553466797,
|
|
"step": 1700,
|
|
"valid_targets_mean": 12530.9,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 2.851170568561873,
|
|
"grad_norm": 0.3610307624945759,
|
|
"learning_rate": 2.9573332738079964e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12633870542049408,
|
|
"step": 1705,
|
|
"valid_targets_mean": 8451.6,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 2.859531772575251,
|
|
"grad_norm": 0.37399992694639345,
|
|
"learning_rate": 2.9500026782336828e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09825838357210159,
|
|
"step": 1710,
|
|
"valid_targets_mean": 8331.5,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 2.867892976588629,
|
|
"grad_norm": 0.3949447819629481,
|
|
"learning_rate": 2.942655564079254e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09675749391317368,
|
|
"step": 1715,
|
|
"valid_targets_mean": 8505.5,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.8762541806020065,
|
|
"grad_norm": 0.4026532036047343,
|
|
"learning_rate": 2.9352920590958173e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14984367787837982,
|
|
"step": 1720,
|
|
"valid_targets_mean": 10393.2,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 2.8846153846153846,
|
|
"grad_norm": 0.4214502010275911,
|
|
"learning_rate": 2.927912291319482e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1902714967727661,
|
|
"step": 1725,
|
|
"valid_targets_mean": 9473.9,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 2.8929765886287626,
|
|
"grad_norm": 0.43124240841331496,
|
|
"learning_rate": 2.9205163890691338e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1255147010087967,
|
|
"step": 1730,
|
|
"valid_targets_mean": 8470.1,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 2.90133779264214,
|
|
"grad_norm": 0.3839482848514472,
|
|
"learning_rate": 2.9131044809442038e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299577355384827,
|
|
"step": 1735,
|
|
"valid_targets_mean": 10776.9,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 2.9096989966555182,
|
|
"grad_norm": 0.36462771754261397,
|
|
"learning_rate": 2.9056766958224324e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13920457661151886,
|
|
"step": 1740,
|
|
"valid_targets_mean": 11427.2,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 2.9180602006688963,
|
|
"grad_norm": 0.35127612189707885,
|
|
"learning_rate": 2.898233162857627e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1261829286813736,
|
|
"step": 1745,
|
|
"valid_targets_mean": 11903.6,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 2.9264214046822743,
|
|
"grad_norm": 0.33779373955112896,
|
|
"learning_rate": 2.8907740114774185e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11324828863143921,
|
|
"step": 1750,
|
|
"valid_targets_mean": 10493.8,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 2.9347826086956523,
|
|
"grad_norm": 0.38546006256373555,
|
|
"learning_rate": 2.8832993713810095e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12133976072072983,
|
|
"step": 1755,
|
|
"valid_targets_mean": 10210.8,
|
|
"valid_targets_min": 3562
|
|
},
|
|
{
|
|
"epoch": 2.94314381270903,
|
|
"grad_norm": 0.44443569353223883,
|
|
"learning_rate": 2.8758093725369193e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12169276177883148,
|
|
"step": 1760,
|
|
"valid_targets_mean": 6676.2,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 2.951505016722408,
|
|
"grad_norm": 0.40084442223467437,
|
|
"learning_rate": 2.868304145180722e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1473241001367569,
|
|
"step": 1765,
|
|
"valid_targets_mean": 11117.6,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 2.959866220735786,
|
|
"grad_norm": 0.401974847997422,
|
|
"learning_rate": 2.8607838198127886e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10397756099700928,
|
|
"step": 1770,
|
|
"valid_targets_mean": 6897.1,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 2.968227424749164,
|
|
"grad_norm": 0.5802023336464625,
|
|
"learning_rate": 2.8532485271960088e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12776628136634827,
|
|
"step": 1775,
|
|
"valid_targets_mean": 8712.8,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 2.976588628762542,
|
|
"grad_norm": 0.3522777957197366,
|
|
"learning_rate": 2.8456983983535243e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14866910874843597,
|
|
"step": 1780,
|
|
"valid_targets_mean": 10458.0,
|
|
"valid_targets_min": 2319
|
|
},
|
|
{
|
|
"epoch": 2.9849498327759196,
|
|
"grad_norm": 0.3831091833828722,
|
|
"learning_rate": 2.838133564566447e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11089050024747849,
|
|
"step": 1785,
|
|
"valid_targets_mean": 9063.5,
|
|
"valid_targets_min": 2019
|
|
},
|
|
{
|
|
"epoch": 2.9933110367892977,
|
|
"grad_norm": 0.507905774398555,
|
|
"learning_rate": 2.8305541573715775e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10801051557064056,
|
|
"step": 1790,
|
|
"valid_targets_mean": 7237.1,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 3.0016722408026757,
|
|
"grad_norm": 0.4506006160979905,
|
|
"learning_rate": 2.8229603085591178e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11337893456220627,
|
|
"step": 1795,
|
|
"valid_targets_mean": 8041.2,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 3.0100334448160537,
|
|
"grad_norm": 0.49249138295571887,
|
|
"learning_rate": 2.8153521501703803e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143145352602005,
|
|
"step": 1800,
|
|
"valid_targets_mean": 9389.4,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 3.0183946488294313,
|
|
"grad_norm": 0.3741706862110555,
|
|
"learning_rate": 2.8077298144954904e-05,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13829877972602844,
|
|
"step": 1805,
|
|
"valid_targets_mean": 12628.1,
|
|
"valid_targets_min": 3418
|
|
},
|
|
{
|
|
"epoch": 3.0267558528428093,
|
|
"grad_norm": 0.39569087699729116,
|
|
"learning_rate": 2.8000934340710883e-05,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11841882020235062,
|
|
"step": 1810,
|
|
"valid_targets_mean": 9704.1,
|
|
"valid_targets_min": 2613
|
|
},
|
|
{
|
|
"epoch": 3.0351170568561874,
|
|
"grad_norm": 0.41519194548823973,
|
|
"learning_rate": 2.792443141678022e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13783563673496246,
|
|
"step": 1815,
|
|
"valid_targets_mean": 11589.5,
|
|
"valid_targets_min": 3187
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.5509695774792578,
|
|
"learning_rate": 2.784779070339041e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08058594912290573,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4137.0,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 3.051839464882943,
|
|
"grad_norm": 0.3792516716345692,
|
|
"learning_rate": 2.7771013533164805e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13239926099777222,
|
|
"step": 1825,
|
|
"valid_targets_mean": 9074.4,
|
|
"valid_targets_min": 3327
|
|
},
|
|
{
|
|
"epoch": 3.060200668896321,
|
|
"grad_norm": 0.3825115840572522,
|
|
"learning_rate": 2.7694101241099484e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11838261783123016,
|
|
"step": 1830,
|
|
"valid_targets_mean": 11782.2,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 3.068561872909699,
|
|
"grad_norm": 0.4439697626738654,
|
|
"learning_rate": 2.7617055164539993e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1419868767261505,
|
|
"step": 1835,
|
|
"valid_targets_mean": 9437.9,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 3.076923076923077,
|
|
"grad_norm": 0.38556871233380496,
|
|
"learning_rate": 2.753987664315813e-05,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15197840332984924,
|
|
"step": 1840,
|
|
"valid_targets_mean": 11584.5,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 3.0852842809364547,
|
|
"grad_norm": 0.5064996538647744,
|
|
"learning_rate": 2.746256701892861e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12176249176263809,
|
|
"step": 1845,
|
|
"valid_targets_mean": 11345.8,
|
|
"valid_targets_min": 3911
|
|
},
|
|
{
|
|
"epoch": 3.0936454849498327,
|
|
"grad_norm": 0.4863779924821515,
|
|
"learning_rate": 2.738512763610579e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08231323957443237,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5916.9,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 3.1020066889632107,
|
|
"grad_norm": 0.386861417348215,
|
|
"learning_rate": 2.7307559841200238e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11113197356462479,
|
|
"step": 1855,
|
|
"valid_targets_mean": 10113.0,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 3.1103678929765888,
|
|
"grad_norm": 0.3271095442586401,
|
|
"learning_rate": 2.7229864982955328e-05,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12718120217323303,
|
|
"step": 1860,
|
|
"valid_targets_mean": 14943.4,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 3.1187290969899664,
|
|
"grad_norm": 0.3865916004389959,
|
|
"learning_rate": 2.7152044412323842e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09982602298259735,
|
|
"step": 1865,
|
|
"valid_targets_mean": 10027.4,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 3.1270903010033444,
|
|
"grad_norm": 0.3916117229502099,
|
|
"learning_rate": 2.7074099482444406e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10210350155830383,
|
|
"step": 1870,
|
|
"valid_targets_mean": 9821.2,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 3.1354515050167224,
|
|
"grad_norm": 0.3531153602174525,
|
|
"learning_rate": 2.699603154861801e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10611523687839508,
|
|
"step": 1875,
|
|
"valid_targets_mean": 11025.4,
|
|
"valid_targets_min": 3239
|
|
},
|
|
{
|
|
"epoch": 3.1438127090301005,
|
|
"grad_norm": 0.3834370237786904,
|
|
"learning_rate": 2.6917841968284433e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14557313919067383,
|
|
"step": 1880,
|
|
"valid_targets_mean": 9045.1,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 3.1521739130434785,
|
|
"grad_norm": 0.39778776033892466,
|
|
"learning_rate": 2.6839532100998623e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07866433262825012,
|
|
"step": 1885,
|
|
"valid_targets_mean": 8699.5,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.160535117056856,
|
|
"grad_norm": 0.37327656762212347,
|
|
"learning_rate": 2.6761103308407076e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09098326414823532,
|
|
"step": 1890,
|
|
"valid_targets_mean": 9872.6,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 3.168896321070234,
|
|
"grad_norm": 0.41676575470095056,
|
|
"learning_rate": 2.668255695422415e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10257422924041748,
|
|
"step": 1895,
|
|
"valid_targets_mean": 7041.2,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 3.177257525083612,
|
|
"grad_norm": 0.4031690606635288,
|
|
"learning_rate": 2.660389440420836e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314913034439087,
|
|
"step": 1900,
|
|
"valid_targets_mean": 13532.6,
|
|
"valid_targets_min": 9991
|
|
},
|
|
{
|
|
"epoch": 3.1856187290969897,
|
|
"grad_norm": 0.47183442115483976,
|
|
"learning_rate": 2.6525117026138614e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10371886193752289,
|
|
"step": 1905,
|
|
"valid_targets_mean": 10176.5,
|
|
"valid_targets_min": 3397
|
|
},
|
|
{
|
|
"epoch": 3.1939799331103678,
|
|
"grad_norm": 0.4628582312740421,
|
|
"learning_rate": 2.644622618979047e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1257050335407257,
|
|
"step": 1910,
|
|
"valid_targets_mean": 14472.4,
|
|
"valid_targets_min": 9578
|
|
},
|
|
{
|
|
"epoch": 3.202341137123746,
|
|
"grad_norm": 0.3985544690113418,
|
|
"learning_rate": 2.6367223266912252e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11155450344085693,
|
|
"step": 1915,
|
|
"valid_targets_mean": 10776.5,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 3.210702341137124,
|
|
"grad_norm": 0.42681300890602214,
|
|
"learning_rate": 2.6288109631201266e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15136997401714325,
|
|
"step": 1920,
|
|
"valid_targets_mean": 11945.9,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 3.219063545150502,
|
|
"grad_norm": 0.45927449196734843,
|
|
"learning_rate": 2.6208886658279875e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09469208121299744,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5624.4,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 3.2274247491638794,
|
|
"grad_norm": 0.34842158940003065,
|
|
"learning_rate": 2.6129555725671586e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10661038011312485,
|
|
"step": 1930,
|
|
"valid_targets_mean": 12064.4,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 3.2357859531772575,
|
|
"grad_norm": 0.37851565038073115,
|
|
"learning_rate": 2.605011821277712e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1588028073310852,
|
|
"step": 1935,
|
|
"valid_targets_mean": 11188.5,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 3.2441471571906355,
|
|
"grad_norm": 0.4258706962967686,
|
|
"learning_rate": 2.597057550085037e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14069469273090363,
|
|
"step": 1940,
|
|
"valid_targets_mean": 11998.1,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.2525083612040135,
|
|
"grad_norm": 0.820354643276263,
|
|
"learning_rate": 2.589092897297447e-05,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1469946801662445,
|
|
"step": 1945,
|
|
"valid_targets_mean": 8774.6,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 3.260869565217391,
|
|
"grad_norm": 0.3631713277210335,
|
|
"learning_rate": 2.581118001403767e-05,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11041969060897827,
|
|
"step": 1950,
|
|
"valid_targets_mean": 12956.5,
|
|
"valid_targets_min": 5661
|
|
},
|
|
{
|
|
"epoch": 3.269230769230769,
|
|
"grad_norm": 0.3862762281892482,
|
|
"learning_rate": 2.573133001070928e-05,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12677189707756042,
|
|
"step": 1955,
|
|
"valid_targets_mean": 10474.8,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 3.277591973244147,
|
|
"grad_norm": 0.3565660028202287,
|
|
"learning_rate": 2.565138035141558e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11617505550384521,
|
|
"step": 1960,
|
|
"valid_targets_mean": 12146.4,
|
|
"valid_targets_min": 3387
|
|
},
|
|
{
|
|
"epoch": 3.2859531772575252,
|
|
"grad_norm": 0.3318609859626279,
|
|
"learning_rate": 2.557133242631565e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10888245701789856,
|
|
"step": 1965,
|
|
"valid_targets_mean": 15546.5,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 3.294314381270903,
|
|
"grad_norm": 0.4443340857541708,
|
|
"learning_rate": 2.549118762727721e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329856514930725,
|
|
"step": 1970,
|
|
"valid_targets_mean": 13774.8,
|
|
"valid_targets_min": 5640
|
|
},
|
|
{
|
|
"epoch": 3.302675585284281,
|
|
"grad_norm": 0.3298418781806089,
|
|
"learning_rate": 2.5410947347852436e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12475749105215073,
|
|
"step": 1975,
|
|
"valid_targets_mean": 15089.1,
|
|
"valid_targets_min": 4583
|
|
},
|
|
{
|
|
"epoch": 3.311036789297659,
|
|
"grad_norm": 0.39489344717292085,
|
|
"learning_rate": 2.5330612983253667e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11062408238649368,
|
|
"step": 1980,
|
|
"valid_targets_mean": 9742.4,
|
|
"valid_targets_min": 2885
|
|
},
|
|
{
|
|
"epoch": 3.319397993311037,
|
|
"grad_norm": 0.38687222185999326,
|
|
"learning_rate": 2.5250185930329235e-05,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09480836242437363,
|
|
"step": 1985,
|
|
"valid_targets_mean": 8920.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 3.327759197324415,
|
|
"grad_norm": 0.567927191618059,
|
|
"learning_rate": 2.5169667587539105e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12882253527641296,
|
|
"step": 1990,
|
|
"valid_targets_mean": 9716.8,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 3.3361204013377925,
|
|
"grad_norm": 0.4465102875218098,
|
|
"learning_rate": 2.5089059354930584e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12548334896564484,
|
|
"step": 1995,
|
|
"valid_targets_mean": 6871.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 3.3444816053511706,
|
|
"grad_norm": 0.3970552521232286,
|
|
"learning_rate": 2.5008362634113986e-05,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13273335993289948,
|
|
"step": 2000,
|
|
"valid_targets_mean": 11009.4,
|
|
"valid_targets_min": 4730
|
|
},
|
|
{
|
|
"epoch": 3.3528428093645486,
|
|
"grad_norm": 0.42686980001514413,
|
|
"learning_rate": 2.4927578828238253e-05,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1182374656200409,
|
|
"step": 2005,
|
|
"valid_targets_mean": 9239.1,
|
|
"valid_targets_min": 2755
|
|
},
|
|
{
|
|
"epoch": 3.361204013377926,
|
|
"grad_norm": 0.4026185526734106,
|
|
"learning_rate": 2.484670934196654e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10972792655229568,
|
|
"step": 2010,
|
|
"valid_targets_mean": 8045.8,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 3.369565217391304,
|
|
"grad_norm": 0.8537555039364059,
|
|
"learning_rate": 2.476575558145183e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10642819851636887,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5500.4,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 3.3779264214046822,
|
|
"grad_norm": 0.40795895444836794,
|
|
"learning_rate": 2.468471895431243e-05,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12816083431243896,
|
|
"step": 2020,
|
|
"valid_targets_mean": 12345.6,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 3.3862876254180603,
|
|
"grad_norm": 0.3621026446913687,
|
|
"learning_rate": 2.4603600869607564e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11077725887298584,
|
|
"step": 2025,
|
|
"valid_targets_mean": 12300.9,
|
|
"valid_targets_min": 5783
|
|
},
|
|
{
|
|
"epoch": 3.3946488294314383,
|
|
"grad_norm": 0.4005892703596903,
|
|
"learning_rate": 2.452240273781281e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17479680478572845,
|
|
"step": 2030,
|
|
"valid_targets_mean": 12392.8,
|
|
"valid_targets_min": 4163
|
|
},
|
|
{
|
|
"epoch": 3.403010033444816,
|
|
"grad_norm": 0.42137123937217347,
|
|
"learning_rate": 2.444112597079558e-05,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11121512204408646,
|
|
"step": 2035,
|
|
"valid_targets_mean": 9788.1,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 3.411371237458194,
|
|
"grad_norm": 0.356390860765357,
|
|
"learning_rate": 2.435977198179065e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08751965314149857,
|
|
"step": 2040,
|
|
"valid_targets_mean": 10025.4,
|
|
"valid_targets_min": 3385
|
|
},
|
|
{
|
|
"epoch": 3.419732441471572,
|
|
"grad_norm": 0.39768988505831276,
|
|
"learning_rate": 2.4278342185375467e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11412457376718521,
|
|
"step": 2045,
|
|
"valid_targets_mean": 10361.0,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 3.42809364548495,
|
|
"grad_norm": 0.41768300475599635,
|
|
"learning_rate": 2.4196837997445636e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924809217453003,
|
|
"step": 2050,
|
|
"valid_targets_mean": 9935.6,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 3.4364548494983276,
|
|
"grad_norm": 0.42661196091473846,
|
|
"learning_rate": 2.4115260835190285e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1370387077331543,
|
|
"step": 2055,
|
|
"valid_targets_mean": 7571.1,
|
|
"valid_targets_min": 4348
|
|
},
|
|
{
|
|
"epoch": 3.4448160535117056,
|
|
"grad_norm": 0.5402681083063512,
|
|
"learning_rate": 2.4033612117067396e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10931956768035889,
|
|
"step": 2060,
|
|
"valid_targets_mean": 6129.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 3.4531772575250836,
|
|
"grad_norm": 0.41567133766637454,
|
|
"learning_rate": 2.395189326277918e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16065946221351624,
|
|
"step": 2065,
|
|
"valid_targets_mean": 13651.0,
|
|
"valid_targets_min": 4109
|
|
},
|
|
{
|
|
"epoch": 3.4615384615384617,
|
|
"grad_norm": 0.5155848411516415,
|
|
"learning_rate": 2.3870105693247347e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398200690746307,
|
|
"step": 2070,
|
|
"valid_targets_mean": 8250.9,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 3.4698996655518393,
|
|
"grad_norm": 0.38970353378191336,
|
|
"learning_rate": 2.3788250830588437e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09583867341279984,
|
|
"step": 2075,
|
|
"valid_targets_mean": 7572.9,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.376482512573276,
|
|
"learning_rate": 2.3706330098089077e-05,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06973407417535782,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5219.8,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.4866220735785953,
|
|
"grad_norm": 0.4689953787202081,
|
|
"learning_rate": 2.3624344920181243e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07501358538866043,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3921.6,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 3.4949832775919734,
|
|
"grad_norm": 0.3924178226707807,
|
|
"learning_rate": 2.3542296722417452e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12856915593147278,
|
|
"step": 2090,
|
|
"valid_targets_mean": 10092.9,
|
|
"valid_targets_min": 2454
|
|
},
|
|
{
|
|
"epoch": 3.5033444816053514,
|
|
"grad_norm": 0.38150435360779583,
|
|
"learning_rate": 2.346018693144605e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12730631232261658,
|
|
"step": 2095,
|
|
"valid_targets_mean": 10189.2,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 3.511705685618729,
|
|
"grad_norm": 0.4132106741837973,
|
|
"learning_rate": 2.3378016974986326e-05,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1328890472650528,
|
|
"step": 2100,
|
|
"valid_targets_mean": 9908.5,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 3.520066889632107,
|
|
"grad_norm": 0.4026106750692518,
|
|
"learning_rate": 2.3295788281803733e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09833089262247086,
|
|
"step": 2105,
|
|
"valid_targets_mean": 9119.1,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 3.528428093645485,
|
|
"grad_norm": 0.4315814531370703,
|
|
"learning_rate": 2.321350228168505e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14170758426189423,
|
|
"step": 2110,
|
|
"valid_targets_mean": 9766.0,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 3.5367892976588626,
|
|
"grad_norm": 0.3262613037510848,
|
|
"learning_rate": 2.3131160405413472e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13256621360778809,
|
|
"step": 2115,
|
|
"valid_targets_mean": 13910.8,
|
|
"valid_targets_min": 8246
|
|
},
|
|
{
|
|
"epoch": 3.5451505016722407,
|
|
"grad_norm": 0.4352927174497593,
|
|
"learning_rate": 2.30487640847438e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11044695228338242,
|
|
"step": 2120,
|
|
"valid_targets_mean": 10564.0,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 3.5535117056856187,
|
|
"grad_norm": 0.4002474943228743,
|
|
"learning_rate": 2.296631475237749e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11623425781726837,
|
|
"step": 2125,
|
|
"valid_targets_mean": 10250.8,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 3.5618729096989967,
|
|
"grad_norm": 0.47304284624736165,
|
|
"learning_rate": 2.2883813841937754e-05,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12149027734994888,
|
|
"step": 2130,
|
|
"valid_targets_mean": 11667.2,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 3.5702341137123748,
|
|
"grad_norm": 0.38112171601279266,
|
|
"learning_rate": 2.2801262787944668e-05,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10607273876667023,
|
|
"step": 2135,
|
|
"valid_targets_mean": 12610.9,
|
|
"valid_targets_min": 3429
|
|
},
|
|
{
|
|
"epoch": 3.5785953177257523,
|
|
"grad_norm": 0.5166303531313297,
|
|
"learning_rate": 2.2718663025790183e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10478353500366211,
|
|
"step": 2140,
|
|
"valid_targets_mean": 6447.0,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 3.5869565217391304,
|
|
"grad_norm": 0.34928787457376415,
|
|
"learning_rate": 2.2636015991713167e-05,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09263238310813904,
|
|
"step": 2145,
|
|
"valid_targets_mean": 8579.8,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 3.5953177257525084,
|
|
"grad_norm": 0.3533802784913576,
|
|
"learning_rate": 2.2553323122774487e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12293791770935059,
|
|
"step": 2150,
|
|
"valid_targets_mean": 16759.9,
|
|
"valid_targets_min": 5379
|
|
},
|
|
{
|
|
"epoch": 3.6036789297658864,
|
|
"grad_norm": 0.3673273423602782,
|
|
"learning_rate": 2.2470585856831953e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.087818942964077,
|
|
"step": 2155,
|
|
"valid_targets_mean": 10060.4,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 3.6120401337792645,
|
|
"grad_norm": 0.3897733438020665,
|
|
"learning_rate": 2.2387805632515365e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11022856086492538,
|
|
"step": 2160,
|
|
"valid_targets_mean": 12188.8,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 3.620401337792642,
|
|
"grad_norm": 0.3102382020934278,
|
|
"learning_rate": 2.2304983889201467e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0914934054017067,
|
|
"step": 2165,
|
|
"valid_targets_mean": 12285.5,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 3.62876254180602,
|
|
"grad_norm": 0.4323213941007628,
|
|
"learning_rate": 2.222212206698894e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358933001756668,
|
|
"step": 2170,
|
|
"valid_targets_mean": 13265.5,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 3.637123745819398,
|
|
"grad_norm": 0.3706814343130101,
|
|
"learning_rate": 2.2139221606673353e-05,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10313701629638672,
|
|
"step": 2175,
|
|
"valid_targets_mean": 11139.0,
|
|
"valid_targets_min": 4062
|
|
},
|
|
{
|
|
"epoch": 3.6454849498327757,
|
|
"grad_norm": 0.3791689291042618,
|
|
"learning_rate": 2.2056283949722114e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11598846316337585,
|
|
"step": 2180,
|
|
"valid_targets_mean": 10150.6,
|
|
"valid_targets_min": 4110
|
|
},
|
|
{
|
|
"epoch": 3.6538461538461537,
|
|
"grad_norm": 0.75989037841803,
|
|
"learning_rate": 2.197331053824939e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11637677997350693,
|
|
"step": 2185,
|
|
"valid_targets_mean": 7791.2,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 3.6622073578595318,
|
|
"grad_norm": 0.38838382487739925,
|
|
"learning_rate": 2.1890302814991075e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12257356196641922,
|
|
"step": 2190,
|
|
"valid_targets_mean": 8487.2,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 3.67056856187291,
|
|
"grad_norm": 0.3998713743883739,
|
|
"learning_rate": 2.1807262223279633e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11533701419830322,
|
|
"step": 2195,
|
|
"valid_targets_mean": 6768.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 3.678929765886288,
|
|
"grad_norm": 0.3439079720587776,
|
|
"learning_rate": 2.172419020701907e-05,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10417473316192627,
|
|
"step": 2200,
|
|
"valid_targets_mean": 10883.8,
|
|
"valid_targets_min": 5787
|
|
},
|
|
{
|
|
"epoch": 3.6872909698996654,
|
|
"grad_norm": 0.3607338220199551,
|
|
"learning_rate": 2.1641088210659804e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12066195160150528,
|
|
"step": 2205,
|
|
"valid_targets_mean": 10448.1,
|
|
"valid_targets_min": 4449
|
|
},
|
|
{
|
|
"epoch": 3.6956521739130435,
|
|
"grad_norm": 0.371920234769171,
|
|
"learning_rate": 2.155795767917352e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11709782481193542,
|
|
"step": 2210,
|
|
"valid_targets_mean": 9372.0,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 3.7040133779264215,
|
|
"grad_norm": 0.418043580482605,
|
|
"learning_rate": 2.14748000580281e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10964877903461456,
|
|
"step": 2215,
|
|
"valid_targets_mean": 8159.2,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 3.712374581939799,
|
|
"grad_norm": 0.46085943804613605,
|
|
"learning_rate": 2.1391616793162435e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09041042625904083,
|
|
"step": 2220,
|
|
"valid_targets_mean": 5586.2,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 3.720735785953177,
|
|
"grad_norm": 0.39329914380128467,
|
|
"learning_rate": 2.1308409330961308e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09908054769039154,
|
|
"step": 2225,
|
|
"valid_targets_mean": 8056.6,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 3.729096989966555,
|
|
"grad_norm": 0.438211136629277,
|
|
"learning_rate": 2.122517911823027e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1180066466331482,
|
|
"step": 2230,
|
|
"valid_targets_mean": 8578.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 3.737458193979933,
|
|
"grad_norm": 0.4020723253897148,
|
|
"learning_rate": 2.114192760217042e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11283163726329803,
|
|
"step": 2235,
|
|
"valid_targets_mean": 8280.6,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 3.745819397993311,
|
|
"grad_norm": 0.3521895315128848,
|
|
"learning_rate": 2.10586562303533e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11954619735479355,
|
|
"step": 2240,
|
|
"valid_targets_mean": 12761.8,
|
|
"valid_targets_min": 2344
|
|
},
|
|
{
|
|
"epoch": 3.754180602006689,
|
|
"grad_norm": 0.37323358200914486,
|
|
"learning_rate": 2.0975366450695707e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12421952188014984,
|
|
"step": 2245,
|
|
"valid_targets_mean": 9843.6,
|
|
"valid_targets_min": 4282
|
|
},
|
|
{
|
|
"epoch": 3.762541806020067,
|
|
"grad_norm": 0.39084303441206214,
|
|
"learning_rate": 2.0892059711434496e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09830933809280396,
|
|
"step": 2250,
|
|
"valid_targets_mean": 8728.6,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 3.770903010033445,
|
|
"grad_norm": 0.3109824705354332,
|
|
"learning_rate": 2.0808737461101417e-05,
|
|
"loss": 0.2069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12205028533935547,
|
|
"step": 2255,
|
|
"valid_targets_mean": 14888.6,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 3.779264214046823,
|
|
"grad_norm": 0.4911592710912328,
|
|
"learning_rate": 2.0725401148497946e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10874302685260773,
|
|
"step": 2260,
|
|
"valid_targets_mean": 9849.2,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.787625418060201,
|
|
"grad_norm": 0.3549129707051121,
|
|
"learning_rate": 2.0642052222670043e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13289181888103485,
|
|
"step": 2265,
|
|
"valid_targets_mean": 12516.6,
|
|
"valid_targets_min": 5493
|
|
},
|
|
{
|
|
"epoch": 3.7959866220735785,
|
|
"grad_norm": 0.3693470117676211,
|
|
"learning_rate": 2.0558692132883008e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10257293283939362,
|
|
"step": 2270,
|
|
"valid_targets_mean": 11396.5,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 3.8043478260869565,
|
|
"grad_norm": 0.42337908498291044,
|
|
"learning_rate": 2.047532232859625e-05,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1192982867360115,
|
|
"step": 2275,
|
|
"valid_targets_mean": 11505.1,
|
|
"valid_targets_min": 2455
|
|
},
|
|
{
|
|
"epoch": 3.8127090301003346,
|
|
"grad_norm": 0.3918216657189213,
|
|
"learning_rate": 2.039194425943808e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09815693646669388,
|
|
"step": 2280,
|
|
"valid_targets_mean": 10444.1,
|
|
"valid_targets_min": 3440
|
|
},
|
|
{
|
|
"epoch": 3.821070234113712,
|
|
"grad_norm": 0.4077997545130558,
|
|
"learning_rate": 2.0308559375180557e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17672425508499146,
|
|
"step": 2285,
|
|
"valid_targets_mean": 11177.4,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 3.82943143812709,
|
|
"grad_norm": 0.42038513502203473,
|
|
"learning_rate": 2.0225169125714193e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09381366521120071,
|
|
"step": 2290,
|
|
"valid_targets_mean": 5689.2,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 3.8377926421404682,
|
|
"grad_norm": 0.6809312075152667,
|
|
"learning_rate": 2.0141774961022826e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13569194078445435,
|
|
"step": 2295,
|
|
"valid_targets_mean": 10960.5,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 3.8461538461538463,
|
|
"grad_norm": 0.36188969740454247,
|
|
"learning_rate": 2.0058378331158357e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08458662033081055,
|
|
"step": 2300,
|
|
"valid_targets_mean": 10572.0,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 3.8545150501672243,
|
|
"grad_norm": 0.5182696580804744,
|
|
"learning_rate": 1.9974980686215546e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14742368459701538,
|
|
"step": 2305,
|
|
"valid_targets_mean": 9108.1,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 3.862876254180602,
|
|
"grad_norm": 0.3510747944940706,
|
|
"learning_rate": 1.9891583476306814e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1218988448381424,
|
|
"step": 2310,
|
|
"valid_targets_mean": 14554.2,
|
|
"valid_targets_min": 8613
|
|
},
|
|
{
|
|
"epoch": 3.87123745819398,
|
|
"grad_norm": 0.38382663591346805,
|
|
"learning_rate": 1.9808188151537008e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352759301662445,
|
|
"step": 2315,
|
|
"valid_targets_mean": 11531.4,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 3.879598662207358,
|
|
"grad_norm": 0.3998533392496732,
|
|
"learning_rate": 1.972479616197821e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12313543260097504,
|
|
"step": 2320,
|
|
"valid_targets_mean": 15043.9,
|
|
"valid_targets_min": 4303
|
|
},
|
|
{
|
|
"epoch": 3.8879598662207355,
|
|
"grad_norm": 0.3636946394954891,
|
|
"learning_rate": 1.96414089576445e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10160799324512482,
|
|
"step": 2325,
|
|
"valid_targets_mean": 8692.1,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 3.8963210702341136,
|
|
"grad_norm": 0.37189088456482733,
|
|
"learning_rate": 1.9558027988466743e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12251149863004684,
|
|
"step": 2330,
|
|
"valid_targets_mean": 10337.8,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 3.9046822742474916,
|
|
"grad_norm": 0.4222272689759962,
|
|
"learning_rate": 1.947465470426741e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12189013510942459,
|
|
"step": 2335,
|
|
"valid_targets_mean": 12251.6,
|
|
"valid_targets_min": 3398
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.39536017434086906,
|
|
"learning_rate": 1.9391290554735326e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14112915098667145,
|
|
"step": 2340,
|
|
"valid_targets_mean": 12184.6,
|
|
"valid_targets_min": 2101
|
|
},
|
|
{
|
|
"epoch": 3.9214046822742477,
|
|
"grad_norm": 0.4195380495721719,
|
|
"learning_rate": 1.93079369894005e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1210208386182785,
|
|
"step": 2345,
|
|
"valid_targets_mean": 11404.5,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 3.9297658862876252,
|
|
"grad_norm": 0.37083692537662444,
|
|
"learning_rate": 1.922459545760889e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11951281130313873,
|
|
"step": 2350,
|
|
"valid_targets_mean": 12123.1,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 3.9381270903010033,
|
|
"grad_norm": 0.4208518984541492,
|
|
"learning_rate": 1.914126740849723e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08365805447101593,
|
|
"step": 2355,
|
|
"valid_targets_mean": 6481.6,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 3.9464882943143813,
|
|
"grad_norm": 0.3816300730839306,
|
|
"learning_rate": 1.9057954290967795e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1420331597328186,
|
|
"step": 2360,
|
|
"valid_targets_mean": 12067.6,
|
|
"valid_targets_min": 2875
|
|
},
|
|
{
|
|
"epoch": 3.9548494983277593,
|
|
"grad_norm": 0.4187638640055337,
|
|
"learning_rate": 1.897465755366325e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11118359863758087,
|
|
"step": 2365,
|
|
"valid_targets_mean": 8544.8,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 3.9632107023411374,
|
|
"grad_norm": 0.49041466421876145,
|
|
"learning_rate": 1.8891378644941437e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14806511998176575,
|
|
"step": 2370,
|
|
"valid_targets_mean": 8757.1,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.971571906354515,
|
|
"grad_norm": 0.3379819194791503,
|
|
"learning_rate": 1.88081190128502e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08078338950872421,
|
|
"step": 2375,
|
|
"valid_targets_mean": 11669.6,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 3.979933110367893,
|
|
"grad_norm": 0.4225704511258691,
|
|
"learning_rate": 1.8724880105102196e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14075110852718353,
|
|
"step": 2380,
|
|
"valid_targets_mean": 12421.8,
|
|
"valid_targets_min": 3643
|
|
},
|
|
{
|
|
"epoch": 3.988294314381271,
|
|
"grad_norm": 0.4244540909242069,
|
|
"learning_rate": 1.8641663369049724e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1246207058429718,
|
|
"step": 2385,
|
|
"valid_targets_mean": 8270.1,
|
|
"valid_targets_min": 2082
|
|
},
|
|
{
|
|
"epoch": 3.9966555183946486,
|
|
"grad_norm": 0.4038418011744287,
|
|
"learning_rate": 1.8558470251659574e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17353568971157074,
|
|
"step": 2390,
|
|
"valid_targets_mean": 11545.6,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 4.005016722408027,
|
|
"grad_norm": 0.46824913379799576,
|
|
"learning_rate": 1.8475302199487848e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07456627488136292,
|
|
"step": 2395,
|
|
"valid_targets_mean": 6363.2,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 4.013377926421405,
|
|
"grad_norm": 0.4150456790191164,
|
|
"learning_rate": 1.8392160658654826e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12160071730613708,
|
|
"step": 2400,
|
|
"valid_targets_mean": 11345.1,
|
|
"valid_targets_min": 3014
|
|
},
|
|
{
|
|
"epoch": 4.021739130434782,
|
|
"grad_norm": 0.4279456004646581,
|
|
"learning_rate": 1.8309047074819805e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1325855255126953,
|
|
"step": 2405,
|
|
"valid_targets_mean": 12146.4,
|
|
"valid_targets_min": 7888
|
|
},
|
|
{
|
|
"epoch": 4.030100334448161,
|
|
"grad_norm": 0.3693815921962192,
|
|
"learning_rate": 1.822596289315596e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12098702788352966,
|
|
"step": 2410,
|
|
"valid_targets_mean": 11123.5,
|
|
"valid_targets_min": 3312
|
|
},
|
|
{
|
|
"epoch": 4.038461538461538,
|
|
"grad_norm": 0.4056165867153895,
|
|
"learning_rate": 1.814290955832523e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09238055348396301,
|
|
"step": 2415,
|
|
"valid_targets_mean": 9455.0,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 4.046822742474917,
|
|
"grad_norm": 0.46291086423129296,
|
|
"learning_rate": 1.8059888514453196e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13354870676994324,
|
|
"step": 2420,
|
|
"valid_targets_mean": 8782.2,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 4.055183946488294,
|
|
"grad_norm": 0.42217945488148834,
|
|
"learning_rate": 1.7976901205103953e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10084179043769836,
|
|
"step": 2425,
|
|
"valid_targets_mean": 9552.9,
|
|
"valid_targets_min": 3440
|
|
},
|
|
{
|
|
"epoch": 4.063545150501672,
|
|
"grad_norm": 0.3903608731255544,
|
|
"learning_rate": 1.789394907325504e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10546227544546127,
|
|
"step": 2430,
|
|
"valid_targets_mean": 10039.8,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 4.0719063545150505,
|
|
"grad_norm": 0.4357222176489469,
|
|
"learning_rate": 1.7811033561272328e-05,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1062517985701561,
|
|
"step": 2435,
|
|
"valid_targets_mean": 9030.1,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 4.080267558528428,
|
|
"grad_norm": 0.40913762279040217,
|
|
"learning_rate": 1.7728156110884924e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11293068528175354,
|
|
"step": 2440,
|
|
"valid_targets_mean": 8946.8,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 4.088628762541806,
|
|
"grad_norm": 0.38410089920490753,
|
|
"learning_rate": 1.7645318163160146e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12508314847946167,
|
|
"step": 2445,
|
|
"valid_targets_mean": 11340.2,
|
|
"valid_targets_min": 4211
|
|
},
|
|
{
|
|
"epoch": 4.096989966555184,
|
|
"grad_norm": 0.4317845255065196,
|
|
"learning_rate": 1.7562521158478432e-05,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07759210467338562,
|
|
"step": 2450,
|
|
"valid_targets_mean": 6232.1,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 4.105351170568562,
|
|
"grad_norm": 0.36292107161136766,
|
|
"learning_rate": 1.7479766536508313e-05,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07792367041110992,
|
|
"step": 2455,
|
|
"valid_targets_mean": 9648.2,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 4.11371237458194,
|
|
"grad_norm": 0.3522228612548513,
|
|
"learning_rate": 1.7397055736181366e-05,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11576631665229797,
|
|
"step": 2460,
|
|
"valid_targets_mean": 13632.0,
|
|
"valid_targets_min": 7079
|
|
},
|
|
{
|
|
"epoch": 4.122073578595318,
|
|
"grad_norm": 0.4938967691262431,
|
|
"learning_rate": 1.7314390195667193e-05,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12811027467250824,
|
|
"step": 2465,
|
|
"valid_targets_mean": 9009.1,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 4.130434782608695,
|
|
"grad_norm": 0.5938212727843428,
|
|
"learning_rate": 1.723177135234844e-05,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09701566398143768,
|
|
"step": 2470,
|
|
"valid_targets_mean": 10842.0,
|
|
"valid_targets_min": 2864
|
|
},
|
|
{
|
|
"epoch": 4.138795986622074,
|
|
"grad_norm": 0.35551266651322705,
|
|
"learning_rate": 1.7149200642795765e-05,
|
|
"loss": 0.2049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06892241537570953,
|
|
"step": 2475,
|
|
"valid_targets_mean": 9766.2,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 4.147157190635451,
|
|
"grad_norm": 0.35007065590445896,
|
|
"learning_rate": 1.70666795027429e-05,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11715811491012573,
|
|
"step": 2480,
|
|
"valid_targets_mean": 15961.1,
|
|
"valid_targets_min": 7661
|
|
},
|
|
{
|
|
"epoch": 4.15551839464883,
|
|
"grad_norm": 0.367523484706207,
|
|
"learning_rate": 1.6984209367061657e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09838040173053741,
|
|
"step": 2485,
|
|
"valid_targets_mean": 11699.6,
|
|
"valid_targets_min": 5686
|
|
},
|
|
{
|
|
"epoch": 4.1638795986622075,
|
|
"grad_norm": 0.5249487696712716,
|
|
"learning_rate": 1.6901791669736974e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311858892440796,
|
|
"step": 2490,
|
|
"valid_targets_mean": 9406.2,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 4.172240802675585,
|
|
"grad_norm": 0.3869965500106061,
|
|
"learning_rate": 1.6819427843842016e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08758570998907089,
|
|
"step": 2495,
|
|
"valid_targets_mean": 10491.4,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 4.1806020066889635,
|
|
"grad_norm": 0.44949350570234503,
|
|
"learning_rate": 1.6737119321513224e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08844759315252304,
|
|
"step": 2500,
|
|
"valid_targets_mean": 7023.6,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 4.188963210702341,
|
|
"grad_norm": 0.40724288378564727,
|
|
"learning_rate": 1.6654867533925418e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09080510586500168,
|
|
"step": 2505,
|
|
"valid_targets_mean": 8109.6,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 4.197324414715719,
|
|
"grad_norm": 0.43653325035874313,
|
|
"learning_rate": 1.6572673911266943e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1195165365934372,
|
|
"step": 2510,
|
|
"valid_targets_mean": 10019.6,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 4.205685618729097,
|
|
"grad_norm": 0.36353816633695396,
|
|
"learning_rate": 1.6490539882714756e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10527320206165314,
|
|
"step": 2515,
|
|
"valid_targets_mean": 9215.5,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 4.214046822742475,
|
|
"grad_norm": 0.45167755812275684,
|
|
"learning_rate": 1.6408466876409596e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07090730965137482,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5699.4,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 4.222408026755853,
|
|
"grad_norm": 0.4237405099986818,
|
|
"learning_rate": 1.6326456319431154e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13936494290828705,
|
|
"step": 2525,
|
|
"valid_targets_mean": 9788.9,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 4.230769230769231,
|
|
"grad_norm": 0.4396349391858134,
|
|
"learning_rate": 1.6244509637773256e-05,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14318327605724335,
|
|
"step": 2530,
|
|
"valid_targets_mean": 7565.6,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 4.239130434782608,
|
|
"grad_norm": 0.43397699516749344,
|
|
"learning_rate": 1.6162628256319078e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10817872732877731,
|
|
"step": 2535,
|
|
"valid_targets_mean": 10702.4,
|
|
"valid_targets_min": 3550
|
|
},
|
|
{
|
|
"epoch": 4.247491638795987,
|
|
"grad_norm": 0.4951553398036785,
|
|
"learning_rate": 1.6080813598816355e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1070375144481659,
|
|
"step": 2540,
|
|
"valid_targets_mean": 9614.1,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 4.2558528428093645,
|
|
"grad_norm": 0.47826696293676246,
|
|
"learning_rate": 1.599906708785262e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09833066165447235,
|
|
"step": 2545,
|
|
"valid_targets_mean": 9176.2,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 4.264214046822742,
|
|
"grad_norm": 0.40536561816546335,
|
|
"learning_rate": 1.5917390144830488e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11016871780157089,
|
|
"step": 2550,
|
|
"valid_targets_mean": 11123.4,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 4.2725752508361206,
|
|
"grad_norm": 0.33018755647852094,
|
|
"learning_rate": 1.583578418994294e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11970333009958267,
|
|
"step": 2555,
|
|
"valid_targets_mean": 12023.8,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 4.280936454849498,
|
|
"grad_norm": 0.41580437416977656,
|
|
"learning_rate": 1.5754250642148592e-05,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13583989441394806,
|
|
"step": 2560,
|
|
"valid_targets_mean": 11565.8,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 4.289297658862877,
|
|
"grad_norm": 0.5253167841956038,
|
|
"learning_rate": 1.5672790919147096e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11574394255876541,
|
|
"step": 2565,
|
|
"valid_targets_mean": 9219.8,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 4.297658862876254,
|
|
"grad_norm": 0.4440052032922653,
|
|
"learning_rate": 1.5591406437354394e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11206547915935516,
|
|
"step": 2570,
|
|
"valid_targets_mean": 7280.2,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 4.306020066889632,
|
|
"grad_norm": 0.37441401744831954,
|
|
"learning_rate": 1.5510098611878177e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08261077851057053,
|
|
"step": 2575,
|
|
"valid_targets_mean": 11446.6,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 4.31438127090301,
|
|
"grad_norm": 0.37486026752509155,
|
|
"learning_rate": 1.542886885649322e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12461366504430771,
|
|
"step": 2580,
|
|
"valid_targets_mean": 12098.5,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 4.322742474916388,
|
|
"grad_norm": 0.46020865948847056,
|
|
"learning_rate": 1.534771858361683e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09141238033771515,
|
|
"step": 2585,
|
|
"valid_targets_mean": 8432.6,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 4.331103678929766,
|
|
"grad_norm": 0.43835376047502206,
|
|
"learning_rate": 1.5266649204284273e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10374638438224792,
|
|
"step": 2590,
|
|
"valid_targets_mean": 7781.0,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 4.339464882943144,
|
|
"grad_norm": 0.4542948561183914,
|
|
"learning_rate": 1.5185662128124254e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1059148758649826,
|
|
"step": 2595,
|
|
"valid_targets_mean": 8940.4,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.4857232369283785,
|
|
"learning_rate": 1.510475876333438e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10642239451408386,
|
|
"step": 2600,
|
|
"valid_targets_mean": 7598.8,
|
|
"valid_targets_min": 3325
|
|
},
|
|
{
|
|
"epoch": 4.3561872909699,
|
|
"grad_norm": 0.41481024061362015,
|
|
"learning_rate": 1.5023940516656697e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11508562415838242,
|
|
"step": 2605,
|
|
"valid_targets_mean": 10293.4,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 4.364548494983278,
|
|
"grad_norm": 0.40971628641492464,
|
|
"learning_rate": 1.4943208793353235e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10408522188663483,
|
|
"step": 2610,
|
|
"valid_targets_mean": 9142.2,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 4.372909698996655,
|
|
"grad_norm": 0.5079398483911838,
|
|
"learning_rate": 1.4862564997181528e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11725317686796188,
|
|
"step": 2615,
|
|
"valid_targets_mean": 9619.6,
|
|
"valid_targets_min": 2937
|
|
},
|
|
{
|
|
"epoch": 4.381270903010034,
|
|
"grad_norm": 0.8114385340973936,
|
|
"learning_rate": 1.4782010530370294e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11597201228141785,
|
|
"step": 2620,
|
|
"valid_targets_mean": 10266.6,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.389632107023411,
|
|
"grad_norm": 0.40041334629813247,
|
|
"learning_rate": 1.470154679359495e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1117502748966217,
|
|
"step": 2625,
|
|
"valid_targets_mean": 9488.9,
|
|
"valid_targets_min": 3282
|
|
},
|
|
{
|
|
"epoch": 4.39799331103679,
|
|
"grad_norm": 0.4099420877461767,
|
|
"learning_rate": 1.4621175185953322e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13564707338809967,
|
|
"step": 2630,
|
|
"valid_targets_mean": 11001.0,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 4.406354515050167,
|
|
"grad_norm": 0.43227772914817614,
|
|
"learning_rate": 1.4540897104941307e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08555178344249725,
|
|
"step": 2635,
|
|
"valid_targets_mean": 6730.1,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 4.414715719063545,
|
|
"grad_norm": 0.6602650186071302,
|
|
"learning_rate": 1.4460713946428553e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11905218660831451,
|
|
"step": 2640,
|
|
"valid_targets_mean": 11081.8,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 4.423076923076923,
|
|
"grad_norm": 0.4416837724521798,
|
|
"learning_rate": 1.4380627104634224e-05,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14645928144454956,
|
|
"step": 2645,
|
|
"valid_targets_mean": 9335.5,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 4.431438127090301,
|
|
"grad_norm": 0.3755062515947819,
|
|
"learning_rate": 1.4300637972102721e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09524558484554291,
|
|
"step": 2650,
|
|
"valid_targets_mean": 10721.2,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 4.4397993311036785,
|
|
"grad_norm": 0.38876651147916574,
|
|
"learning_rate": 1.4220747939679478e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08305356651544571,
|
|
"step": 2655,
|
|
"valid_targets_mean": 13021.1,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 4.448160535117057,
|
|
"grad_norm": 0.37895809475539416,
|
|
"learning_rate": 1.414095839648679e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11551002413034439,
|
|
"step": 2660,
|
|
"valid_targets_mean": 11642.8,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 4.456521739130435,
|
|
"grad_norm": 0.38738906892170377,
|
|
"learning_rate": 1.4061270729899663e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263466477394104,
|
|
"step": 2665,
|
|
"valid_targets_mean": 12396.5,
|
|
"valid_targets_min": 3048
|
|
},
|
|
{
|
|
"epoch": 4.464882943143813,
|
|
"grad_norm": 0.38771379796893235,
|
|
"learning_rate": 1.3981686325521647e-05,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0854523777961731,
|
|
"step": 2670,
|
|
"valid_targets_mean": 10752.1,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 4.473244147157191,
|
|
"grad_norm": 0.352456061615308,
|
|
"learning_rate": 1.3902206567160827e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12601426243782043,
|
|
"step": 2675,
|
|
"valid_targets_mean": 13595.5,
|
|
"valid_targets_min": 5791
|
|
},
|
|
{
|
|
"epoch": 4.481605351170568,
|
|
"grad_norm": 0.367782906168656,
|
|
"learning_rate": 1.3822832836805667e-05,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1091400682926178,
|
|
"step": 2680,
|
|
"valid_targets_mean": 10662.8,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 4.489966555183947,
|
|
"grad_norm": 0.35505012032694894,
|
|
"learning_rate": 1.3743566514601037e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09951368719339371,
|
|
"step": 2685,
|
|
"valid_targets_mean": 12259.1,
|
|
"valid_targets_min": 3062
|
|
},
|
|
{
|
|
"epoch": 4.498327759197324,
|
|
"grad_norm": 0.4001651153034477,
|
|
"learning_rate": 1.3664408978824209e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10657516121864319,
|
|
"step": 2690,
|
|
"valid_targets_mean": 9886.9,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 4.506688963210703,
|
|
"grad_norm": 0.43378531949527016,
|
|
"learning_rate": 1.3585361605860863e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10175126045942307,
|
|
"step": 2695,
|
|
"valid_targets_mean": 7144.4,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 4.51505016722408,
|
|
"grad_norm": 0.379858730398327,
|
|
"learning_rate": 1.3506425770181211e-05,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08921210467815399,
|
|
"step": 2700,
|
|
"valid_targets_mean": 8162.4,
|
|
"valid_targets_min": 3091
|
|
},
|
|
{
|
|
"epoch": 4.523411371237458,
|
|
"grad_norm": 0.3803913557407151,
|
|
"learning_rate": 1.342760284431603e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08413312584161758,
|
|
"step": 2705,
|
|
"valid_targets_mean": 9668.8,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 4.531772575250836,
|
|
"grad_norm": 0.3695571749268172,
|
|
"learning_rate": 1.3348894198832845e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11284390836954117,
|
|
"step": 2710,
|
|
"valid_targets_mean": 11042.2,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 4.540133779264214,
|
|
"grad_norm": 0.3753385361171216,
|
|
"learning_rate": 1.3270301202312075e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08523640036582947,
|
|
"step": 2715,
|
|
"valid_targets_mean": 9812.6,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 4.548494983277592,
|
|
"grad_norm": 0.4504396222873335,
|
|
"learning_rate": 1.3191825221323246e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10473215579986572,
|
|
"step": 2720,
|
|
"valid_targets_mean": 8950.9,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 4.55685618729097,
|
|
"grad_norm": 0.41843228978631886,
|
|
"learning_rate": 1.311346762040123e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13844969868659973,
|
|
"step": 2725,
|
|
"valid_targets_mean": 8738.9,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 4.565217391304348,
|
|
"grad_norm": 0.4221256238196481,
|
|
"learning_rate": 1.3035229762022513e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09268766641616821,
|
|
"step": 2730,
|
|
"valid_targets_mean": 10855.6,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 4.573578595317725,
|
|
"grad_norm": 0.7028442173079945,
|
|
"learning_rate": 1.2957113006581494e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07372640073299408,
|
|
"step": 2735,
|
|
"valid_targets_mean": 6022.2,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 4.581939799331104,
|
|
"grad_norm": 0.5046638104907637,
|
|
"learning_rate": 1.2879118712366858e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12358702719211578,
|
|
"step": 2740,
|
|
"valid_targets_mean": 9869.8,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 4.590301003344481,
|
|
"grad_norm": 0.3939518342157583,
|
|
"learning_rate": 1.280124823553794e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10814936459064484,
|
|
"step": 2745,
|
|
"valid_targets_mean": 11132.0,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 4.59866220735786,
|
|
"grad_norm": 0.37377398692150315,
|
|
"learning_rate": 1.2723502930101126e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401630938053131,
|
|
"step": 2750,
|
|
"valid_targets_mean": 13976.9,
|
|
"valid_targets_min": 8462
|
|
},
|
|
{
|
|
"epoch": 4.607023411371237,
|
|
"grad_norm": 0.43555499665241,
|
|
"learning_rate": 1.2645884147886376e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386481672525406,
|
|
"step": 2755,
|
|
"valid_targets_mean": 11095.9,
|
|
"valid_targets_min": 7399
|
|
},
|
|
{
|
|
"epoch": 4.615384615384615,
|
|
"grad_norm": 0.4495095092727491,
|
|
"learning_rate": 1.2568393238523627e-05,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528936445713043,
|
|
"step": 2760,
|
|
"valid_targets_mean": 8823.0,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 4.6237458193979935,
|
|
"grad_norm": 0.3551639381308307,
|
|
"learning_rate": 1.2491031549419396e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09725973010063171,
|
|
"step": 2765,
|
|
"valid_targets_mean": 9556.8,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 4.632107023411371,
|
|
"grad_norm": 0.43511334079976793,
|
|
"learning_rate": 1.2413800425733324e-05,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08855731040239334,
|
|
"step": 2770,
|
|
"valid_targets_mean": 9271.0,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 4.6404682274247495,
|
|
"grad_norm": 0.39254666401128285,
|
|
"learning_rate": 1.2336701210354774e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10847564041614532,
|
|
"step": 2775,
|
|
"valid_targets_mean": 10472.8,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 4.648829431438127,
|
|
"grad_norm": 0.5146895083625952,
|
|
"learning_rate": 1.2259735243879533e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06758619099855423,
|
|
"step": 2780,
|
|
"valid_targets_mean": 6145.4,
|
|
"valid_targets_min": 2455
|
|
},
|
|
{
|
|
"epoch": 4.657190635451505,
|
|
"grad_norm": 0.41332020861373625,
|
|
"learning_rate": 1.2182903864586424e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09794852882623672,
|
|
"step": 2785,
|
|
"valid_targets_mean": 9508.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 4.665551839464883,
|
|
"grad_norm": 0.42452556390112034,
|
|
"learning_rate": 1.2106208408414101e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1038217693567276,
|
|
"step": 2790,
|
|
"valid_targets_mean": 7773.0,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 4.673913043478261,
|
|
"grad_norm": 0.41863436572223295,
|
|
"learning_rate": 1.202965020893779e-05,
|
|
"loss": 0.2081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11352011561393738,
|
|
"step": 2795,
|
|
"valid_targets_mean": 8635.0,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 4.682274247491639,
|
|
"grad_norm": 0.44936547606108096,
|
|
"learning_rate": 1.1953230597346116e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13090568780899048,
|
|
"step": 2800,
|
|
"valid_targets_mean": 9519.6,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 4.690635451505017,
|
|
"grad_norm": 0.3740091042232538,
|
|
"learning_rate": 1.1876950902417921e-05,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07533756643533707,
|
|
"step": 2805,
|
|
"valid_targets_mean": 8145.6,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 4.698996655518394,
|
|
"grad_norm": 0.455329404095625,
|
|
"learning_rate": 1.1800812450499227e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12073203921318054,
|
|
"step": 2810,
|
|
"valid_targets_mean": 10387.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 4.707357859531773,
|
|
"grad_norm": 0.4074038046185549,
|
|
"learning_rate": 1.1724816565480092e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06991899013519287,
|
|
"step": 2815,
|
|
"valid_targets_mean": 7584.2,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 4.7157190635451505,
|
|
"grad_norm": 0.3676270483354745,
|
|
"learning_rate": 1.1648964568771661e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11562500894069672,
|
|
"step": 2820,
|
|
"valid_targets_mean": 12426.4,
|
|
"valid_targets_min": 3251
|
|
},
|
|
{
|
|
"epoch": 4.724080267558528,
|
|
"grad_norm": 0.41411583799766677,
|
|
"learning_rate": 1.157325777928314e-05,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11710748076438904,
|
|
"step": 2825,
|
|
"valid_targets_mean": 9287.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 4.7324414715719065,
|
|
"grad_norm": 0.41363705840802145,
|
|
"learning_rate": 1.149769751339889e-05,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15478257834911346,
|
|
"step": 2830,
|
|
"valid_targets_mean": 10014.6,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 4.740802675585284,
|
|
"grad_norm": 0.49814875737848874,
|
|
"learning_rate": 1.142228508495553e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18731409311294556,
|
|
"step": 2835,
|
|
"valid_targets_mean": 11445.0,
|
|
"valid_targets_min": 4863
|
|
},
|
|
{
|
|
"epoch": 4.749163879598662,
|
|
"grad_norm": 0.4573170219287229,
|
|
"learning_rate": 1.1347021805219092e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15321433544158936,
|
|
"step": 2840,
|
|
"valid_targets_mean": 10263.4,
|
|
"valid_targets_min": 2682
|
|
},
|
|
{
|
|
"epoch": 4.75752508361204,
|
|
"grad_norm": 0.7445595381123542,
|
|
"learning_rate": 1.1271908982862214e-05,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1091545969247818,
|
|
"step": 2845,
|
|
"valid_targets_mean": 12502.4,
|
|
"valid_targets_min": 7808
|
|
},
|
|
{
|
|
"epoch": 4.765886287625418,
|
|
"grad_norm": 0.3944270256188886,
|
|
"learning_rate": 1.11969479239414e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1146778091788292,
|
|
"step": 2850,
|
|
"valid_targets_mean": 10767.1,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 4.774247491638796,
|
|
"grad_norm": 0.3859180592057163,
|
|
"learning_rate": 1.1122139931874303e-05,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0834469124674797,
|
|
"step": 2855,
|
|
"valid_targets_mean": 10520.2,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.6481948770790057,
|
|
"learning_rate": 1.104748630741705e-05,
|
|
"loss": 0.2077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08295457065105438,
|
|
"step": 2860,
|
|
"valid_targets_mean": 9309.2,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.790969899665551,
|
|
"grad_norm": 0.41308055037602565,
|
|
"learning_rate": 1.0972988348641643e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12424132227897644,
|
|
"step": 2865,
|
|
"valid_targets_mean": 11155.8,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 4.79933110367893,
|
|
"grad_norm": 0.3862013106126084,
|
|
"learning_rate": 1.0898647350913376e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11552851647138596,
|
|
"step": 2870,
|
|
"valid_targets_mean": 9404.1,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 4.8076923076923075,
|
|
"grad_norm": 0.47870786592466785,
|
|
"learning_rate": 1.0824464606868323e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09273598343133926,
|
|
"step": 2875,
|
|
"valid_targets_mean": 6551.0,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 4.816053511705686,
|
|
"grad_norm": 0.43698612831830147,
|
|
"learning_rate": 1.0750441406390841e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10714380443096161,
|
|
"step": 2880,
|
|
"valid_targets_mean": 8101.9,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.8244147157190636,
|
|
"grad_norm": 0.3681404389352903,
|
|
"learning_rate": 1.0676579036591167e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07010447978973389,
|
|
"step": 2885,
|
|
"valid_targets_mean": 8584.1,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 4.832775919732441,
|
|
"grad_norm": 0.3938221099925066,
|
|
"learning_rate": 1.0602878781783019e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.083269864320755,
|
|
"step": 2890,
|
|
"valid_targets_mean": 8040.6,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 4.84113712374582,
|
|
"grad_norm": 0.42081064410533703,
|
|
"learning_rate": 1.0529341923461272e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11785873025655746,
|
|
"step": 2895,
|
|
"valid_targets_mean": 8842.8,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 4.849498327759197,
|
|
"grad_norm": 0.3531495092399403,
|
|
"learning_rate": 1.0455969740279675e-05,
|
|
"loss": 0.2069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10647302865982056,
|
|
"step": 2900,
|
|
"valid_targets_mean": 12969.2,
|
|
"valid_targets_min": 2633
|
|
},
|
|
{
|
|
"epoch": 4.857859531772576,
|
|
"grad_norm": 0.3666297453679126,
|
|
"learning_rate": 1.0382763508028615e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09191754460334778,
|
|
"step": 2905,
|
|
"valid_targets_mean": 11285.9,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 4.866220735785953,
|
|
"grad_norm": 0.4673375163712736,
|
|
"learning_rate": 1.0309724499612939e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12670591473579407,
|
|
"step": 2910,
|
|
"valid_targets_mean": 7762.9,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 4.874581939799331,
|
|
"grad_norm": 0.40146728151127425,
|
|
"learning_rate": 1.0236853985029815e-05,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10891246795654297,
|
|
"step": 2915,
|
|
"valid_targets_mean": 8317.5,
|
|
"valid_targets_min": 2356
|
|
},
|
|
{
|
|
"epoch": 4.882943143812709,
|
|
"grad_norm": 0.7548995650835181,
|
|
"learning_rate": 1.0164153231346656e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10860119760036469,
|
|
"step": 2920,
|
|
"valid_targets_mean": 12518.4,
|
|
"valid_targets_min": 4532
|
|
},
|
|
{
|
|
"epoch": 4.891304347826087,
|
|
"grad_norm": 0.4225634995630011,
|
|
"learning_rate": 1.0091623502679075e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09869314730167389,
|
|
"step": 2925,
|
|
"valid_targets_mean": 7963.6,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 4.8996655518394645,
|
|
"grad_norm": 0.37934581723485067,
|
|
"learning_rate": 1.0019266060168929e-05,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11323339492082596,
|
|
"step": 2930,
|
|
"valid_targets_mean": 9412.6,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 4.908026755852843,
|
|
"grad_norm": 0.39564347165315983,
|
|
"learning_rate": 9.947082161962363e-06,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14827749133110046,
|
|
"step": 2935,
|
|
"valid_targets_mean": 13638.1,
|
|
"valid_targets_min": 6849
|
|
},
|
|
{
|
|
"epoch": 4.916387959866221,
|
|
"grad_norm": 0.3824743273694215,
|
|
"learning_rate": 9.875073063187947e-06,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07139907777309418,
|
|
"step": 2940,
|
|
"valid_targets_mean": 7317.9,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 4.924749163879599,
|
|
"grad_norm": 0.4200990337436774,
|
|
"learning_rate": 9.803240015934859e-06,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12324058264493942,
|
|
"step": 2945,
|
|
"valid_targets_mean": 10873.9,
|
|
"valid_targets_min": 3248
|
|
},
|
|
{
|
|
"epoch": 4.933110367892977,
|
|
"grad_norm": 0.45419326938088966,
|
|
"learning_rate": 9.731584269231094e-06,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385079175233841,
|
|
"step": 2950,
|
|
"valid_targets_mean": 8162.9,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 4.941471571906354,
|
|
"grad_norm": 0.39306327842154165,
|
|
"learning_rate": 9.660107069021767e-06,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12314289808273315,
|
|
"step": 2955,
|
|
"valid_targets_mean": 11834.6,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 4.949832775919733,
|
|
"grad_norm": 0.42045890742908704,
|
|
"learning_rate": 9.588809658147433e-06,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10116656124591827,
|
|
"step": 2960,
|
|
"valid_targets_mean": 7800.5,
|
|
"valid_targets_min": 3924
|
|
},
|
|
{
|
|
"epoch": 4.95819397993311,
|
|
"grad_norm": 0.47668815925042246,
|
|
"learning_rate": 9.517693276322488e-06,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08628484606742859,
|
|
"step": 2965,
|
|
"valid_targets_mean": 5407.5,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 4.966555183946488,
|
|
"grad_norm": 0.4003376223981708,
|
|
"learning_rate": 9.446759160113602e-06,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07662422955036163,
|
|
"step": 2970,
|
|
"valid_targets_mean": 7477.6,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 4.974916387959866,
|
|
"grad_norm": 0.3691708932681528,
|
|
"learning_rate": 9.376008542918227e-06,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09217250347137451,
|
|
"step": 2975,
|
|
"valid_targets_mean": 9894.9,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 4.983277591973244,
|
|
"grad_norm": 0.3625496706116447,
|
|
"learning_rate": 9.305442654943145e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08600175380706787,
|
|
"step": 2980,
|
|
"valid_targets_mean": 8686.1,
|
|
"valid_targets_min": 4712
|
|
},
|
|
{
|
|
"epoch": 4.991638795986622,
|
|
"grad_norm": 0.35502501382834395,
|
|
"learning_rate": 9.235062723183076e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10747610032558441,
|
|
"step": 2985,
|
|
"valid_targets_mean": 11645.1,
|
|
"valid_targets_min": 3030
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.36455981337577437,
|
|
"learning_rate": 9.164869971399359e-06,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08229192346334457,
|
|
"step": 2990,
|
|
"valid_targets_mean": 8362.0,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 5.008361204013378,
|
|
"grad_norm": 0.37954937861181104,
|
|
"learning_rate": 9.094865620098646e-06,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08893205225467682,
|
|
"step": 2995,
|
|
"valid_targets_mean": 10019.2,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 5.016722408026756,
|
|
"grad_norm": 0.422183844242081,
|
|
"learning_rate": 9.025050886511702e-06,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1081496924161911,
|
|
"step": 3000,
|
|
"valid_targets_mean": 10792.4,
|
|
"valid_targets_min": 2974
|
|
},
|
|
{
|
|
"epoch": 5.025083612040134,
|
|
"grad_norm": 0.3865393222316313,
|
|
"learning_rate": 8.955426984572228e-06,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1049729734659195,
|
|
"step": 3005,
|
|
"valid_targets_mean": 12506.2,
|
|
"valid_targets_min": 3957
|
|
},
|
|
{
|
|
"epoch": 5.033444816053512,
|
|
"grad_norm": 0.41872628671048007,
|
|
"learning_rate": 8.885995124895768e-06,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13752016425132751,
|
|
"step": 3010,
|
|
"valid_targets_mean": 12204.2,
|
|
"valid_targets_min": 7102
|
|
},
|
|
{
|
|
"epoch": 5.04180602006689,
|
|
"grad_norm": 0.36924627131676535,
|
|
"learning_rate": 8.816756514758634e-06,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09319281578063965,
|
|
"step": 3015,
|
|
"valid_targets_mean": 11000.9,
|
|
"valid_targets_min": 5223
|
|
},
|
|
{
|
|
"epoch": 5.050167224080267,
|
|
"grad_norm": 0.44557357368668515,
|
|
"learning_rate": 8.747712358076936e-06,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0742655098438263,
|
|
"step": 3020,
|
|
"valid_targets_mean": 6571.5,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 5.058528428093646,
|
|
"grad_norm": 0.43458111531487953,
|
|
"learning_rate": 8.678863855385646e-06,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09501565992832184,
|
|
"step": 3025,
|
|
"valid_targets_mean": 9214.8,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 5.066889632107023,
|
|
"grad_norm": 0.3962647904975391,
|
|
"learning_rate": 8.61021220381771e-06,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10328078269958496,
|
|
"step": 3030,
|
|
"valid_targets_mean": 9648.0,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 5.075250836120401,
|
|
"grad_norm": 0.417869275681211,
|
|
"learning_rate": 8.54175859708324e-06,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10877534747123718,
|
|
"step": 3035,
|
|
"valid_targets_mean": 9269.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 5.083612040133779,
|
|
"grad_norm": 0.5248847143286705,
|
|
"learning_rate": 8.473504225448765e-06,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14188247919082642,
|
|
"step": 3040,
|
|
"valid_targets_mean": 9256.5,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 5.091973244147157,
|
|
"grad_norm": 0.5095493052762392,
|
|
"learning_rate": 8.405450275716525e-06,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12208770960569382,
|
|
"step": 3045,
|
|
"valid_targets_mean": 9845.9,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 5.1003344481605355,
|
|
"grad_norm": 0.3711517968616653,
|
|
"learning_rate": 8.337597931203836e-06,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0857909619808197,
|
|
"step": 3050,
|
|
"valid_targets_mean": 9040.9,
|
|
"valid_targets_min": 1986
|
|
},
|
|
{
|
|
"epoch": 5.108695652173913,
|
|
"grad_norm": 0.37306066315980263,
|
|
"learning_rate": 8.269948371722518e-06,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11526457965373993,
|
|
"step": 3055,
|
|
"valid_targets_mean": 11635.8,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 5.117056856187291,
|
|
"grad_norm": 0.3617427328401958,
|
|
"learning_rate": 8.20250277355838e-06,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09946681559085846,
|
|
"step": 3060,
|
|
"valid_targets_mean": 12971.8,
|
|
"valid_targets_min": 4013
|
|
},
|
|
{
|
|
"epoch": 5.125418060200669,
|
|
"grad_norm": 0.6029057821817608,
|
|
"learning_rate": 8.135262309450764e-06,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11430326104164124,
|
|
"step": 3065,
|
|
"valid_targets_mean": 11637.9,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 5.133779264214047,
|
|
"grad_norm": 0.38006473485452824,
|
|
"learning_rate": 8.068228148572157e-06,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09980309009552002,
|
|
"step": 3070,
|
|
"valid_targets_mean": 12051.4,
|
|
"valid_targets_min": 3688
|
|
},
|
|
{
|
|
"epoch": 5.142140468227424,
|
|
"grad_norm": 0.4271970517177784,
|
|
"learning_rate": 8.001401456507858e-06,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13471044600009918,
|
|
"step": 3075,
|
|
"valid_targets_mean": 12700.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.150501672240803,
|
|
"grad_norm": 0.42775308382038146,
|
|
"learning_rate": 7.934783395235716e-06,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08711956441402435,
|
|
"step": 3080,
|
|
"valid_targets_mean": 7942.5,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 5.15886287625418,
|
|
"grad_norm": 0.4325484923603721,
|
|
"learning_rate": 7.868375123105921e-06,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15878254175186157,
|
|
"step": 3085,
|
|
"valid_targets_mean": 11785.1,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 5.167224080267559,
|
|
"grad_norm": 0.3847055836626383,
|
|
"learning_rate": 7.802177794820857e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10110179334878922,
|
|
"step": 3090,
|
|
"valid_targets_mean": 11400.1,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 5.1755852842809364,
|
|
"grad_norm": 0.38446373354073804,
|
|
"learning_rate": 7.736192561415045e-06,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09235391020774841,
|
|
"step": 3095,
|
|
"valid_targets_mean": 11473.4,
|
|
"valid_targets_min": 1658
|
|
},
|
|
{
|
|
"epoch": 5.183946488294314,
|
|
"grad_norm": 0.44917689740348477,
|
|
"learning_rate": 7.670420570235113e-06,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13029703497886658,
|
|
"step": 3100,
|
|
"valid_targets_mean": 9836.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 5.1923076923076925,
|
|
"grad_norm": 0.41091257695545136,
|
|
"learning_rate": 7.604862964919819e-06,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08510925620794296,
|
|
"step": 3105,
|
|
"valid_targets_mean": 9927.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 5.20066889632107,
|
|
"grad_norm": 0.5108104278973443,
|
|
"learning_rate": 7.539520885380242e-06,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11931552737951279,
|
|
"step": 3110,
|
|
"valid_targets_mean": 10721.0,
|
|
"valid_targets_min": 2411
|
|
},
|
|
{
|
|
"epoch": 5.209030100334449,
|
|
"grad_norm": 0.41357121558578835,
|
|
"learning_rate": 7.474395467779885e-06,
|
|
"loss": 0.1928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09283965826034546,
|
|
"step": 3115,
|
|
"valid_targets_mean": 11007.9,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 5.217391304347826,
|
|
"grad_norm": 0.41889283899230556,
|
|
"learning_rate": 7.409487844514946e-06,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09854487329721451,
|
|
"step": 3120,
|
|
"valid_targets_mean": 9645.1,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 5.225752508361204,
|
|
"grad_norm": 0.5065091534713135,
|
|
"learning_rate": 7.344799144194647e-06,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10172537714242935,
|
|
"step": 3125,
|
|
"valid_targets_mean": 7547.8,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 5.234113712374582,
|
|
"grad_norm": 0.4117375775597541,
|
|
"learning_rate": 7.280330491621579e-06,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12743309140205383,
|
|
"step": 3130,
|
|
"valid_targets_mean": 11454.8,
|
|
"valid_targets_min": 3348
|
|
},
|
|
{
|
|
"epoch": 5.24247491638796,
|
|
"grad_norm": 0.4593265729636907,
|
|
"learning_rate": 7.2160830077721655e-06,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11716936528682709,
|
|
"step": 3135,
|
|
"valid_targets_mean": 11411.6,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 5.250836120401337,
|
|
"grad_norm": 0.40113371138323256,
|
|
"learning_rate": 7.15205780977716e-06,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10036275535821915,
|
|
"step": 3140,
|
|
"valid_targets_mean": 10348.4,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 5.259197324414716,
|
|
"grad_norm": 0.3507430521742832,
|
|
"learning_rate": 7.0882560109022255e-06,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06913955509662628,
|
|
"step": 3145,
|
|
"valid_targets_mean": 8934.2,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 5.2675585284280935,
|
|
"grad_norm": 0.6403156592549997,
|
|
"learning_rate": 7.02467872052858e-06,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13711944222450256,
|
|
"step": 3150,
|
|
"valid_targets_mean": 6588.2,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 5.275919732441472,
|
|
"grad_norm": 0.48628927134671135,
|
|
"learning_rate": 6.9613270441337075e-06,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327417939901352,
|
|
"step": 3155,
|
|
"valid_targets_mean": 7652.8,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 5.2842809364548495,
|
|
"grad_norm": 0.5488634348497299,
|
|
"learning_rate": 6.8982020832721054e-06,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445990353822708,
|
|
"step": 3160,
|
|
"valid_targets_mean": 8072.2,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 5.292642140468227,
|
|
"grad_norm": 0.39873724463253746,
|
|
"learning_rate": 6.835304935556198e-06,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10765382647514343,
|
|
"step": 3165,
|
|
"valid_targets_mean": 9019.9,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 5.301003344481606,
|
|
"grad_norm": 0.4174875902178956,
|
|
"learning_rate": 6.772636694637183e-06,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09825219959020615,
|
|
"step": 3170,
|
|
"valid_targets_mean": 9184.5,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 5.309364548494983,
|
|
"grad_norm": 0.452336371801939,
|
|
"learning_rate": 6.710198450186047e-06,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08789323270320892,
|
|
"step": 3175,
|
|
"valid_targets_mean": 9412.6,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 5.317725752508361,
|
|
"grad_norm": 0.5263520827501855,
|
|
"learning_rate": 6.6479912878746225e-06,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1398400068283081,
|
|
"step": 3180,
|
|
"valid_targets_mean": 9807.0,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 5.326086956521739,
|
|
"grad_norm": 0.37564531216973346,
|
|
"learning_rate": 6.586016289356692e-06,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08281303942203522,
|
|
"step": 3185,
|
|
"valid_targets_mean": 10886.1,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 5.334448160535117,
|
|
"grad_norm": 0.746522210041205,
|
|
"learning_rate": 6.524274532249195e-06,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12465259432792664,
|
|
"step": 3190,
|
|
"valid_targets_mean": 12344.2,
|
|
"valid_targets_min": 5104
|
|
},
|
|
{
|
|
"epoch": 5.342809364548495,
|
|
"grad_norm": 0.47234505428575957,
|
|
"learning_rate": 6.462767090113486e-06,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10397669672966003,
|
|
"step": 3195,
|
|
"valid_targets_mean": 9572.5,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 5.351170568561873,
|
|
"grad_norm": 0.48128997921144473,
|
|
"learning_rate": 6.401495032436667e-06,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09231501817703247,
|
|
"step": 3200,
|
|
"valid_targets_mean": 10547.6,
|
|
"valid_targets_min": 2739
|
|
},
|
|
{
|
|
"epoch": 5.3595317725752505,
|
|
"grad_norm": 0.4078701069484779,
|
|
"learning_rate": 6.34045942461299e-06,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10664214938879013,
|
|
"step": 3205,
|
|
"valid_targets_mean": 11826.9,
|
|
"valid_targets_min": 2952
|
|
},
|
|
{
|
|
"epoch": 5.367892976588629,
|
|
"grad_norm": 0.3824503102006652,
|
|
"learning_rate": 6.279661327925333e-06,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09388929605484009,
|
|
"step": 3210,
|
|
"valid_targets_mean": 10776.8,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 5.3762541806020065,
|
|
"grad_norm": 0.3814154593629807,
|
|
"learning_rate": 6.219101799526753e-06,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16176995635032654,
|
|
"step": 3215,
|
|
"valid_targets_mean": 13815.4,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 5.384615384615385,
|
|
"grad_norm": 0.3960048575229065,
|
|
"learning_rate": 6.158781892422085e-06,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07062983512878418,
|
|
"step": 3220,
|
|
"valid_targets_mean": 9105.4,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 5.392976588628763,
|
|
"grad_norm": 0.3768333289338213,
|
|
"learning_rate": 6.098702655449664e-06,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10719835758209229,
|
|
"step": 3225,
|
|
"valid_targets_mean": 15773.4,
|
|
"valid_targets_min": 7901
|
|
},
|
|
{
|
|
"epoch": 5.40133779264214,
|
|
"grad_norm": 0.4038212974263481,
|
|
"learning_rate": 6.038865133263054e-06,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10818804055452347,
|
|
"step": 3230,
|
|
"valid_targets_mean": 11273.5,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 5.409698996655519,
|
|
"grad_norm": 0.5991260555125224,
|
|
"learning_rate": 5.9792703663129125e-06,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09010619670152664,
|
|
"step": 3235,
|
|
"valid_targets_mean": 9312.5,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 5.418060200668896,
|
|
"grad_norm": 0.3504574415319783,
|
|
"learning_rate": 5.919919390828859e-06,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10788781940937042,
|
|
"step": 3240,
|
|
"valid_targets_mean": 10585.9,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 5.426421404682274,
|
|
"grad_norm": 0.383160747191933,
|
|
"learning_rate": 5.860813238801523e-06,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12747430801391602,
|
|
"step": 3245,
|
|
"valid_targets_mean": 11361.1,
|
|
"valid_targets_min": 3014
|
|
},
|
|
{
|
|
"epoch": 5.434782608695652,
|
|
"grad_norm": 0.4448395208063101,
|
|
"learning_rate": 5.801952937964537e-06,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1131446361541748,
|
|
"step": 3250,
|
|
"valid_targets_mean": 9041.6,
|
|
"valid_targets_min": 2778
|
|
},
|
|
{
|
|
"epoch": 5.44314381270903,
|
|
"grad_norm": 0.374061563055739,
|
|
"learning_rate": 5.743339511776693e-06,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12289728224277496,
|
|
"step": 3255,
|
|
"valid_targets_mean": 10412.9,
|
|
"valid_targets_min": 3992
|
|
},
|
|
{
|
|
"epoch": 5.451505016722408,
|
|
"grad_norm": 0.41768955393163837,
|
|
"learning_rate": 5.684973979404144e-06,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12625618278980255,
|
|
"step": 3260,
|
|
"valid_targets_mean": 10263.5,
|
|
"valid_targets_min": 2351
|
|
},
|
|
{
|
|
"epoch": 5.459866220735786,
|
|
"grad_norm": 0.4581390122481781,
|
|
"learning_rate": 5.6268573557026865e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08365988731384277,
|
|
"step": 3265,
|
|
"valid_targets_mean": 7242.9,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 5.468227424749164,
|
|
"grad_norm": 0.3990498240505439,
|
|
"learning_rate": 5.568990651200108e-06,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08962704241275787,
|
|
"step": 3270,
|
|
"valid_targets_mean": 9560.9,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 5.476588628762542,
|
|
"grad_norm": 0.3771406536869821,
|
|
"learning_rate": 5.511374872078616e-06,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08498679846525192,
|
|
"step": 3275,
|
|
"valid_targets_mean": 11075.0,
|
|
"valid_targets_min": 2413
|
|
},
|
|
{
|
|
"epoch": 5.48494983277592,
|
|
"grad_norm": 0.4317421746741838,
|
|
"learning_rate": 5.454011020157348e-06,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12408112734556198,
|
|
"step": 3280,
|
|
"valid_targets_mean": 9941.5,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 5.493311036789297,
|
|
"grad_norm": 0.40185075884389715,
|
|
"learning_rate": 5.396900092874953e-06,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06251989305019379,
|
|
"step": 3285,
|
|
"valid_targets_mean": 6731.9,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 5.501672240802676,
|
|
"grad_norm": 0.3908923553413453,
|
|
"learning_rate": 5.340043083272239e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0889013335108757,
|
|
"step": 3290,
|
|
"valid_targets_mean": 10524.2,
|
|
"valid_targets_min": 3332
|
|
},
|
|
{
|
|
"epoch": 5.510033444816053,
|
|
"grad_norm": 0.41639353932705836,
|
|
"learning_rate": 5.283440979974901e-06,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0963592678308487,
|
|
"step": 3295,
|
|
"valid_targets_mean": 11330.9,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.518394648829432,
|
|
"grad_norm": 0.3770540372097882,
|
|
"learning_rate": 5.227094767176364e-06,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09438344836235046,
|
|
"step": 3300,
|
|
"valid_targets_mean": 8755.9,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 5.526755852842809,
|
|
"grad_norm": 0.41987899709378035,
|
|
"learning_rate": 5.17100542462063e-06,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09191924333572388,
|
|
"step": 3305,
|
|
"valid_targets_mean": 7664.1,
|
|
"valid_targets_min": 2643
|
|
},
|
|
{
|
|
"epoch": 5.535117056856187,
|
|
"grad_norm": 0.34912928315012975,
|
|
"learning_rate": 5.115173927585264e-06,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09554652869701385,
|
|
"step": 3310,
|
|
"valid_targets_mean": 12016.2,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.543478260869565,
|
|
"grad_norm": 0.4360767621043697,
|
|
"learning_rate": 5.059601246864438e-06,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08397121727466583,
|
|
"step": 3315,
|
|
"valid_targets_mean": 6164.8,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 5.551839464882943,
|
|
"grad_norm": 0.42793766150237056,
|
|
"learning_rate": 5.004288348752018e-06,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06341104209423065,
|
|
"step": 3320,
|
|
"valid_targets_mean": 6197.0,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 5.5602006688963215,
|
|
"grad_norm": 0.46201762093008497,
|
|
"learning_rate": 4.949236195024825e-06,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11919822543859482,
|
|
"step": 3325,
|
|
"valid_targets_mean": 11613.9,
|
|
"valid_targets_min": 4095
|
|
},
|
|
{
|
|
"epoch": 5.568561872909699,
|
|
"grad_norm": 0.43355687318089886,
|
|
"learning_rate": 4.894445742925853e-06,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12500903010368347,
|
|
"step": 3330,
|
|
"valid_targets_mean": 12543.5,
|
|
"valid_targets_min": 3567
|
|
},
|
|
{
|
|
"epoch": 5.576923076923077,
|
|
"grad_norm": 0.42924833780828353,
|
|
"learning_rate": 4.839917945147647e-06,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12948071956634521,
|
|
"step": 3335,
|
|
"valid_targets_mean": 12034.4,
|
|
"valid_targets_min": 2778
|
|
},
|
|
{
|
|
"epoch": 5.585284280936455,
|
|
"grad_norm": 0.3993296799292308,
|
|
"learning_rate": 4.785653749815744e-06,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08601127564907074,
|
|
"step": 3340,
|
|
"valid_targets_mean": 7827.6,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 5.593645484949833,
|
|
"grad_norm": 0.44137077891742355,
|
|
"learning_rate": 4.731654100472178e-06,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08072753250598907,
|
|
"step": 3345,
|
|
"valid_targets_mean": 6305.2,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 5.602006688963211,
|
|
"grad_norm": 0.40342224727815224,
|
|
"learning_rate": 4.677919936059064e-06,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09617055952548981,
|
|
"step": 3350,
|
|
"valid_targets_mean": 10345.5,
|
|
"valid_targets_min": 3675
|
|
},
|
|
{
|
|
"epoch": 5.610367892976589,
|
|
"grad_norm": 0.5763198240618125,
|
|
"learning_rate": 4.624452190902304e-06,
|
|
"loss": 0.1987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13516852259635925,
|
|
"step": 3355,
|
|
"valid_targets_mean": 7691.9,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 5.618729096989966,
|
|
"grad_norm": 0.41100368052886294,
|
|
"learning_rate": 4.571251794695308e-06,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09989924728870392,
|
|
"step": 3360,
|
|
"valid_targets_mean": 8544.6,
|
|
"valid_targets_min": 4145
|
|
},
|
|
{
|
|
"epoch": 5.627090301003345,
|
|
"grad_norm": 0.4348737958096964,
|
|
"learning_rate": 4.518319672482845e-06,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14520440995693207,
|
|
"step": 3365,
|
|
"valid_targets_mean": 11374.6,
|
|
"valid_targets_min": 4196
|
|
},
|
|
{
|
|
"epoch": 5.635451505016722,
|
|
"grad_norm": 0.37165807451110344,
|
|
"learning_rate": 4.465656744644957e-06,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10454623401165009,
|
|
"step": 3370,
|
|
"valid_targets_mean": 12574.0,
|
|
"valid_targets_min": 4126
|
|
},
|
|
{
|
|
"epoch": 5.6438127090301,
|
|
"grad_norm": 0.4304945225779929,
|
|
"learning_rate": 4.413263926880935e-06,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11468327045440674,
|
|
"step": 3375,
|
|
"valid_targets_mean": 11572.2,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 5.6521739130434785,
|
|
"grad_norm": 0.4280068412140972,
|
|
"learning_rate": 4.3611421301934435e-06,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09538109600543976,
|
|
"step": 3380,
|
|
"valid_targets_mean": 8948.2,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 5.660535117056856,
|
|
"grad_norm": 0.388470300549808,
|
|
"learning_rate": 4.309292260872633e-06,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09327365458011627,
|
|
"step": 3385,
|
|
"valid_targets_mean": 9293.6,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 5.668896321070234,
|
|
"grad_norm": 0.3739555663596554,
|
|
"learning_rate": 4.257715220480405e-06,
|
|
"loss": 0.2081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09301489591598511,
|
|
"step": 3390,
|
|
"valid_targets_mean": 11463.4,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 5.677257525083612,
|
|
"grad_norm": 0.5762623186325518,
|
|
"learning_rate": 4.206411905834733e-06,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10505624860525131,
|
|
"step": 3395,
|
|
"valid_targets_mean": 9109.8,
|
|
"valid_targets_min": 4391
|
|
},
|
|
{
|
|
"epoch": 5.68561872909699,
|
|
"grad_norm": 0.43769839743781636,
|
|
"learning_rate": 4.155383208994055e-06,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09230999648571014,
|
|
"step": 3400,
|
|
"valid_targets_mean": 6510.5,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 5.693979933110368,
|
|
"grad_norm": 0.4756122121446175,
|
|
"learning_rate": 4.10463001724178e-06,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1207800805568695,
|
|
"step": 3405,
|
|
"valid_targets_mean": 8616.9,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 5.702341137123746,
|
|
"grad_norm": 0.42855788897041003,
|
|
"learning_rate": 4.054153213070868e-06,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07039805501699448,
|
|
"step": 3410,
|
|
"valid_targets_mean": 7469.0,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 5.710702341137123,
|
|
"grad_norm": 0.4097454885338387,
|
|
"learning_rate": 4.003953674168455e-06,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15759631991386414,
|
|
"step": 3415,
|
|
"valid_targets_mean": 13915.5,
|
|
"valid_targets_min": 4490
|
|
},
|
|
{
|
|
"epoch": 5.719063545150502,
|
|
"grad_norm": 0.4109864282001101,
|
|
"learning_rate": 3.954032273400608e-06,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12468603253364563,
|
|
"step": 3420,
|
|
"valid_targets_mean": 10872.9,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 5.7274247491638794,
|
|
"grad_norm": 0.3940047890682954,
|
|
"learning_rate": 3.904389878797159e-06,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09924322366714478,
|
|
"step": 3425,
|
|
"valid_targets_mean": 10603.1,
|
|
"valid_targets_min": 5640
|
|
},
|
|
{
|
|
"epoch": 5.735785953177258,
|
|
"grad_norm": 0.3548658722566724,
|
|
"learning_rate": 3.85502735353658e-06,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09239357709884644,
|
|
"step": 3430,
|
|
"valid_targets_mean": 12109.5,
|
|
"valid_targets_min": 3623
|
|
},
|
|
{
|
|
"epoch": 5.7441471571906355,
|
|
"grad_norm": 0.3626465308249873,
|
|
"learning_rate": 3.8059455559310167e-06,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09994740039110184,
|
|
"step": 3435,
|
|
"valid_targets_mean": 13404.6,
|
|
"valid_targets_min": 5365
|
|
},
|
|
{
|
|
"epoch": 5.752508361204013,
|
|
"grad_norm": 0.492734468794942,
|
|
"learning_rate": 3.757145339411332e-06,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10074135661125183,
|
|
"step": 3440,
|
|
"valid_targets_mean": 6156.0,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 5.760869565217392,
|
|
"grad_norm": 0.3878036146018422,
|
|
"learning_rate": 3.708627552512276e-06,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0926172286272049,
|
|
"step": 3445,
|
|
"valid_targets_mean": 10034.6,
|
|
"valid_targets_min": 4817
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.43876585482598734,
|
|
"learning_rate": 3.660393038857739e-06,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336514949798584,
|
|
"step": 3450,
|
|
"valid_targets_mean": 11873.8,
|
|
"valid_targets_min": 8504
|
|
},
|
|
{
|
|
"epoch": 5.777591973244148,
|
|
"grad_norm": 0.4184785108684915,
|
|
"learning_rate": 3.6124426371460542e-06,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13077645003795624,
|
|
"step": 3455,
|
|
"valid_targets_mean": 12093.6,
|
|
"valid_targets_min": 5012
|
|
},
|
|
{
|
|
"epoch": 5.785953177257525,
|
|
"grad_norm": 0.46059309722732283,
|
|
"learning_rate": 3.564777181135466e-06,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09467418491840363,
|
|
"step": 3460,
|
|
"valid_targets_mean": 6730.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 5.794314381270903,
|
|
"grad_norm": 0.43972248217673926,
|
|
"learning_rate": 3.517397499629589e-06,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09671802818775177,
|
|
"step": 3465,
|
|
"valid_targets_mean": 7122.2,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 5.802675585284281,
|
|
"grad_norm": 0.4248596190548862,
|
|
"learning_rate": 3.4703044164630064e-06,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0729823112487793,
|
|
"step": 3470,
|
|
"valid_targets_mean": 7085.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 5.811036789297659,
|
|
"grad_norm": 0.6047178366952471,
|
|
"learning_rate": 3.4234987504869553e-06,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10936473309993744,
|
|
"step": 3475,
|
|
"valid_targets_mean": 7445.8,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 5.8193979933110365,
|
|
"grad_norm": 0.4219719935611951,
|
|
"learning_rate": 3.376981315555086e-06,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12464763224124908,
|
|
"step": 3480,
|
|
"valid_targets_mean": 12764.0,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 5.827759197324415,
|
|
"grad_norm": 0.4322762562931411,
|
|
"learning_rate": 3.3307529205092903e-06,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11842355132102966,
|
|
"step": 3485,
|
|
"valid_targets_mean": 11189.8,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 5.8361204013377925,
|
|
"grad_norm": 0.3929093355476684,
|
|
"learning_rate": 3.2848143691656807e-06,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09951166808605194,
|
|
"step": 3490,
|
|
"valid_targets_mean": 9659.0,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 5.84448160535117,
|
|
"grad_norm": 0.5625532326545907,
|
|
"learning_rate": 3.239166460300571e-06,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323026567697525,
|
|
"step": 3495,
|
|
"valid_targets_mean": 10729.9,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 5.852842809364549,
|
|
"grad_norm": 0.41369840737160624,
|
|
"learning_rate": 3.1938099876366047e-06,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09389300644397736,
|
|
"step": 3500,
|
|
"valid_targets_mean": 8096.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 5.861204013377926,
|
|
"grad_norm": 0.3854607718814448,
|
|
"learning_rate": 3.1487457398289645e-06,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08788316696882248,
|
|
"step": 3505,
|
|
"valid_targets_mean": 10436.4,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 5.869565217391305,
|
|
"grad_norm": 0.3938431350219495,
|
|
"learning_rate": 3.1039745004516207e-06,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.079367995262146,
|
|
"step": 3510,
|
|
"valid_targets_mean": 8871.0,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 5.877926421404682,
|
|
"grad_norm": 0.38607324057696585,
|
|
"learning_rate": 3.0594970479837683e-06,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08825874328613281,
|
|
"step": 3515,
|
|
"valid_targets_mean": 8788.8,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 5.88628762541806,
|
|
"grad_norm": 0.39472885373113464,
|
|
"learning_rate": 3.015314155796234e-06,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07223385572433472,
|
|
"step": 3520,
|
|
"valid_targets_mean": 6712.2,
|
|
"valid_targets_min": 2400
|
|
},
|
|
{
|
|
"epoch": 5.894648829431438,
|
|
"grad_norm": 0.43542431923073827,
|
|
"learning_rate": 2.9714265921380557e-06,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10756658017635345,
|
|
"step": 3525,
|
|
"valid_targets_mean": 10973.0,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 5.903010033444816,
|
|
"grad_norm": 0.3398688782775103,
|
|
"learning_rate": 2.927835120123128e-06,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10329543799161911,
|
|
"step": 3530,
|
|
"valid_targets_mean": 13105.0,
|
|
"valid_targets_min": 3767
|
|
},
|
|
{
|
|
"epoch": 5.911371237458194,
|
|
"grad_norm": 0.40728490116618504,
|
|
"learning_rate": 2.8845404977169057e-06,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08138227462768555,
|
|
"step": 3535,
|
|
"valid_targets_mean": 11252.6,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 5.919732441471572,
|
|
"grad_norm": 0.5113853131420949,
|
|
"learning_rate": 2.841543477723254e-06,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1089818924665451,
|
|
"step": 3540,
|
|
"valid_targets_mean": 6999.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.9280936454849495,
|
|
"grad_norm": 0.36069866863676664,
|
|
"learning_rate": 2.7988448077713592e-06,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10439550131559372,
|
|
"step": 3545,
|
|
"valid_targets_mean": 10734.0,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 5.936454849498328,
|
|
"grad_norm": 0.44080674243416595,
|
|
"learning_rate": 2.7564452303027024e-06,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09451819211244583,
|
|
"step": 3550,
|
|
"valid_targets_mean": 9758.6,
|
|
"valid_targets_min": 2983
|
|
},
|
|
{
|
|
"epoch": 5.944816053511706,
|
|
"grad_norm": 0.45680219484485435,
|
|
"learning_rate": 2.7143454825581714e-06,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10183557868003845,
|
|
"step": 3555,
|
|
"valid_targets_mean": 8240.8,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 5.953177257525084,
|
|
"grad_norm": 0.366866494470298,
|
|
"learning_rate": 2.672546296565237e-06,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14180888235569,
|
|
"step": 3560,
|
|
"valid_targets_mean": 13377.0,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 5.961538461538462,
|
|
"grad_norm": 0.43905202480663247,
|
|
"learning_rate": 2.6310483991252133e-06,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10144704580307007,
|
|
"step": 3565,
|
|
"valid_targets_mean": 7684.8,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 5.969899665551839,
|
|
"grad_norm": 0.4118506391205177,
|
|
"learning_rate": 2.589852511800646e-06,
|
|
"loss": 0.2138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12602347135543823,
|
|
"step": 3570,
|
|
"valid_targets_mean": 10904.1,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 5.978260869565218,
|
|
"grad_norm": 0.49454403103659905,
|
|
"learning_rate": 2.54895935090274e-06,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08639810979366302,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5529.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 5.986622073578595,
|
|
"grad_norm": 0.3812013002952409,
|
|
"learning_rate": 2.508369627478917e-06,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09986335784196854,
|
|
"step": 3580,
|
|
"valid_targets_mean": 10368.8,
|
|
"valid_targets_min": 2577
|
|
},
|
|
{
|
|
"epoch": 5.994983277591973,
|
|
"grad_norm": 0.45766795584391323,
|
|
"learning_rate": 2.468084047300452e-06,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11471857130527496,
|
|
"step": 3585,
|
|
"valid_targets_mean": 8507.0,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 6.003344481605351,
|
|
"grad_norm": 0.4088049909702847,
|
|
"learning_rate": 2.4281033108501873e-06,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09181240200996399,
|
|
"step": 3590,
|
|
"valid_targets_mean": 7894.5,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 6.011705685618729,
|
|
"grad_norm": 0.39517604807035955,
|
|
"learning_rate": 2.3884281133103725e-06,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0808369517326355,
|
|
"step": 3595,
|
|
"valid_targets_mean": 7612.1,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 6.0200668896321075,
|
|
"grad_norm": 0.38210823770614116,
|
|
"learning_rate": 2.3490591445505715e-06,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07163643836975098,
|
|
"step": 3600,
|
|
"valid_targets_mean": 9051.2,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 6.028428093645485,
|
|
"grad_norm": 0.48670604158115566,
|
|
"learning_rate": 2.309997089115659e-06,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08660681545734406,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5456.6,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 6.036789297658863,
|
|
"grad_norm": 0.4055422066944094,
|
|
"learning_rate": 2.271242626213925e-06,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11772844195365906,
|
|
"step": 3610,
|
|
"valid_targets_mean": 8387.8,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 6.045150501672241,
|
|
"grad_norm": 0.43623291128657354,
|
|
"learning_rate": 2.232796429705253e-06,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08723407238721848,
|
|
"step": 3615,
|
|
"valid_targets_mean": 7331.6,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 6.053511705685619,
|
|
"grad_norm": 0.38944391221372543,
|
|
"learning_rate": 2.1946591680894145e-06,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08822585642337799,
|
|
"step": 3620,
|
|
"valid_targets_mean": 8546.6,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 6.061872909698996,
|
|
"grad_norm": 0.4116100818082183,
|
|
"learning_rate": 2.1568315044944586e-06,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09745118021965027,
|
|
"step": 3625,
|
|
"valid_targets_mean": 10045.9,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 6.070234113712375,
|
|
"grad_norm": 0.5354053516401446,
|
|
"learning_rate": 2.1193140966651484e-06,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1083788201212883,
|
|
"step": 3630,
|
|
"valid_targets_mean": 8598.2,
|
|
"valid_targets_min": 2842
|
|
},
|
|
{
|
|
"epoch": 6.078595317725752,
|
|
"grad_norm": 0.4001978673551788,
|
|
"learning_rate": 2.082107596951548e-06,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1019393652677536,
|
|
"step": 3635,
|
|
"valid_targets_mean": 9461.6,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 6.086956521739131,
|
|
"grad_norm": 0.48657176567402693,
|
|
"learning_rate": 2.0452126522976746e-06,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11235760152339935,
|
|
"step": 3640,
|
|
"valid_targets_mean": 6156.2,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 6.095317725752508,
|
|
"grad_norm": 0.48988529016260757,
|
|
"learning_rate": 2.008629904230237e-06,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10067173838615417,
|
|
"step": 3645,
|
|
"valid_targets_mean": 7365.0,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 6.103678929765886,
|
|
"grad_norm": 0.5789029975916761,
|
|
"learning_rate": 1.972359988847499e-06,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299233138561249,
|
|
"step": 3650,
|
|
"valid_targets_mean": 7890.1,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 6.1120401337792645,
|
|
"grad_norm": 0.4325019678781378,
|
|
"learning_rate": 1.9364035368082222e-06,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12556350231170654,
|
|
"step": 3655,
|
|
"valid_targets_mean": 11164.8,
|
|
"valid_targets_min": 2676
|
|
},
|
|
{
|
|
"epoch": 6.120401337792642,
|
|
"grad_norm": 0.6415192193106152,
|
|
"learning_rate": 1.9007611733206733e-06,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10394009202718735,
|
|
"step": 3660,
|
|
"valid_targets_mean": 9193.2,
|
|
"valid_targets_min": 2700
|
|
},
|
|
{
|
|
"epoch": 6.12876254180602,
|
|
"grad_norm": 0.47420374970611173,
|
|
"learning_rate": 1.8654335181317784e-06,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1135619729757309,
|
|
"step": 3665,
|
|
"valid_targets_mean": 8094.5,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 6.137123745819398,
|
|
"grad_norm": 0.548061796828787,
|
|
"learning_rate": 1.8304211855163311e-06,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11094358563423157,
|
|
"step": 3670,
|
|
"valid_targets_mean": 7057.6,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 6.145484949832776,
|
|
"grad_norm": 0.38432179553562035,
|
|
"learning_rate": 1.7957247842663194e-06,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08132559061050415,
|
|
"step": 3675,
|
|
"valid_targets_mean": 7339.0,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 6.153846153846154,
|
|
"grad_norm": 0.41298622947408825,
|
|
"learning_rate": 1.7613449176803476e-06,
|
|
"loss": 0.2022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08749121427536011,
|
|
"step": 3680,
|
|
"valid_targets_mean": 10091.8,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 6.162207357859532,
|
|
"grad_norm": 0.44267117197464856,
|
|
"learning_rate": 1.7272821835531295e-06,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11734290421009064,
|
|
"step": 3685,
|
|
"valid_targets_mean": 9884.9,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 6.170568561872909,
|
|
"grad_norm": 0.4325410276347886,
|
|
"learning_rate": 1.693537174165103e-06,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11704067885875702,
|
|
"step": 3690,
|
|
"valid_targets_mean": 11795.6,
|
|
"valid_targets_min": 5617
|
|
},
|
|
{
|
|
"epoch": 6.178929765886288,
|
|
"grad_norm": 0.3679703418722953,
|
|
"learning_rate": 1.660110476272132e-06,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09583941847085953,
|
|
"step": 3695,
|
|
"valid_targets_mean": 11377.2,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 6.187290969899665,
|
|
"grad_norm": 0.43307123719231605,
|
|
"learning_rate": 1.6270026710952924e-06,
|
|
"loss": 0.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09866052865982056,
|
|
"step": 3700,
|
|
"valid_targets_mean": 8857.6,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 6.195652173913044,
|
|
"grad_norm": 0.3780925119622439,
|
|
"learning_rate": 1.5942143343107953e-06,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10966908186674118,
|
|
"step": 3705,
|
|
"valid_targets_mean": 9795.5,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 6.2040133779264215,
|
|
"grad_norm": 0.37893449590430744,
|
|
"learning_rate": 1.5617460360399439e-06,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12843623757362366,
|
|
"step": 3710,
|
|
"valid_targets_mean": 12764.1,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 6.212374581939799,
|
|
"grad_norm": 0.40385662728684574,
|
|
"learning_rate": 1.529598340839238e-06,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10242074728012085,
|
|
"step": 3715,
|
|
"valid_targets_mean": 10187.9,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 6.2207357859531776,
|
|
"grad_norm": 0.48339991757590023,
|
|
"learning_rate": 1.4977718076905533e-06,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09428669512271881,
|
|
"step": 3720,
|
|
"valid_targets_mean": 6506.5,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 6.229096989966555,
|
|
"grad_norm": 0.38911403295834907,
|
|
"learning_rate": 1.4662669899914161e-06,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10617174953222275,
|
|
"step": 3725,
|
|
"valid_targets_mean": 11464.6,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 6.237458193979933,
|
|
"grad_norm": 0.39425741770450334,
|
|
"learning_rate": 1.4350844355453952e-06,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10640241950750351,
|
|
"step": 3730,
|
|
"valid_targets_mean": 10308.5,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 6.245819397993311,
|
|
"grad_norm": 0.43923728309641125,
|
|
"learning_rate": 1.404224686552571e-06,
|
|
"loss": 0.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09903937578201294,
|
|
"step": 3735,
|
|
"valid_targets_mean": 9853.0,
|
|
"valid_targets_min": 6019
|
|
},
|
|
{
|
|
"epoch": 6.254180602006689,
|
|
"grad_norm": 0.41908897839911563,
|
|
"learning_rate": 1.3736882796000983e-06,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09888114035129547,
|
|
"step": 3740,
|
|
"valid_targets_mean": 9900.9,
|
|
"valid_targets_min": 2627
|
|
},
|
|
{
|
|
"epoch": 6.262541806020067,
|
|
"grad_norm": 0.49404140617884423,
|
|
"learning_rate": 1.3434757456528868e-06,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1672218143939972,
|
|
"step": 3745,
|
|
"valid_targets_mean": 12299.8,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 6.270903010033445,
|
|
"grad_norm": 0.49991082862230735,
|
|
"learning_rate": 1.3135876100443557e-06,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13769176602363586,
|
|
"step": 3750,
|
|
"valid_targets_mean": 9714.8,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 6.2792642140468224,
|
|
"grad_norm": 0.5586790246636285,
|
|
"learning_rate": 1.2840243924673202e-06,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08177978545427322,
|
|
"step": 3755,
|
|
"valid_targets_mean": 6872.9,
|
|
"valid_targets_min": 2816
|
|
},
|
|
{
|
|
"epoch": 6.287625418060201,
|
|
"grad_norm": 0.5963033134414659,
|
|
"learning_rate": 1.2547866069649418e-06,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08051039278507233,
|
|
"step": 3760,
|
|
"valid_targets_mean": 7102.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 6.2959866220735785,
|
|
"grad_norm": 0.3813929506645655,
|
|
"learning_rate": 1.225874761921788e-06,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11782650649547577,
|
|
"step": 3765,
|
|
"valid_targets_mean": 15239.8,
|
|
"valid_targets_min": 5371
|
|
},
|
|
{
|
|
"epoch": 6.304347826086957,
|
|
"grad_norm": 0.39530022877162574,
|
|
"learning_rate": 1.1972893600550007e-06,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08144278079271317,
|
|
"step": 3770,
|
|
"valid_targets_mean": 8821.5,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 6.312709030100335,
|
|
"grad_norm": 0.3785215913686803,
|
|
"learning_rate": 1.1690308984055454e-06,
|
|
"loss": 0.1905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08577847480773926,
|
|
"step": 3775,
|
|
"valid_targets_mean": 9662.6,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 6.321070234113712,
|
|
"grad_norm": 0.41122433895520144,
|
|
"learning_rate": 1.141099868329576e-06,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10857082903385162,
|
|
"step": 3780,
|
|
"valid_targets_mean": 9792.8,
|
|
"valid_targets_min": 2202
|
|
},
|
|
{
|
|
"epoch": 6.329431438127091,
|
|
"grad_norm": 0.5411362852531139,
|
|
"learning_rate": 1.1134967554898868e-06,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1019723191857338,
|
|
"step": 3785,
|
|
"valid_targets_mean": 5998.4,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 6.337792642140468,
|
|
"grad_norm": 0.43214644881810926,
|
|
"learning_rate": 1.0862220398474798e-06,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12764345109462738,
|
|
"step": 3790,
|
|
"valid_targets_mean": 10628.5,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 6.346153846153846,
|
|
"grad_norm": 0.38438063349369705,
|
|
"learning_rate": 1.0592761956531983e-06,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08641922473907471,
|
|
"step": 3795,
|
|
"valid_targets_mean": 7199.9,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 6.354515050167224,
|
|
"grad_norm": 0.4369622670819147,
|
|
"learning_rate": 1.0326596914395015e-06,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1129743829369545,
|
|
"step": 3800,
|
|
"valid_targets_mean": 10330.8,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 6.362876254180602,
|
|
"grad_norm": 0.3539122180541354,
|
|
"learning_rate": 1.0063729900122943e-06,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09641806781291962,
|
|
"step": 3805,
|
|
"valid_targets_mean": 11308.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.3712374581939795,
|
|
"grad_norm": 0.3663236884309548,
|
|
"learning_rate": 9.80416548442904e-07,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09869398176670074,
|
|
"step": 3810,
|
|
"valid_targets_mean": 12074.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.379598662207358,
|
|
"grad_norm": 0.48241710259410986,
|
|
"learning_rate": 9.547908180601274e-07,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10535872727632523,
|
|
"step": 3815,
|
|
"valid_targets_mean": 7108.5,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 6.3879598662207355,
|
|
"grad_norm": 0.4794615340486704,
|
|
"learning_rate": 9.294962444423672e-07,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09399455785751343,
|
|
"step": 3820,
|
|
"valid_targets_mean": 6704.5,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 6.396321070234114,
|
|
"grad_norm": 0.40056917164794786,
|
|
"learning_rate": 9.045332674099039e-07,
|
|
"loss": 0.2002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07060383260250092,
|
|
"step": 3825,
|
|
"valid_targets_mean": 6799.1,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 6.404682274247492,
|
|
"grad_norm": 0.40424016939732016,
|
|
"learning_rate": 8.799023210172319e-07,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09708660840988159,
|
|
"step": 3830,
|
|
"valid_targets_mean": 9571.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.413043478260869,
|
|
"grad_norm": 0.45041733238806253,
|
|
"learning_rate": 8.556038335455241e-07,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10671599209308624,
|
|
"step": 3835,
|
|
"valid_targets_mean": 9813.2,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 6.421404682274248,
|
|
"grad_norm": 0.3777292728821344,
|
|
"learning_rate": 8.316382274951773e-07,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13505390286445618,
|
|
"step": 3840,
|
|
"valid_targets_mean": 12613.1,
|
|
"valid_targets_min": 4795
|
|
},
|
|
{
|
|
"epoch": 6.429765886287625,
|
|
"grad_norm": 0.4737916054194675,
|
|
"learning_rate": 8.080059195784829e-07,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09208667278289795,
|
|
"step": 3845,
|
|
"valid_targets_mean": 11486.4,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 6.438127090301004,
|
|
"grad_norm": 0.4089084832140847,
|
|
"learning_rate": 7.847073207123523e-07,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1044401228427887,
|
|
"step": 3850,
|
|
"valid_targets_mean": 9801.2,
|
|
"valid_targets_min": 4059
|
|
},
|
|
{
|
|
"epoch": 6.446488294314381,
|
|
"grad_norm": 0.46946660629840237,
|
|
"learning_rate": 7.617428360111945e-07,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11579585075378418,
|
|
"step": 3855,
|
|
"valid_targets_mean": 10637.0,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 6.454849498327759,
|
|
"grad_norm": 2.39337360417389,
|
|
"learning_rate": 7.391128647798607e-07,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12137849628925323,
|
|
"step": 3860,
|
|
"valid_targets_mean": 10133.8,
|
|
"valid_targets_min": 2429
|
|
},
|
|
{
|
|
"epoch": 6.463210702341137,
|
|
"grad_norm": 0.47202791622039414,
|
|
"learning_rate": 7.168178005067062e-07,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06815285235643387,
|
|
"step": 3865,
|
|
"valid_targets_mean": 5605.1,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 6.471571906354515,
|
|
"grad_norm": 0.46078653145402976,
|
|
"learning_rate": 6.948580308567532e-07,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0785512924194336,
|
|
"step": 3870,
|
|
"valid_targets_mean": 7890.9,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 6.479933110367893,
|
|
"grad_norm": 0.35596200325333016,
|
|
"learning_rate": 6.732339376649388e-07,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11740652471780777,
|
|
"step": 3875,
|
|
"valid_targets_mean": 14734.4,
|
|
"valid_targets_min": 7711
|
|
},
|
|
{
|
|
"epoch": 6.488294314381271,
|
|
"grad_norm": 0.434038215979546,
|
|
"learning_rate": 6.519458969294845e-07,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12669533491134644,
|
|
"step": 3880,
|
|
"valid_targets_mean": 9022.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 6.496655518394649,
|
|
"grad_norm": 0.36898763311879984,
|
|
"learning_rate": 6.309942788053502e-07,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10530098527669907,
|
|
"step": 3885,
|
|
"valid_targets_mean": 11916.8,
|
|
"valid_targets_min": 3062
|
|
},
|
|
{
|
|
"epoch": 6.505016722408027,
|
|
"grad_norm": 0.3516993078838502,
|
|
"learning_rate": 6.103794475978086e-07,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10090771317481995,
|
|
"step": 3890,
|
|
"valid_targets_mean": 13832.6,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 6.513377926421405,
|
|
"grad_norm": 0.42512250965315607,
|
|
"learning_rate": 5.901017617560989e-07,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06793075799942017,
|
|
"step": 3895,
|
|
"valid_targets_mean": 6523.4,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 6.521739130434782,
|
|
"grad_norm": 0.4462960277842445,
|
|
"learning_rate": 5.701615738672073e-07,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11571821570396423,
|
|
"step": 3900,
|
|
"valid_targets_mean": 13611.9,
|
|
"valid_targets_min": 5601
|
|
},
|
|
{
|
|
"epoch": 6.530100334448161,
|
|
"grad_norm": 0.3676010585484793,
|
|
"learning_rate": 5.505592306497298e-07,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09901118278503418,
|
|
"step": 3905,
|
|
"valid_targets_mean": 12711.5,
|
|
"valid_targets_min": 4841
|
|
},
|
|
{
|
|
"epoch": 6.538461538461538,
|
|
"grad_norm": 0.38277731767403833,
|
|
"learning_rate": 5.312950729478327e-07,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10607827454805374,
|
|
"step": 3910,
|
|
"valid_targets_mean": 11145.0,
|
|
"valid_targets_min": 4748
|
|
},
|
|
{
|
|
"epoch": 6.546822742474916,
|
|
"grad_norm": 0.3677834619434402,
|
|
"learning_rate": 5.123694357253439e-07,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08389432728290558,
|
|
"step": 3915,
|
|
"valid_targets_mean": 8261.1,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 6.555183946488294,
|
|
"grad_norm": 0.535339887491514,
|
|
"learning_rate": 4.937826480599195e-07,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13953952491283417,
|
|
"step": 3920,
|
|
"valid_targets_mean": 6951.8,
|
|
"valid_targets_min": 3286
|
|
},
|
|
{
|
|
"epoch": 6.563545150501672,
|
|
"grad_norm": 0.5436773277484084,
|
|
"learning_rate": 4.755350331373243e-07,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11926500499248505,
|
|
"step": 3925,
|
|
"valid_targets_mean": 12418.5,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 6.5719063545150505,
|
|
"grad_norm": 0.5486851588483915,
|
|
"learning_rate": 4.576269082458118e-07,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0999874398112297,
|
|
"step": 3930,
|
|
"valid_targets_mean": 6415.2,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 6.580267558528428,
|
|
"grad_norm": 0.3644140022673904,
|
|
"learning_rate": 4.4005858477060404e-07,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08239322900772095,
|
|
"step": 3935,
|
|
"valid_targets_mean": 9468.5,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 6.588628762541806,
|
|
"grad_norm": 0.3662955711090805,
|
|
"learning_rate": 4.228303681884782e-07,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11350613832473755,
|
|
"step": 3940,
|
|
"valid_targets_mean": 12280.2,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 6.596989966555184,
|
|
"grad_norm": 0.3884473304064643,
|
|
"learning_rate": 4.059425580624576e-07,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10216642171144485,
|
|
"step": 3945,
|
|
"valid_targets_mean": 8904.6,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 6.605351170568562,
|
|
"grad_norm": 0.43262165809958353,
|
|
"learning_rate": 3.893954480366091e-07,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11453603953123093,
|
|
"step": 3950,
|
|
"valid_targets_mean": 7513.5,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 6.61371237458194,
|
|
"grad_norm": 0.443144495245252,
|
|
"learning_rate": 3.731893258309227e-07,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1186174750328064,
|
|
"step": 3955,
|
|
"valid_targets_mean": 11757.5,
|
|
"valid_targets_min": 3523
|
|
},
|
|
{
|
|
"epoch": 6.622073578595318,
|
|
"grad_norm": 0.4071706017978847,
|
|
"learning_rate": 3.573244732363179e-07,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08510640263557434,
|
|
"step": 3960,
|
|
"valid_targets_mean": 10461.6,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 6.630434782608695,
|
|
"grad_norm": 0.4009964672309479,
|
|
"learning_rate": 3.4180116610973645e-07,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10028751194477081,
|
|
"step": 3965,
|
|
"valid_targets_mean": 10420.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 6.638795986622074,
|
|
"grad_norm": 0.4991812688997343,
|
|
"learning_rate": 3.2661967436936394e-07,
|
|
"loss": 0.2017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11248572170734406,
|
|
"step": 3970,
|
|
"valid_targets_mean": 8629.0,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 6.647157190635451,
|
|
"grad_norm": 0.5178917315250771,
|
|
"learning_rate": 3.117802619899113e-07,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09312313050031662,
|
|
"step": 3975,
|
|
"valid_targets_mean": 10799.6,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 6.65551839464883,
|
|
"grad_norm": 0.404211731426026,
|
|
"learning_rate": 2.9728318699804525e-07,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09753704816102982,
|
|
"step": 3980,
|
|
"valid_targets_mean": 10062.8,
|
|
"valid_targets_min": 3597
|
|
},
|
|
{
|
|
"epoch": 6.6638795986622075,
|
|
"grad_norm": 0.4192235571830123,
|
|
"learning_rate": 2.831287014678941e-07,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11463919281959534,
|
|
"step": 3985,
|
|
"valid_targets_mean": 12277.0,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 6.672240802675585,
|
|
"grad_norm": 0.4794325007756201,
|
|
"learning_rate": 2.693170515166599e-07,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15384384989738464,
|
|
"step": 3990,
|
|
"valid_targets_mean": 11027.1,
|
|
"valid_targets_min": 2739
|
|
},
|
|
{
|
|
"epoch": 6.6806020066889635,
|
|
"grad_norm": 0.48390768068734374,
|
|
"learning_rate": 2.558484773003445e-07,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08786249160766602,
|
|
"step": 3995,
|
|
"valid_targets_mean": 6594.4,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 6.688963210702341,
|
|
"grad_norm": 0.5938002293075769,
|
|
"learning_rate": 2.427232130095747e-07,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11286531388759613,
|
|
"step": 4000,
|
|
"valid_targets_mean": 9607.1,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 6.697324414715719,
|
|
"grad_norm": 0.39266992313274224,
|
|
"learning_rate": 2.299414868655281e-07,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08752676844596863,
|
|
"step": 4005,
|
|
"valid_targets_mean": 9019.8,
|
|
"valid_targets_min": 3154
|
|
},
|
|
{
|
|
"epoch": 6.705685618729097,
|
|
"grad_norm": 0.46611784157091757,
|
|
"learning_rate": 2.1750352111596707e-07,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09676424413919449,
|
|
"step": 4010,
|
|
"valid_targets_mean": 9082.4,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 6.714046822742475,
|
|
"grad_norm": 0.376772959313835,
|
|
"learning_rate": 2.0540953203137093e-07,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11070505529642105,
|
|
"step": 4015,
|
|
"valid_targets_mean": 12849.8,
|
|
"valid_targets_min": 2940
|
|
},
|
|
{
|
|
"epoch": 6.722408026755852,
|
|
"grad_norm": 0.39016753982039915,
|
|
"learning_rate": 1.9365972990117e-07,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07128378748893738,
|
|
"step": 4020,
|
|
"valid_targets_mean": 8831.2,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 6.730769230769231,
|
|
"grad_norm": 0.43755912971447036,
|
|
"learning_rate": 1.8225431903010403e-07,
|
|
"loss": 0.1999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199066787958145,
|
|
"step": 4025,
|
|
"valid_targets_mean": 11371.0,
|
|
"valid_targets_min": 3398
|
|
},
|
|
{
|
|
"epoch": 6.739130434782608,
|
|
"grad_norm": 0.5719210180565383,
|
|
"learning_rate": 1.7119349773466076e-07,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12811347842216492,
|
|
"step": 4030,
|
|
"valid_targets_mean": 11284.8,
|
|
"valid_targets_min": 3567
|
|
},
|
|
{
|
|
"epoch": 6.747491638795987,
|
|
"grad_norm": 0.43383806356321236,
|
|
"learning_rate": 1.6047745833962735e-07,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10031247138977051,
|
|
"step": 4035,
|
|
"valid_targets_mean": 8005.5,
|
|
"valid_targets_min": 2398
|
|
},
|
|
{
|
|
"epoch": 6.7558528428093645,
|
|
"grad_norm": 0.3986936769785833,
|
|
"learning_rate": 1.5010638717474878e-07,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11303079128265381,
|
|
"step": 4040,
|
|
"valid_targets_mean": 9260.4,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 6.764214046822742,
|
|
"grad_norm": 0.45172717575270477,
|
|
"learning_rate": 1.400804645714815e-07,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10619814693927765,
|
|
"step": 4045,
|
|
"valid_targets_mean": 10068.6,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 6.7725752508361206,
|
|
"grad_norm": 0.46708346409768564,
|
|
"learning_rate": 1.30399864859867e-07,
|
|
"loss": 0.2081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08373802900314331,
|
|
"step": 4050,
|
|
"valid_targets_mean": 6115.5,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 6.780936454849498,
|
|
"grad_norm": 0.4561940729871474,
|
|
"learning_rate": 1.2106475636549654e-07,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10245362669229507,
|
|
"step": 4055,
|
|
"valid_targets_mean": 9053.6,
|
|
"valid_targets_min": 4092
|
|
},
|
|
{
|
|
"epoch": 6.789297658862877,
|
|
"grad_norm": 0.5189269510680221,
|
|
"learning_rate": 1.1207530140658452e-07,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09477068483829498,
|
|
"step": 4060,
|
|
"valid_targets_mean": 7264.1,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 6.797658862876254,
|
|
"grad_norm": 0.38280068940173617,
|
|
"learning_rate": 1.0343165629114416e-07,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1097802072763443,
|
|
"step": 4065,
|
|
"valid_targets_mean": 11957.1,
|
|
"valid_targets_min": 5164
|
|
},
|
|
{
|
|
"epoch": 6.806020066889632,
|
|
"grad_norm": 0.35495764225116017,
|
|
"learning_rate": 9.513397131427404e-08,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10491962730884552,
|
|
"step": 4070,
|
|
"valid_targets_mean": 14889.5,
|
|
"valid_targets_min": 11448
|
|
},
|
|
{
|
|
"epoch": 6.81438127090301,
|
|
"grad_norm": 0.39863217738364687,
|
|
"learning_rate": 8.71823907555358e-08,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12158674746751785,
|
|
"step": 4075,
|
|
"valid_targets_mean": 11346.4,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 6.822742474916388,
|
|
"grad_norm": 0.4516526639800713,
|
|
"learning_rate": 7.957705287645834e-08,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11042718589305878,
|
|
"step": 4080,
|
|
"valid_targets_mean": 8454.2,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 6.831103678929766,
|
|
"grad_norm": 0.45744429522681335,
|
|
"learning_rate": 7.231808991812639e-08,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10361301898956299,
|
|
"step": 4085,
|
|
"valid_targets_mean": 8718.8,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 6.839464882943144,
|
|
"grad_norm": 0.46979853391523047,
|
|
"learning_rate": 6.540562809887574e-08,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0838114395737648,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5628.9,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 6.8478260869565215,
|
|
"grad_norm": 0.39559244193679705,
|
|
"learning_rate": 5.8839787612114955e-08,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09930168092250824,
|
|
"step": 4095,
|
|
"valid_targets_mean": 9047.4,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 6.8561872909699,
|
|
"grad_norm": 0.4396604676506477,
|
|
"learning_rate": 5.2620682624213714e-08,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09862633049488068,
|
|
"step": 4100,
|
|
"valid_targets_mean": 7766.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 6.864548494983278,
|
|
"grad_norm": 0.3788968567390339,
|
|
"learning_rate": 4.6748421272537756e-08,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08503955602645874,
|
|
"step": 4105,
|
|
"valid_targets_mean": 10534.5,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 6.872909698996655,
|
|
"grad_norm": 0.3823803765137751,
|
|
"learning_rate": 4.1223105663554806e-08,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09367858618497849,
|
|
"step": 4110,
|
|
"valid_targets_mean": 11757.6,
|
|
"valid_targets_min": 4328
|
|
},
|
|
{
|
|
"epoch": 6.881270903010034,
|
|
"grad_norm": 0.44814694678956085,
|
|
"learning_rate": 3.604483187106711e-08,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12711641192436218,
|
|
"step": 4115,
|
|
"valid_targets_mean": 8435.4,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 6.889632107023411,
|
|
"grad_norm": 0.4731518290383752,
|
|
"learning_rate": 3.1213689934537215e-08,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09158414602279663,
|
|
"step": 4120,
|
|
"valid_targets_mean": 7984.6,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 6.897993311036789,
|
|
"grad_norm": 0.5463810521546091,
|
|
"learning_rate": 2.6729763857522573e-08,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09313490986824036,
|
|
"step": 4125,
|
|
"valid_targets_mean": 9091.8,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 6.906354515050167,
|
|
"grad_norm": 0.5014951800886587,
|
|
"learning_rate": 2.2593131606216677e-08,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1266055703163147,
|
|
"step": 4130,
|
|
"valid_targets_mean": 7752.0,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 6.914715719063545,
|
|
"grad_norm": 0.39614249599908763,
|
|
"learning_rate": 1.880386510809018e-08,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09274753928184509,
|
|
"step": 4135,
|
|
"valid_targets_mean": 11580.0,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 6.923076923076923,
|
|
"grad_norm": 0.44601041819852894,
|
|
"learning_rate": 1.536203025064742e-08,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08652716875076294,
|
|
"step": 4140,
|
|
"valid_targets_mean": 7778.4,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 6.931438127090301,
|
|
"grad_norm": 0.3900972182996629,
|
|
"learning_rate": 1.226768688026736e-08,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11261281371116638,
|
|
"step": 4145,
|
|
"valid_targets_mean": 10736.4,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 6.9397993311036785,
|
|
"grad_norm": 0.38878030730759694,
|
|
"learning_rate": 9.520888801182182e-09,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10385765135288239,
|
|
"step": 4150,
|
|
"valid_targets_mean": 12869.1,
|
|
"valid_targets_min": 4137
|
|
},
|
|
{
|
|
"epoch": 6.948160535117057,
|
|
"grad_norm": 0.4894173693571196,
|
|
"learning_rate": 7.121683774518051e-09,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07940640300512314,
|
|
"step": 4155,
|
|
"valid_targets_mean": 8760.5,
|
|
"valid_targets_min": 2262
|
|
},
|
|
{
|
|
"epoch": 6.956521739130435,
|
|
"grad_norm": 0.38874120372041604,
|
|
"learning_rate": 5.0701135174890944e-09,
|
|
"loss": 0.2007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0905492752790451,
|
|
"step": 4160,
|
|
"valid_targets_mean": 8967.8,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 6.964882943143813,
|
|
"grad_norm": 0.3500915100027991,
|
|
"learning_rate": 3.3662137026535537e-09,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09743893146514893,
|
|
"step": 4165,
|
|
"valid_targets_mean": 11369.9,
|
|
"valid_targets_min": 4229
|
|
},
|
|
{
|
|
"epoch": 6.973244147157191,
|
|
"grad_norm": 0.5101682942084185,
|
|
"learning_rate": 2.0100139573031584e-09,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15595540404319763,
|
|
"step": 4170,
|
|
"valid_targets_mean": 9480.2,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 6.981605351170568,
|
|
"grad_norm": 0.4079031251141346,
|
|
"learning_rate": 1.0015378629413265e-09,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0791822001338005,
|
|
"step": 4175,
|
|
"valid_targets_mean": 7611.1,
|
|
"valid_targets_min": 2107
|
|
},
|
|
{
|
|
"epoch": 6.989966555183947,
|
|
"grad_norm": 0.8645286469473469,
|
|
"learning_rate": 3.4080295488347903e-10,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11597691476345062,
|
|
"step": 4180,
|
|
"valid_targets_mean": 8784.5,
|
|
"valid_targets_min": 1750
|
|
},
|
|
{
|
|
"epoch": 6.998327759197324,
|
|
"grad_norm": 0.40324376914006904,
|
|
"learning_rate": 2.7820721939519902e-11,
|
|
"loss": 0.2121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11497265845537186,
|
|
"step": 4185,
|
|
"valid_targets_mean": 10486.4,
|
|
"valid_targets_min": 2982
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08761073648929596,
|
|
"step": 4186,
|
|
"total_flos": 2.474107676200534e+18,
|
|
"train_loss": 0.24594933182050951,
|
|
"train_runtime": 62635.179,
|
|
"train_samples_per_second": 1.069,
|
|
"train_steps_per_second": 0.067,
|
|
"valid_targets_mean": 11621.8,
|
|
"valid_targets_min": 3550
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4186,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.474107676200534e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|