3060 lines
85 KiB
JSON
3060 lines
85 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 1379,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.025466893039049237,
|
||
|
|
"grad_norm": 12.905935111767764,
|
||
|
|
"learning_rate": 1.1594202898550726e-06,
|
||
|
|
"loss": 0.5549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15574893355369568,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 4775.0,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.050933786078098474,
|
||
|
|
"grad_norm": 10.078738477645182,
|
||
|
|
"learning_rate": 2.6086956521739132e-06,
|
||
|
|
"loss": 0.5226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19069485366344452,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 6629.3,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07640067911714771,
|
||
|
|
"grad_norm": 5.205060737600816,
|
||
|
|
"learning_rate": 4.057971014492754e-06,
|
||
|
|
"loss": 0.4676,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13914033770561218,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 3617.4,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10186757215619695,
|
||
|
|
"grad_norm": 1.7561630559315866,
|
||
|
|
"learning_rate": 5.507246376811595e-06,
|
||
|
|
"loss": 0.4211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13013756275177002,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 4257.7,
|
||
|
|
"valid_targets_min": 701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1273344651952462,
|
||
|
|
"grad_norm": 1.0501819268457813,
|
||
|
|
"learning_rate": 6.956521739130435e-06,
|
||
|
|
"loss": 0.369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11685547232627869,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 4292.6,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15280135823429541,
|
||
|
|
"grad_norm": 0.8646360383884321,
|
||
|
|
"learning_rate": 8.405797101449275e-06,
|
||
|
|
"loss": 0.3535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.136878103017807,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 4800.1,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17826825127334464,
|
||
|
|
"grad_norm": 0.5897389861417078,
|
||
|
|
"learning_rate": 9.855072463768118e-06,
|
||
|
|
"loss": 0.3545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11823979020118713,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 4771.1,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2037351443123939,
|
||
|
|
"grad_norm": 0.4060461860301084,
|
||
|
|
"learning_rate": 1.1304347826086957e-05,
|
||
|
|
"loss": 0.3454,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11010861396789551,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 5615.3,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22920203735144312,
|
||
|
|
"grad_norm": 0.34315152890741674,
|
||
|
|
"learning_rate": 1.2753623188405797e-05,
|
||
|
|
"loss": 0.3384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09936510026454926,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 4959.6,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2546689303904924,
|
||
|
|
"grad_norm": 0.35246800035056547,
|
||
|
|
"learning_rate": 1.420289855072464e-05,
|
||
|
|
"loss": 0.2966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11312542855739594,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 6158.5,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2801358234295416,
|
||
|
|
"grad_norm": 0.23652896675744703,
|
||
|
|
"learning_rate": 1.565217391304348e-05,
|
||
|
|
"loss": 0.2833,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07454854249954224,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 5430.5,
|
||
|
|
"valid_targets_min": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30560271646859083,
|
||
|
|
"grad_norm": 0.2647481031451876,
|
||
|
|
"learning_rate": 1.710144927536232e-05,
|
||
|
|
"loss": 0.2756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09449119865894318,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 5365.9,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3310696095076401,
|
||
|
|
"grad_norm": 0.22866704217660094,
|
||
|
|
"learning_rate": 1.8550724637681162e-05,
|
||
|
|
"loss": 0.273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07665856182575226,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 3499.1,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3565365025466893,
|
||
|
|
"grad_norm": 0.23838754270863646,
|
||
|
|
"learning_rate": 2e-05,
|
||
|
|
"loss": 0.2771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09475560486316681,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 5222.6,
|
||
|
|
"valid_targets_min": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38200339558573854,
|
||
|
|
"grad_norm": 0.22204236958742846,
|
||
|
|
"learning_rate": 2.1449275362318844e-05,
|
||
|
|
"loss": 0.2772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06597326695919037,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 3930.4,
|
||
|
|
"valid_targets_min": 624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4074702886247878,
|
||
|
|
"grad_norm": 0.21916062266130582,
|
||
|
|
"learning_rate": 2.2898550724637684e-05,
|
||
|
|
"loss": 0.2525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08288627862930298,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 5469.0,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.432937181663837,
|
||
|
|
"grad_norm": 0.21084610995408531,
|
||
|
|
"learning_rate": 2.4347826086956526e-05,
|
||
|
|
"loss": 0.2461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07836160808801651,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 4980.2,
|
||
|
|
"valid_targets_min": 371
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45840407470288624,
|
||
|
|
"grad_norm": 0.24877805391611285,
|
||
|
|
"learning_rate": 2.5797101449275362e-05,
|
||
|
|
"loss": 0.244,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10522396862506866,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 5283.2,
|
||
|
|
"valid_targets_min": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4838709677419355,
|
||
|
|
"grad_norm": 0.20293051375960244,
|
||
|
|
"learning_rate": 2.7246376811594205e-05,
|
||
|
|
"loss": 0.2435,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0722227692604065,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 5206.1,
|
||
|
|
"valid_targets_min": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5093378607809848,
|
||
|
|
"grad_norm": 0.20540839975282746,
|
||
|
|
"learning_rate": 2.8695652173913044e-05,
|
||
|
|
"loss": 0.2406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0783793106675148,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 5392.8,
|
||
|
|
"valid_targets_min": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.534804753820034,
|
||
|
|
"grad_norm": 0.22119488094876025,
|
||
|
|
"learning_rate": 3.0144927536231887e-05,
|
||
|
|
"loss": 0.2289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07414476573467255,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 6400.4,
|
||
|
|
"valid_targets_min": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5602716468590832,
|
||
|
|
"grad_norm": 0.33622198716688095,
|
||
|
|
"learning_rate": 3.1594202898550726e-05,
|
||
|
|
"loss": 0.2146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07343263179063797,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 5583.2,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5857385398981324,
|
||
|
|
"grad_norm": 0.18463308515750862,
|
||
|
|
"learning_rate": 3.304347826086957e-05,
|
||
|
|
"loss": 0.2199,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06492072343826294,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 5075.7,
|
||
|
|
"valid_targets_min": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6112054329371817,
|
||
|
|
"grad_norm": 0.20390681903894134,
|
||
|
|
"learning_rate": 3.449275362318841e-05,
|
||
|
|
"loss": 0.2231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05670241639018059,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 4689.9,
|
||
|
|
"valid_targets_min": 93
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6366723259762309,
|
||
|
|
"grad_norm": 0.24639858051715774,
|
||
|
|
"learning_rate": 3.594202898550725e-05,
|
||
|
|
"loss": 0.2184,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06681576371192932,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 4932.8,
|
||
|
|
"valid_targets_min": 343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6621392190152802,
|
||
|
|
"grad_norm": 0.23628475300889795,
|
||
|
|
"learning_rate": 3.739130434782609e-05,
|
||
|
|
"loss": 0.214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08704615384340286,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 6058.8,
|
||
|
|
"valid_targets_min": 256
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6876061120543294,
|
||
|
|
"grad_norm": 0.22646525736855452,
|
||
|
|
"learning_rate": 3.884057971014493e-05,
|
||
|
|
"loss": 0.2103,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06430119276046753,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 5760.0,
|
||
|
|
"valid_targets_min": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7130730050933786,
|
||
|
|
"grad_norm": 0.23100345627656083,
|
||
|
|
"learning_rate": 3.999993591506466e-05,
|
||
|
|
"loss": 0.2006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06479993462562561,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 5392.5,
|
||
|
|
"valid_targets_min": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7385398981324278,
|
||
|
|
"grad_norm": 0.19049974356747768,
|
||
|
|
"learning_rate": 3.99976929854497e-05,
|
||
|
|
"loss": 0.2006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.056917525827884674,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 6286.8,
|
||
|
|
"valid_targets_min": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7640067911714771,
|
||
|
|
"grad_norm": 0.191970251151516,
|
||
|
|
"learning_rate": 3.999224621974382e-05,
|
||
|
|
"loss": 0.2025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0581885427236557,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 4557.8,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7894736842105263,
|
||
|
|
"grad_norm": 0.18997461349835917,
|
||
|
|
"learning_rate": 3.9983596490574876e-05,
|
||
|
|
"loss": 0.2052,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05231058597564697,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 5922.7,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8149405772495756,
|
||
|
|
"grad_norm": 0.24055282524770882,
|
||
|
|
"learning_rate": 3.9971745183718484e-05,
|
||
|
|
"loss": 0.2226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06692491471767426,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 5274.8,
|
||
|
|
"valid_targets_min": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8404074702886248,
|
||
|
|
"grad_norm": 0.1942999906232565,
|
||
|
|
"learning_rate": 3.995669419787586e-05,
|
||
|
|
"loss": 0.2102,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05607856065034866,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 5621.2,
|
||
|
|
"valid_targets_min": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.865874363327674,
|
||
|
|
"grad_norm": 0.22362655428605688,
|
||
|
|
"learning_rate": 3.9938445944369745e-05,
|
||
|
|
"loss": 0.2116,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08082179725170135,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 5157.8,
|
||
|
|
"valid_targets_min": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8913412563667232,
|
||
|
|
"grad_norm": 0.21350382917499386,
|
||
|
|
"learning_rate": 3.9917003346758035e-05,
|
||
|
|
"loss": 0.1987,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.060487180948257446,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 5434.3,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9168081494057725,
|
||
|
|
"grad_norm": 0.20059728021230971,
|
||
|
|
"learning_rate": 3.989236984036541e-05,
|
||
|
|
"loss": 0.1858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.056000277400016785,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 4637.7,
|
||
|
|
"valid_targets_min": 328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9422750424448217,
|
||
|
|
"grad_norm": 0.21478048535292446,
|
||
|
|
"learning_rate": 3.986454937173292e-05,
|
||
|
|
"loss": 0.2145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05570094287395477,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 4192.4,
|
||
|
|
"valid_targets_min": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.967741935483871,
|
||
|
|
"grad_norm": 0.1847053866044272,
|
||
|
|
"learning_rate": 3.98335463979858e-05,
|
||
|
|
"loss": 0.1915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054768189787864685,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 7047.9,
|
||
|
|
"valid_targets_min": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9932088285229203,
|
||
|
|
"grad_norm": 0.23251181066885263,
|
||
|
|
"learning_rate": 3.9799365886119304e-05,
|
||
|
|
"loss": 0.1988,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06877763569355011,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 5928.5,
|
||
|
|
"valid_targets_min": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0152801358234296,
|
||
|
|
"grad_norm": 0.2716010848630139,
|
||
|
|
"learning_rate": 3.976201331220296e-05,
|
||
|
|
"loss": 0.1986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07890613377094269,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 4808.2,
|
||
|
|
"valid_targets_min": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0407470288624787,
|
||
|
|
"grad_norm": 0.25684628959193206,
|
||
|
|
"learning_rate": 3.9721494660503295e-05,
|
||
|
|
"loss": 0.1903,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06097788363695145,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 5266.6,
|
||
|
|
"valid_targets_min": 312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.066213921901528,
|
||
|
|
"grad_norm": 0.24425370965885543,
|
||
|
|
"learning_rate": 3.9677816422525024e-05,
|
||
|
|
"loss": 0.19,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07912104576826096,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 4408.8,
|
||
|
|
"valid_targets_min": 314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0916808149405772,
|
||
|
|
"grad_norm": 0.2002845335399574,
|
||
|
|
"learning_rate": 3.963098559597112e-05,
|
||
|
|
"loss": 0.1898,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05172646418213844,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 5689.8,
|
||
|
|
"valid_targets_min": 623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1171477079796266,
|
||
|
|
"grad_norm": 0.20284356911889379,
|
||
|
|
"learning_rate": 3.9581009683621634e-05,
|
||
|
|
"loss": 0.1873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05965813249349594,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 5551.1,
|
||
|
|
"valid_targets_min": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1426146010186757,
|
||
|
|
"grad_norm": 0.2138271050816797,
|
||
|
|
"learning_rate": 3.952789669213173e-05,
|
||
|
|
"loss": 0.2014,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07276459038257599,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 5722.6,
|
||
|
|
"valid_targets_min": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1680814940577249,
|
||
|
|
"grad_norm": 0.2039562273306318,
|
||
|
|
"learning_rate": 3.9471655130748894e-05,
|
||
|
|
"loss": 0.1894,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04895240440964699,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 5333.4,
|
||
|
|
"valid_targets_min": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1935483870967742,
|
||
|
|
"grad_norm": 0.22459112995344713,
|
||
|
|
"learning_rate": 3.9412294009949716e-05,
|
||
|
|
"loss": 0.1838,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057704657316207886,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 4390.5,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2190152801358234,
|
||
|
|
"grad_norm": 0.22320904545680997,
|
||
|
|
"learning_rate": 3.9349822839996266e-05,
|
||
|
|
"loss": 0.1908,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05154874920845032,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 5963.7,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2444821731748728,
|
||
|
|
"grad_norm": 0.21734471823901905,
|
||
|
|
"learning_rate": 3.928425162941248e-05,
|
||
|
|
"loss": 0.1936,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.056830763816833496,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 4469.3,
|
||
|
|
"valid_targets_min": 573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.269949066213922,
|
||
|
|
"grad_norm": 0.22044616993174154,
|
||
|
|
"learning_rate": 3.9215590883380687e-05,
|
||
|
|
"loss": 0.187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04839012026786804,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 3393.2,
|
||
|
|
"valid_targets_min": 463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.295415959252971,
|
||
|
|
"grad_norm": 0.19907685552815926,
|
||
|
|
"learning_rate": 3.914385160205858e-05,
|
||
|
|
"loss": 0.189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06223241984844208,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 7150.2,
|
||
|
|
"valid_targets_min": 698
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3208828522920204,
|
||
|
|
"grad_norm": 0.20354883472054425,
|
||
|
|
"learning_rate": 3.9069045278816844e-05,
|
||
|
|
"loss": 0.1797,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054079022258520126,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 4868.2,
|
||
|
|
"valid_targets_min": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3463497453310695,
|
||
|
|
"grad_norm": 0.20367423810310317,
|
||
|
|
"learning_rate": 3.899118389839785e-05,
|
||
|
|
"loss": 0.1756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05707092210650444,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 6063.3,
|
||
|
|
"valid_targets_min": 356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.371816638370119,
|
||
|
|
"grad_norm": 0.2388271841451862,
|
||
|
|
"learning_rate": 3.8910279934995545e-05,
|
||
|
|
"loss": 0.1889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06782951951026917,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 4558.4,
|
||
|
|
"valid_targets_min": 338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.397283531409168,
|
||
|
|
"grad_norm": 0.18588514404156858,
|
||
|
|
"learning_rate": 3.8826346350256943e-05,
|
||
|
|
"loss": 0.1793,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047811634838581085,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 5856.0,
|
||
|
|
"valid_targets_min": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4227504244482172,
|
||
|
|
"grad_norm": 0.22939026820955755,
|
||
|
|
"learning_rate": 3.873939659120558e-05,
|
||
|
|
"loss": 0.1849,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05040040612220764,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 3587.8,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4482173174872666,
|
||
|
|
"grad_norm": 0.248321484069037,
|
||
|
|
"learning_rate": 3.864944458808712e-05,
|
||
|
|
"loss": 0.1904,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07024089246988297,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 5318.2,
|
||
|
|
"valid_targets_min": 331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4736842105263157,
|
||
|
|
"grad_norm": 0.19531700185276812,
|
||
|
|
"learning_rate": 3.855650475213761e-05,
|
||
|
|
"loss": 0.1947,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05222306028008461,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 4420.7,
|
||
|
|
"valid_targets_min": 84
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.499151103565365,
|
||
|
|
"grad_norm": 0.21409213927029386,
|
||
|
|
"learning_rate": 3.846059197327466e-05,
|
||
|
|
"loss": 0.191,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07738282531499863,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 5504.2,
|
||
|
|
"valid_targets_min": 832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5246179966044142,
|
||
|
|
"grad_norm": 0.2727937403888451,
|
||
|
|
"learning_rate": 3.836172161771189e-05,
|
||
|
|
"loss": 0.2016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07301558554172516,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 4146.5,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5500848896434634,
|
||
|
|
"grad_norm": 0.27591933946670505,
|
||
|
|
"learning_rate": 3.8259909525497134e-05,
|
||
|
|
"loss": 0.184,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07577518373727798,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 5413.3,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5755517826825127,
|
||
|
|
"grad_norm": 0.19788814829959928,
|
||
|
|
"learning_rate": 3.81551720079747e-05,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046708859503269196,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 5426.9,
|
||
|
|
"valid_targets_min": 358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.601018675721562,
|
||
|
|
"grad_norm": 0.1871223645202415,
|
||
|
|
"learning_rate": 3.8047525845172104e-05,
|
||
|
|
"loss": 0.186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058847323060035706,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 7743.5,
|
||
|
|
"valid_targets_min": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6264855687606112,
|
||
|
|
"grad_norm": 0.22652771363762644,
|
||
|
|
"learning_rate": 3.7936988283111764e-05,
|
||
|
|
"loss": 0.1935,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0625789687037468,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 5233.7,
|
||
|
|
"valid_targets_min": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6519524617996604,
|
||
|
|
"grad_norm": 0.1827154465048512,
|
||
|
|
"learning_rate": 3.7823577031048e-05,
|
||
|
|
"loss": 0.1704,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0553651861846447,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 6211.6,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6774193548387095,
|
||
|
|
"grad_norm": 0.22217593165464103,
|
||
|
|
"learning_rate": 3.77073102586298e-05,
|
||
|
|
"loss": 0.1836,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0627608448266983,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 4552.9,
|
||
|
|
"valid_targets_min": 277
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7028862478777589,
|
||
|
|
"grad_norm": 0.22270346306202596,
|
||
|
|
"learning_rate": 3.758820659298991e-05,
|
||
|
|
"loss": 0.1759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05124921351671219,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 4196.0,
|
||
|
|
"valid_targets_min": 326
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7283531409168083,
|
||
|
|
"grad_norm": 0.21105427568746413,
|
||
|
|
"learning_rate": 3.746628511576054e-05,
|
||
|
|
"loss": 0.1921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054783932864665985,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 4825.3,
|
||
|
|
"valid_targets_min": 377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7538200339558574,
|
||
|
|
"grad_norm": 0.20812248067096134,
|
||
|
|
"learning_rate": 3.734156536001629e-05,
|
||
|
|
"loss": 0.1864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06411299854516983,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 5575.1,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7792869269949065,
|
||
|
|
"grad_norm": 0.21613044021886657,
|
||
|
|
"learning_rate": 3.721406730714476e-05,
|
||
|
|
"loss": 0.1777,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0606655478477478,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 4606.2,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.804753820033956,
|
||
|
|
"grad_norm": 0.1915904424922132,
|
||
|
|
"learning_rate": 3.7083811383645334e-05,
|
||
|
|
"loss": 0.1692,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05197008326649666,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 4822.4,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.830220713073005,
|
||
|
|
"grad_norm": 0.23345796645494707,
|
||
|
|
"learning_rate": 3.695081845785663e-05,
|
||
|
|
"loss": 0.1681,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04420693591237068,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 5001.8,
|
||
|
|
"valid_targets_min": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8556876061120544,
|
||
|
|
"grad_norm": 0.23815063812250117,
|
||
|
|
"learning_rate": 3.6815109836613165e-05,
|
||
|
|
"loss": 0.1717,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054919321089982986,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 4973.3,
|
||
|
|
"valid_targets_min": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8811544991511036,
|
||
|
|
"grad_norm": 0.2468495850612204,
|
||
|
|
"learning_rate": 3.6676707261831836e-05,
|
||
|
|
"loss": 0.1862,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06972532719373703,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 4836.3,
|
||
|
|
"valid_targets_min": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9066213921901527,
|
||
|
|
"grad_norm": 0.1957634434807587,
|
||
|
|
"learning_rate": 3.6535632907028566e-05,
|
||
|
|
"loss": 0.1772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05477672815322876,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 5473.1,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.932088285229202,
|
||
|
|
"grad_norm": 0.22151494928458712,
|
||
|
|
"learning_rate": 3.6391909373765944e-05,
|
||
|
|
"loss": 0.1827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06339579820632935,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 5057.0,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9575551782682514,
|
||
|
|
"grad_norm": 0.23166366744975464,
|
||
|
|
"learning_rate": 3.6245559688032176e-05,
|
||
|
|
"loss": 0.1798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06725640594959259,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 5779.2,
|
||
|
|
"valid_targets_min": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9830220713073006,
|
||
|
|
"grad_norm": 0.20716401844004403,
|
||
|
|
"learning_rate": 3.609660729655212e-05,
|
||
|
|
"loss": 0.1786,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05772645026445389,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 5280.8,
|
||
|
|
"valid_targets_min": 337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.00509337860781,
|
||
|
|
"grad_norm": 0.20967807986867762,
|
||
|
|
"learning_rate": 3.5945076063030835e-05,
|
||
|
|
"loss": 0.1776,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04859248921275139,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 4550.8,
|
||
|
|
"valid_targets_min": 649
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.030560271646859,
|
||
|
|
"grad_norm": 0.20081072423556587,
|
||
|
|
"learning_rate": 3.579099026433044e-05,
|
||
|
|
"loss": 0.1573,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047822657972574234,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 4847.1,
|
||
|
|
"valid_targets_min": 649
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0560271646859083,
|
||
|
|
"grad_norm": 0.179351052611451,
|
||
|
|
"learning_rate": 3.563437458658064e-05,
|
||
|
|
"loss": 0.179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046782758086919785,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 6659.4,
|
||
|
|
"valid_targets_min": 438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0814940577249574,
|
||
|
|
"grad_norm": 0.19813059770710748,
|
||
|
|
"learning_rate": 3.547525412122378e-05,
|
||
|
|
"loss": 0.1613,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.062020085752010345,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 5190.2,
|
||
|
|
"valid_targets_min": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1069609507640066,
|
||
|
|
"grad_norm": 0.19987083948716222,
|
||
|
|
"learning_rate": 3.531365436099497e-05,
|
||
|
|
"loss": 0.1629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05440008267760277,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 4833.9,
|
||
|
|
"valid_targets_min": 287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.132427843803056,
|
||
|
|
"grad_norm": 0.22185874964663296,
|
||
|
|
"learning_rate": 3.5149601195837815e-05,
|
||
|
|
"loss": 0.1721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05104801058769226,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 5238.0,
|
||
|
|
"valid_targets_min": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1578947368421053,
|
||
|
|
"grad_norm": 0.20211384951819653,
|
||
|
|
"learning_rate": 3.498312090875667e-05,
|
||
|
|
"loss": 0.1619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04737398028373718,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 5755.9,
|
||
|
|
"valid_targets_min": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1833616298811545,
|
||
|
|
"grad_norm": 0.18791093618983779,
|
||
|
|
"learning_rate": 3.481424017160574e-05,
|
||
|
|
"loss": 0.1626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05240146815776825,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 5847.5,
|
||
|
|
"valid_targets_min": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2088285229202036,
|
||
|
|
"grad_norm": 0.18568470911214985,
|
||
|
|
"learning_rate": 3.464298604081607e-05,
|
||
|
|
"loss": 0.1725,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04802415519952774,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 5056.2,
|
||
|
|
"valid_targets_min": 298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.234295415959253,
|
||
|
|
"grad_norm": 0.22781637961543907,
|
||
|
|
"learning_rate": 3.4469385953060715e-05,
|
||
|
|
"loss": 0.1793,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04768109694123268,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 4325.7,
|
||
|
|
"valid_targets_min": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2597623089983023,
|
||
|
|
"grad_norm": 0.2397152235569167,
|
||
|
|
"learning_rate": 3.429346772085923e-05,
|
||
|
|
"loss": 0.1667,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05326157063245773,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 3865.7,
|
||
|
|
"valid_targets_min": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2852292020373515,
|
||
|
|
"grad_norm": 0.20489042679339467,
|
||
|
|
"learning_rate": 3.4115259528121685e-05,
|
||
|
|
"loss": 0.1708,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057636670768260956,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 6207.6,
|
||
|
|
"valid_targets_min": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3106960950764006,
|
||
|
|
"grad_norm": 0.21677251280545407,
|
||
|
|
"learning_rate": 3.3934789925633426e-05,
|
||
|
|
"loss": 0.1723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.055631302297115326,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 4781.9,
|
||
|
|
"valid_targets_min": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3361629881154498,
|
||
|
|
"grad_norm": 0.197104867431473,
|
||
|
|
"learning_rate": 3.37520878264809e-05,
|
||
|
|
"loss": 0.1703,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05575048178434372,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 3780.2,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3616298811544993,
|
||
|
|
"grad_norm": 0.19119683771865437,
|
||
|
|
"learning_rate": 3.356718250141945e-05,
|
||
|
|
"loss": 0.1763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04281400889158249,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 4780.9,
|
||
|
|
"valid_targets_min": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3870967741935485,
|
||
|
|
"grad_norm": 0.24228470287917536,
|
||
|
|
"learning_rate": 3.33801035741839e-05,
|
||
|
|
"loss": 0.1786,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06536433100700378,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 6222.0,
|
||
|
|
"valid_targets_min": 611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4125636672325976,
|
||
|
|
"grad_norm": 0.22050849754803323,
|
||
|
|
"learning_rate": 3.3190881016742476e-05,
|
||
|
|
"loss": 0.1771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06315451860427856,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 5316.1,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4380305602716468,
|
||
|
|
"grad_norm": 0.2382957140570959,
|
||
|
|
"learning_rate": 3.2999545144495037e-05,
|
||
|
|
"loss": 0.179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07954886555671692,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 4862.7,
|
||
|
|
"valid_targets_min": 679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.463497453310696,
|
||
|
|
"grad_norm": 0.23609092463828943,
|
||
|
|
"learning_rate": 3.280612661141615e-05,
|
||
|
|
"loss": 0.1848,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05267144739627838,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 5179.8,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4889643463497455,
|
||
|
|
"grad_norm": 0.18759948079571875,
|
||
|
|
"learning_rate": 3.2610656405144155e-05,
|
||
|
|
"loss": 0.163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06271520256996155,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 6420.2,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5144312393887946,
|
||
|
|
"grad_norm": 0.20855216144633196,
|
||
|
|
"learning_rate": 3.241316584201647e-05,
|
||
|
|
"loss": 0.1679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06124936416745186,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 5141.9,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.539898132427844,
|
||
|
|
"grad_norm": 0.21246755645053164,
|
||
|
|
"learning_rate": 3.2213686562052474e-05,
|
||
|
|
"loss": 0.1772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0753287523984909,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 5840.4,
|
||
|
|
"valid_targets_min": 338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.565365025466893,
|
||
|
|
"grad_norm": 0.2115360085430234,
|
||
|
|
"learning_rate": 3.201225052388446e-05,
|
||
|
|
"loss": 0.1655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0660819411277771,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 5808.2,
|
||
|
|
"valid_targets_min": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.590831918505942,
|
||
|
|
"grad_norm": 0.18198237656841157,
|
||
|
|
"learning_rate": 3.1808889999637496e-05,
|
||
|
|
"loss": 0.1792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.042239751666784286,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 5342.5,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6162988115449917,
|
||
|
|
"grad_norm": 0.21647629339184596,
|
||
|
|
"learning_rate": 3.16036375697591e-05,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06771990656852722,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 5934.7,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.641765704584041,
|
||
|
|
"grad_norm": 0.20787346398933795,
|
||
|
|
"learning_rate": 3.1396526117799557e-05,
|
||
|
|
"loss": 0.1708,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05229032784700394,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 4764.7,
|
||
|
|
"valid_targets_min": 182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.66723259762309,
|
||
|
|
"grad_norm": 0.22738252585567634,
|
||
|
|
"learning_rate": 3.1187588825143596e-05,
|
||
|
|
"loss": 0.1764,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06814859062433243,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 4228.3,
|
||
|
|
"valid_targets_min": 354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.692699490662139,
|
||
|
|
"grad_norm": 0.18956065067088942,
|
||
|
|
"learning_rate": 3.097685916569439e-05,
|
||
|
|
"loss": 0.1708,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05186791345477104,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 5616.5,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7181663837011882,
|
||
|
|
"grad_norm": 0.1673599198641445,
|
||
|
|
"learning_rate": 3.076437090051073e-05,
|
||
|
|
"loss": 0.1626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03805826976895332,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 5770.4,
|
||
|
|
"valid_targets_min": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.743633276740238,
|
||
|
|
"grad_norm": 0.2047431050311338,
|
||
|
|
"learning_rate": 3.0550158072398125e-05,
|
||
|
|
"loss": 0.1665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.056432489305734634,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 5859.8,
|
||
|
|
"valid_targets_min": 681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.769100169779287,
|
||
|
|
"grad_norm": 0.19152278114020901,
|
||
|
|
"learning_rate": 3.0334255000454795e-05,
|
||
|
|
"loss": 0.166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04708458483219147,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 5319.8,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.794567062818336,
|
||
|
|
"grad_norm": 0.18664751337731664,
|
||
|
|
"learning_rate": 3.011669627457341e-05,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05980811268091202,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 7653.7,
|
||
|
|
"valid_targets_min": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8200339558573853,
|
||
|
|
"grad_norm": 0.1998549268481176,
|
||
|
|
"learning_rate": 2.989751674989943e-05,
|
||
|
|
"loss": 0.1667,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05653294920921326,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 4836.5,
|
||
|
|
"valid_targets_min": 388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8455008488964344,
|
||
|
|
"grad_norm": 0.21026299154262526,
|
||
|
|
"learning_rate": 2.967675154124696e-05,
|
||
|
|
"loss": 0.1599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04772374406456947,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 4973.8,
|
||
|
|
"valid_targets_min": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.870967741935484,
|
||
|
|
"grad_norm": 0.17304228310969938,
|
||
|
|
"learning_rate": 2.945443601747297e-05,
|
||
|
|
"loss": 0.1702,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05010491609573364,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 6293.7,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.896434634974533,
|
||
|
|
"grad_norm": 0.20564622695902693,
|
||
|
|
"learning_rate": 2.923060579581087e-05,
|
||
|
|
"loss": 0.1734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06477466225624084,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 5248.3,
|
||
|
|
"valid_targets_min": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9219015280135823,
|
||
|
|
"grad_norm": 0.21682003672674044,
|
||
|
|
"learning_rate": 2.9005296736164246e-05,
|
||
|
|
"loss": 0.1653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04603603109717369,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 3767.2,
|
||
|
|
"valid_targets_min": 278
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9473684210526314,
|
||
|
|
"grad_norm": 0.2036828766728075,
|
||
|
|
"learning_rate": 2.8778544935361742e-05,
|
||
|
|
"loss": 0.1627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05513259023427963,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 5203.5,
|
||
|
|
"valid_targets_min": 404
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9728353140916806,
|
||
|
|
"grad_norm": 0.1849428246148903,
|
||
|
|
"learning_rate": 2.855038672137396e-05,
|
||
|
|
"loss": 0.1641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05258062481880188,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 4857.2,
|
||
|
|
"valid_targets_min": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.99830220713073,
|
||
|
|
"grad_norm": 0.21395815338632157,
|
||
|
|
"learning_rate": 2.8320858647493374e-05,
|
||
|
|
"loss": 0.158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06454746425151825,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 4870.2,
|
||
|
|
"valid_targets_min": 378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0203735144312396,
|
||
|
|
"grad_norm": 0.21179059991069732,
|
||
|
|
"learning_rate": 2.8089997486478102e-05,
|
||
|
|
"loss": 0.1619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057160839438438416,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 5278.4,
|
||
|
|
"valid_targets_min": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0458404074702887,
|
||
|
|
"grad_norm": 0.2098482991745256,
|
||
|
|
"learning_rate": 2.785784022466053e-05,
|
||
|
|
"loss": 0.1598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0546451210975647,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 4272.8,
|
||
|
|
"valid_targets_min": 584
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.071307300509338,
|
||
|
|
"grad_norm": 0.19354015406908995,
|
||
|
|
"learning_rate": 2.7624424056021707e-05,
|
||
|
|
"loss": 0.158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048149678856134415,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 4952.3,
|
||
|
|
"valid_targets_min": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.096774193548387,
|
||
|
|
"grad_norm": 0.21754581543034396,
|
||
|
|
"learning_rate": 2.738978637623252e-05,
|
||
|
|
"loss": 0.1726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06529363989830017,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 4998.4,
|
||
|
|
"valid_targets_min": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.122241086587436,
|
||
|
|
"grad_norm": 0.20888242176073119,
|
||
|
|
"learning_rate": 2.7153964776662517e-05,
|
||
|
|
"loss": 0.1671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05744210258126259,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 4900.2,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1477079796264857,
|
||
|
|
"grad_norm": 0.19295107145038598,
|
||
|
|
"learning_rate": 2.691699703835733e-05,
|
||
|
|
"loss": 0.1692,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05718826502561569,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 5434.8,
|
||
|
|
"valid_targets_min": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.173174872665535,
|
||
|
|
"grad_norm": 0.2137305552260263,
|
||
|
|
"learning_rate": 2.6678921125985845e-05,
|
||
|
|
"loss": 0.159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.072231724858284,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 6674.1,
|
||
|
|
"valid_targets_min": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.198641765704584,
|
||
|
|
"grad_norm": 0.2514949379597044,
|
||
|
|
"learning_rate": 2.6439775181757806e-05,
|
||
|
|
"loss": 0.1694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06704398989677429,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 4067.2,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.224108658743633,
|
||
|
|
"grad_norm": 0.21911387821663028,
|
||
|
|
"learning_rate": 2.6199597519313092e-05,
|
||
|
|
"loss": 0.1651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04223302751779556,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 3504.5,
|
||
|
|
"valid_targets_min": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2495755517826823,
|
||
|
|
"grad_norm": 0.21812279060693707,
|
||
|
|
"learning_rate": 2.5958426617583417e-05,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06164269521832466,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 4890.7,
|
||
|
|
"valid_targets_min": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.275042444821732,
|
||
|
|
"grad_norm": 0.21420332843184514,
|
||
|
|
"learning_rate": 2.5716301114627663e-05,
|
||
|
|
"loss": 0.1622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0604836605489254,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 4715.1,
|
||
|
|
"valid_targets_min": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.300509337860781,
|
||
|
|
"grad_norm": 0.1945183899864475,
|
||
|
|
"learning_rate": 2.5473259801441663e-05,
|
||
|
|
"loss": 0.161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03996345400810242,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 3867.2,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.32597623089983,
|
||
|
|
"grad_norm": 0.1838149996271793,
|
||
|
|
"learning_rate": 2.5229341615743423e-05,
|
||
|
|
"loss": 0.1538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046856336295604706,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 4675.5,
|
||
|
|
"valid_targets_min": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3514431239388793,
|
||
|
|
"grad_norm": 0.19620472692592283,
|
||
|
|
"learning_rate": 2.4984585635734995e-05,
|
||
|
|
"loss": 0.1499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04475293681025505,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 3989.5,
|
||
|
|
"valid_targets_min": 601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.376910016977929,
|
||
|
|
"grad_norm": 0.19236934985032755,
|
||
|
|
"learning_rate": 2.4739031073841652e-05,
|
||
|
|
"loss": 0.1584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0530376099050045,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 3726.4,
|
||
|
|
"valid_targets_min": 209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.402376910016978,
|
||
|
|
"grad_norm": 0.2621809960660758,
|
||
|
|
"learning_rate": 2.4492717270429736e-05,
|
||
|
|
"loss": 0.1616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06869988143444061,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 6037.5,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.427843803056027,
|
||
|
|
"grad_norm": 0.1777064909782656,
|
||
|
|
"learning_rate": 2.424568368750385e-05,
|
||
|
|
"loss": 0.1597,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05851980298757553,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 5295.7,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4533106960950763,
|
||
|
|
"grad_norm": 0.1949390809455965,
|
||
|
|
"learning_rate": 2.3997969902384722e-05,
|
||
|
|
"loss": 0.1618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0510847307741642,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 4908.3,
|
||
|
|
"valid_targets_min": 573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4787775891341255,
|
||
|
|
"grad_norm": 0.17456309455989005,
|
||
|
|
"learning_rate": 2.3749615601368434e-05,
|
||
|
|
"loss": 0.1599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0578172393143177,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 6196.7,
|
||
|
|
"valid_targets_min": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.504244482173175,
|
||
|
|
"grad_norm": 0.18989495891248262,
|
||
|
|
"learning_rate": 2.3500660573368305e-05,
|
||
|
|
"loss": 0.1551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05929127335548401,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 5045.1,
|
||
|
|
"valid_targets_min": 483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5297113752122242,
|
||
|
|
"grad_norm": 0.22953903468452172,
|
||
|
|
"learning_rate": 2.3251144703540313e-05,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06596667319536209,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 4899.3,
|
||
|
|
"valid_targets_min": 636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5551782682512734,
|
||
|
|
"grad_norm": 0.2140483198993044,
|
||
|
|
"learning_rate": 2.3001107966893054e-05,
|
||
|
|
"loss": 0.1597,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05482468381524086,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 4562.0,
|
||
|
|
"valid_targets_min": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5806451612903225,
|
||
|
|
"grad_norm": 0.17307959974608458,
|
||
|
|
"learning_rate": 2.2750590421883348e-05,
|
||
|
|
"loss": 0.157,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047615014016628265,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 5728.4,
|
||
|
|
"valid_targets_min": 435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6061120543293717,
|
||
|
|
"grad_norm": 0.1958982806804525,
|
||
|
|
"learning_rate": 2.2499632203998454e-05,
|
||
|
|
"loss": 0.1527,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04757123440504074,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 4777.9,
|
||
|
|
"valid_targets_min": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6315789473684212,
|
||
|
|
"grad_norm": 0.19567641358372231,
|
||
|
|
"learning_rate": 2.224827351932596e-05,
|
||
|
|
"loss": 0.1572,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04391469061374664,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 4801.1,
|
||
|
|
"valid_targets_min": 291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6570458404074704,
|
||
|
|
"grad_norm": 0.17339935956933378,
|
||
|
|
"learning_rate": 2.1996554638112362e-05,
|
||
|
|
"loss": 0.158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05453699454665184,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 5255.2,
|
||
|
|
"valid_targets_min": 1151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6825127334465195,
|
||
|
|
"grad_norm": 0.20274261173391694,
|
||
|
|
"learning_rate": 2.174451588831134e-05,
|
||
|
|
"loss": 0.1614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053828105330467224,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 5086.0,
|
||
|
|
"valid_targets_min": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7079796264855687,
|
||
|
|
"grad_norm": 0.19162539636147466,
|
||
|
|
"learning_rate": 2.1492197649122794e-05,
|
||
|
|
"loss": 0.158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054310865700244904,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 5213.3,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.733446519524618,
|
||
|
|
"grad_norm": 0.2072759818666121,
|
||
|
|
"learning_rate": 2.1239640344523735e-05,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05167793482542038,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 5258.2,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7589134125636674,
|
||
|
|
"grad_norm": 0.18233070553104944,
|
||
|
|
"learning_rate": 2.0986884436791875e-05,
|
||
|
|
"loss": 0.1552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04380776733160019,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 6251.1,
|
||
|
|
"valid_targets_min": 534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7843803056027165,
|
||
|
|
"grad_norm": 0.2008779170543001,
|
||
|
|
"learning_rate": 2.073397042002322e-05,
|
||
|
|
"loss": 0.1599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046290189027786255,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 4248.8,
|
||
|
|
"valid_targets_min": 299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8098471986417657,
|
||
|
|
"grad_norm": 0.206845021389941,
|
||
|
|
"learning_rate": 2.0480938813644443e-05,
|
||
|
|
"loss": 0.1637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06837663054466248,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 5167.8,
|
||
|
|
"valid_targets_min": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.835314091680815,
|
||
|
|
"grad_norm": 0.1887022297446418,
|
||
|
|
"learning_rate": 2.022783015592132e-05,
|
||
|
|
"loss": 0.1581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05138999596238136,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 3476.4,
|
||
|
|
"valid_targets_min": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.860780984719864,
|
||
|
|
"grad_norm": 0.2053982078217207,
|
||
|
|
"learning_rate": 1.9974684997463986e-05,
|
||
|
|
"loss": 0.1661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06539995223283768,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 4786.1,
|
||
|
|
"valid_targets_min": 363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8862478777589136,
|
||
|
|
"grad_norm": 0.23624597205106068,
|
||
|
|
"learning_rate": 1.9721543894730428e-05,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05308888107538223,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 3819.8,
|
||
|
|
"valid_targets_min": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9117147707979627,
|
||
|
|
"grad_norm": 0.17988017064853987,
|
||
|
|
"learning_rate": 1.946844740352883e-05,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05310368537902832,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 5330.0,
|
||
|
|
"valid_targets_min": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.937181663837012,
|
||
|
|
"grad_norm": 0.17873166166831936,
|
||
|
|
"learning_rate": 1.9215436072520167e-05,
|
||
|
|
"loss": 0.1487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05123206973075867,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 7128.5,
|
||
|
|
"valid_targets_min": 813
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.962648556876061,
|
||
|
|
"grad_norm": 0.17809523228411328,
|
||
|
|
"learning_rate": 1.8962550436721867e-05,
|
||
|
|
"loss": 0.1537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04764179885387421,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 5059.8,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.98811544991511,
|
||
|
|
"grad_norm": 0.18059771071561703,
|
||
|
|
"learning_rate": 1.8709831011013678e-05,
|
||
|
|
"loss": 0.1605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04299032688140869,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 4471.0,
|
||
|
|
"valid_targets_min": 573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.01018675721562,
|
||
|
|
"grad_norm": 0.19087550011502016,
|
||
|
|
"learning_rate": 1.8457318283646814e-05,
|
||
|
|
"loss": 0.1609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04256012290716171,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 4504.0,
|
||
|
|
"valid_targets_min": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.035653650254669,
|
||
|
|
"grad_norm": 0.2012261960566325,
|
||
|
|
"learning_rate": 1.8205052709757263e-05,
|
||
|
|
"loss": 0.1638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05523894727230072,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 5278.5,
|
||
|
|
"valid_targets_min": 392
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.061120543293718,
|
||
|
|
"grad_norm": 0.19172876392679508,
|
||
|
|
"learning_rate": 1.79530747048845e-05,
|
||
|
|
"loss": 0.1511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04358460009098053,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 3897.0,
|
||
|
|
"valid_targets_min": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.086587436332767,
|
||
|
|
"grad_norm": 0.2014458476082743,
|
||
|
|
"learning_rate": 1.7701424638496473e-05,
|
||
|
|
"loss": 0.1537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04093575105071068,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 4177.9,
|
||
|
|
"valid_targets_min": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.112054329371817,
|
||
|
|
"grad_norm": 0.20826490372365644,
|
||
|
|
"learning_rate": 1.7450142827522027e-05,
|
||
|
|
"loss": 0.1575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03707022964954376,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 4739.0,
|
||
|
|
"valid_targets_min": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.137521222410866,
|
||
|
|
"grad_norm": 0.20599780475309687,
|
||
|
|
"learning_rate": 1.719926952989169e-05,
|
||
|
|
"loss": 0.1525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06674011051654816,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 5112.4,
|
||
|
|
"valid_targets_min": 645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.162988115449915,
|
||
|
|
"grad_norm": 0.2135972888739578,
|
||
|
|
"learning_rate": 1.694884493808795e-05,
|
||
|
|
"loss": 0.1493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05046378821134567,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 4885.5,
|
||
|
|
"valid_targets_min": 343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1884550084889645,
|
||
|
|
"grad_norm": 0.19507138430628793,
|
||
|
|
"learning_rate": 1.6698909172706e-05,
|
||
|
|
"loss": 0.1443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0384068563580513,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 4854.2,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.213921901528013,
|
||
|
|
"grad_norm": 0.19466510533758044,
|
||
|
|
"learning_rate": 1.644950227602605e-05,
|
||
|
|
"loss": 0.1506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0502655915915966,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 4918.0,
|
||
|
|
"valid_targets_min": 362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.239388794567063,
|
||
|
|
"grad_norm": 0.20892683791486863,
|
||
|
|
"learning_rate": 1.620066420559805e-05,
|
||
|
|
"loss": 0.1623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05903159826993942,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 4497.6,
|
||
|
|
"valid_targets_min": 337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.264855687606112,
|
||
|
|
"grad_norm": 0.2106424130972343,
|
||
|
|
"learning_rate": 1.5952434827840187e-05,
|
||
|
|
"loss": 0.1531,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06574611365795135,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 5347.1,
|
||
|
|
"valid_targets_min": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.290322580645161,
|
||
|
|
"grad_norm": 0.239733963665783,
|
||
|
|
"learning_rate": 1.5704853911651777e-05,
|
||
|
|
"loss": 0.1737,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0693153589963913,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 4404.1,
|
||
|
|
"valid_targets_min": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.315789473684211,
|
||
|
|
"grad_norm": 0.17730261138280262,
|
||
|
|
"learning_rate": 1.545796112204196e-05,
|
||
|
|
"loss": 0.1586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046191245317459106,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 6100.7,
|
||
|
|
"valid_targets_min": 704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.341256366723259,
|
||
|
|
"grad_norm": 0.21009022637541613,
|
||
|
|
"learning_rate": 1.5211796013774893e-05,
|
||
|
|
"loss": 0.1481,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06376850605010986,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 5445.8,
|
||
|
|
"valid_targets_min": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.366723259762309,
|
||
|
|
"grad_norm": 0.19147138861623728,
|
||
|
|
"learning_rate": 1.4966398025032706e-05,
|
||
|
|
"loss": 0.1588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.040558382868766785,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 5572.8,
|
||
|
|
"valid_targets_min": 643
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3921901528013585,
|
||
|
|
"grad_norm": 0.25489222062294176,
|
||
|
|
"learning_rate": 1.4721806471097104e-05,
|
||
|
|
"loss": 0.1535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05043104663491249,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 5482.2,
|
||
|
|
"valid_targets_min": 419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.417657045840407,
|
||
|
|
"grad_norm": 0.21423497052399765,
|
||
|
|
"learning_rate": 1.4478060538050622e-05,
|
||
|
|
"loss": 0.1661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.060023024678230286,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 4986.4,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.443123938879457,
|
||
|
|
"grad_norm": 0.18747238981681072,
|
||
|
|
"learning_rate": 1.4235199276498652e-05,
|
||
|
|
"loss": 0.1531,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05143500119447708,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 7035.5,
|
||
|
|
"valid_targets_min": 328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.468590831918506,
|
||
|
|
"grad_norm": 0.1894590468808143,
|
||
|
|
"learning_rate": 1.3993261595313094e-05,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05919076129794121,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 5586.1,
|
||
|
|
"valid_targets_min": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.494057724957555,
|
||
|
|
"grad_norm": 0.2076964513961011,
|
||
|
|
"learning_rate": 1.3752286255398794e-05,
|
||
|
|
"loss": 0.1539,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05181092768907547,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 5430.3,
|
||
|
|
"valid_targets_min": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.519524617996605,
|
||
|
|
"grad_norm": 0.19715103216822133,
|
||
|
|
"learning_rate": 1.3512311863483606e-05,
|
||
|
|
"loss": 0.153,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0564362034201622,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 5937.5,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.544991511035653,
|
||
|
|
"grad_norm": 0.21836120644331902,
|
||
|
|
"learning_rate": 1.3273376865933236e-05,
|
||
|
|
"loss": 0.1542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06101875752210617,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 4459.8,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.570458404074703,
|
||
|
|
"grad_norm": 0.19979008110611626,
|
||
|
|
"learning_rate": 1.303551954259172e-05,
|
||
|
|
"loss": 0.144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03848603367805481,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 4476.2,
|
||
|
|
"valid_targets_min": 346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.595925297113752,
|
||
|
|
"grad_norm": 0.20657333412135,
|
||
|
|
"learning_rate": 1.2798778000648602e-05,
|
||
|
|
"loss": 0.156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.049523286521434784,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 4977.6,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.621392190152801,
|
||
|
|
"grad_norm": 0.22194409821177138,
|
||
|
|
"learning_rate": 1.2563190168533766e-05,
|
||
|
|
"loss": 0.1562,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0513107106089592,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 4753.3,
|
||
|
|
"valid_targets_min": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.646859083191851,
|
||
|
|
"grad_norm": 0.20073415825748206,
|
||
|
|
"learning_rate": 1.2328793789840926e-05,
|
||
|
|
"loss": 0.1497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05154266953468323,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 4509.1,
|
||
|
|
"valid_targets_min": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6723259762308995,
|
||
|
|
"grad_norm": 0.20363480305430207,
|
||
|
|
"learning_rate": 1.2095626417280686e-05,
|
||
|
|
"loss": 0.1568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05040046572685242,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 4495.6,
|
||
|
|
"valid_targets_min": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.697792869269949,
|
||
|
|
"grad_norm": 0.15788485491127127,
|
||
|
|
"learning_rate": 1.1863725406664241e-05,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04818938672542572,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 6288.2,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.723259762308999,
|
||
|
|
"grad_norm": 0.18869305593095,
|
||
|
|
"learning_rate": 1.163312791091858e-05,
|
||
|
|
"loss": 0.1497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04854920506477356,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 5647.6,
|
||
|
|
"valid_targets_min": 354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.748726655348047,
|
||
|
|
"grad_norm": 0.16443579067469954,
|
||
|
|
"learning_rate": 1.1403870874134192e-05,
|
||
|
|
"loss": 0.146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046788256615400314,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 5484.6,
|
||
|
|
"valid_targets_min": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.774193548387097,
|
||
|
|
"grad_norm": 0.20763711069277233,
|
||
|
|
"learning_rate": 1.1175991025646267e-05,
|
||
|
|
"loss": 0.1487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05495909973978996,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 4970.3,
|
||
|
|
"valid_targets_min": 603
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.799660441426146,
|
||
|
|
"grad_norm": 0.17015784881873067,
|
||
|
|
"learning_rate": 1.0949524874150246e-05,
|
||
|
|
"loss": 0.156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04452647641301155,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 4811.3,
|
||
|
|
"valid_targets_min": 616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.825127334465195,
|
||
|
|
"grad_norm": 0.18869199446083515,
|
||
|
|
"learning_rate": 1.0724508701852807e-05,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047742199152708054,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 4139.6,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.850594227504245,
|
||
|
|
"grad_norm": 0.19979140112460086,
|
||
|
|
"learning_rate": 1.0500978558659001e-05,
|
||
|
|
"loss": 0.1615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06486503779888153,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 5047.7,
|
||
|
|
"valid_targets_min": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8760611205432935,
|
||
|
|
"grad_norm": 0.1798361541913208,
|
||
|
|
"learning_rate": 1.0278970256396764e-05,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04292941093444824,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 3786.9,
|
||
|
|
"valid_targets_min": 84
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.901528013582343,
|
||
|
|
"grad_norm": 0.18933423790659826,
|
||
|
|
"learning_rate": 1.0058519363079464e-05,
|
||
|
|
"loss": 0.1476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05371493473649025,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 5490.7,
|
||
|
|
"valid_targets_min": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.926994906621392,
|
||
|
|
"grad_norm": 0.16566669416409652,
|
||
|
|
"learning_rate": 9.839661197207527e-06,
|
||
|
|
"loss": 0.1509,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03648955747485161,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 4383.9,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.952461799660441,
|
||
|
|
"grad_norm": 0.22033674195119468,
|
||
|
|
"learning_rate": 9.622430822110063e-06,
|
||
|
|
"loss": 0.1455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.060049720108509064,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 4436.6,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.977928692699491,
|
||
|
|
"grad_norm": 0.20840871296607372,
|
||
|
|
"learning_rate": 9.40686304032735e-06,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06423541158437729,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 5278.7,
|
||
|
|
"valid_targets_min": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.3112288683683397,
|
||
|
|
"learning_rate": 9.19299238803515e-06,
|
||
|
|
"loss": 0.1426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12402482330799103,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 4365.9,
|
||
|
|
"valid_targets_min": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.02546689303905,
|
||
|
|
"grad_norm": 0.21198712020884392,
|
||
|
|
"learning_rate": 8.980853129511584e-06,
|
||
|
|
"loss": 0.152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.045234695076942444,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 5211.6,
|
||
|
|
"valid_targets_min": 564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.050933786078098,
|
||
|
|
"grad_norm": 0.1832814377777405,
|
||
|
|
"learning_rate": 8.770479251647708e-06,
|
||
|
|
"loss": 0.1486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04300159588456154,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 4824.9,
|
||
|
|
"valid_targets_min": 649
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.076400679117148,
|
||
|
|
"grad_norm": 0.18323969987536556,
|
||
|
|
"learning_rate": 8.561904458502424e-06,
|
||
|
|
"loss": 0.1515,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05170092731714249,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 5709.0,
|
||
|
|
"valid_targets_min": 657
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.101867572156197,
|
||
|
|
"grad_norm": 0.17438645023828483,
|
||
|
|
"learning_rate": 8.355162165902785e-06,
|
||
|
|
"loss": 0.143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04301121085882187,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 5400.9,
|
||
|
|
"valid_targets_min": 821
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.127334465195246,
|
||
|
|
"grad_norm": 0.21968555614480795,
|
||
|
|
"learning_rate": 8.150285496090388e-06,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048399992287158966,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 4492.3,
|
||
|
|
"valid_targets_min": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.152801358234296,
|
||
|
|
"grad_norm": 0.17762702372598874,
|
||
|
|
"learning_rate": 7.947307272414874e-06,
|
||
|
|
"loss": 0.1464,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0411510169506073,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 5294.9,
|
||
|
|
"valid_targets_min": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.1782682512733444,
|
||
|
|
"grad_norm": 0.19722109935186782,
|
||
|
|
"learning_rate": 7.746260014075293e-06,
|
||
|
|
"loss": 0.1455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04638431593775749,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 5451.1,
|
||
|
|
"valid_targets_min": 626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.203735144312394,
|
||
|
|
"grad_norm": 0.21092644781830097,
|
||
|
|
"learning_rate": 7.547175930910187e-06,
|
||
|
|
"loss": 0.1507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.058665595948696136,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 5437.9,
|
||
|
|
"valid_targets_min": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.229202037351443,
|
||
|
|
"grad_norm": 0.18538550618712651,
|
||
|
|
"learning_rate": 7.350086918237238e-06,
|
||
|
|
"loss": 0.1423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04550827667117119,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 5939.2,
|
||
|
|
"valid_targets_min": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.254668930390492,
|
||
|
|
"grad_norm": 0.19790413231909332,
|
||
|
|
"learning_rate": 7.155024551743317e-06,
|
||
|
|
"loss": 0.1461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04877372458577156,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 4680.1,
|
||
|
|
"valid_targets_min": 294
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.280135823429542,
|
||
|
|
"grad_norm": 0.17992804017128824,
|
||
|
|
"learning_rate": 6.962020082425749e-06,
|
||
|
|
"loss": 0.14,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0452592596411705,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 4596.0,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.305602716468591,
|
||
|
|
"grad_norm": 0.21512679110303626,
|
||
|
|
"learning_rate": 6.771104431585551e-06,
|
||
|
|
"loss": 0.1543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05498050898313522,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 5272.1,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.33106960950764,
|
||
|
|
"grad_norm": 0.1874903612063792,
|
||
|
|
"learning_rate": 6.582308185873536e-06,
|
||
|
|
"loss": 0.1523,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04390951991081238,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 4639.8,
|
||
|
|
"valid_targets_min": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.356536502546689,
|
||
|
|
"grad_norm": 0.2013909570825948,
|
||
|
|
"learning_rate": 6.3956615923900214e-06,
|
||
|
|
"loss": 0.1495,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05374452471733093,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 5208.7,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.3820033955857385,
|
||
|
|
"grad_norm": 0.20150711758852105,
|
||
|
|
"learning_rate": 6.211194553838931e-06,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.045037642121315,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 4180.2,
|
||
|
|
"valid_targets_min": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.407470288624788,
|
||
|
|
"grad_norm": 0.20342486091636308,
|
||
|
|
"learning_rate": 6.028936623737067e-06,
|
||
|
|
"loss": 0.1525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05274080112576485,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 4714.8,
|
||
|
|
"valid_targets_min": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.432937181663837,
|
||
|
|
"grad_norm": 0.20387106581873626,
|
||
|
|
"learning_rate": 5.848917001679339e-06,
|
||
|
|
"loss": 0.1465,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04757241904735565,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 4019.6,
|
||
|
|
"valid_targets_min": 414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.458404074702886,
|
||
|
|
"grad_norm": 0.19042029685743941,
|
||
|
|
"learning_rate": 5.671164528660687e-06,
|
||
|
|
"loss": 0.1529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.041462354362010956,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 5469.5,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.483870967741936,
|
||
|
|
"grad_norm": 0.19866514160174117,
|
||
|
|
"learning_rate": 5.495707682455464e-06,
|
||
|
|
"loss": 0.1494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04318884387612343,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 4570.6,
|
||
|
|
"valid_targets_min": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.509337860780985,
|
||
|
|
"grad_norm": 0.19880221047276755,
|
||
|
|
"learning_rate": 5.322574573054991e-06,
|
||
|
|
"loss": 0.1568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03715455159544945,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 4258.1,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.534804753820034,
|
||
|
|
"grad_norm": 0.20412947916891577,
|
||
|
|
"learning_rate": 5.151792938164051e-06,
|
||
|
|
"loss": 0.1515,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05464012548327446,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 5134.4,
|
||
|
|
"valid_targets_min": 387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.560271646859083,
|
||
|
|
"grad_norm": 0.189743897553305,
|
||
|
|
"learning_rate": 4.983390138757027e-06,
|
||
|
|
"loss": 0.149,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.043821871280670166,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 5105.5,
|
||
|
|
"valid_targets_min": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5857385398981325,
|
||
|
|
"grad_norm": 0.2218260695480036,
|
||
|
|
"learning_rate": 4.817393154694399e-06,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07308252155780792,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 5176.9,
|
||
|
|
"valid_targets_min": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.611205432937181,
|
||
|
|
"grad_norm": 0.1948526443237716,
|
||
|
|
"learning_rate": 4.653828580400275e-06,
|
||
|
|
"loss": 0.146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0502275675535202,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 4895.3,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.636672325976231,
|
||
|
|
"grad_norm": 0.19490180065468093,
|
||
|
|
"learning_rate": 4.4927226206017e-06,
|
||
|
|
"loss": 0.1576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05357596278190613,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 5545.6,
|
||
|
|
"valid_targets_min": 233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.66213921901528,
|
||
|
|
"grad_norm": 0.1948458421330662,
|
||
|
|
"learning_rate": 4.334101086130409e-06,
|
||
|
|
"loss": 0.1541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04059261083602905,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 5798.2,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.687606112054329,
|
||
|
|
"grad_norm": 0.17712620714761174,
|
||
|
|
"learning_rate": 4.177989389787625e-06,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04724474996328354,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 6293.3,
|
||
|
|
"valid_targets_min": 661
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.713073005093379,
|
||
|
|
"grad_norm": 0.19834535803430425,
|
||
|
|
"learning_rate": 4.024412542272706e-06,
|
||
|
|
"loss": 0.1484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.052177608013153076,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 5706.2,
|
||
|
|
"valid_targets_min": 299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.738539898132428,
|
||
|
|
"grad_norm": 0.19806574977490346,
|
||
|
|
"learning_rate": 3.873395148176135e-06,
|
||
|
|
"loss": 0.1424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047021541744470596,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 4157.7,
|
||
|
|
"valid_targets_min": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.764006791171477,
|
||
|
|
"grad_norm": 0.17186350973731854,
|
||
|
|
"learning_rate": 3.724961402037661e-06,
|
||
|
|
"loss": 0.1421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05093415826559067,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 5404.0,
|
||
|
|
"valid_targets_min": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7894736842105265,
|
||
|
|
"grad_norm": 0.20509235758060426,
|
||
|
|
"learning_rate": 3.57913508447004e-06,
|
||
|
|
"loss": 0.1471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054411761462688446,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 4937.4,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.814940577249575,
|
||
|
|
"grad_norm": 0.18141549621559375,
|
||
|
|
"learning_rate": 3.4359395583491594e-06,
|
||
|
|
"loss": 0.148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.054643046110868454,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 5395.0,
|
||
|
|
"valid_targets_min": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.840407470288625,
|
||
|
|
"grad_norm": 0.20957711061633771,
|
||
|
|
"learning_rate": 3.2953977650710513e-06,
|
||
|
|
"loss": 0.1478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06269936263561249,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 7170.0,
|
||
|
|
"valid_targets_min": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.8658743633276735,
|
||
|
|
"grad_norm": 0.19968832235520612,
|
||
|
|
"learning_rate": 3.1575322208764714e-06,
|
||
|
|
"loss": 0.1425,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03520461916923523,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 5633.9,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.891341256366723,
|
||
|
|
"grad_norm": 0.18042191473733155,
|
||
|
|
"learning_rate": 3.0223650132435335e-06,
|
||
|
|
"loss": 0.1547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04744488745927811,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 4839.0,
|
||
|
|
"valid_targets_min": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.916808149405773,
|
||
|
|
"grad_norm": 0.19710041340370513,
|
||
|
|
"learning_rate": 2.8899177973490734e-06,
|
||
|
|
"loss": 0.1436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.046739302575588226,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 5891.5,
|
||
|
|
"valid_targets_min": 306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.942275042444821,
|
||
|
|
"grad_norm": 0.22661796840060533,
|
||
|
|
"learning_rate": 2.7602117925992964e-06,
|
||
|
|
"loss": 0.1508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05284252017736435,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 4766.3,
|
||
|
|
"valid_targets_min": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.967741935483871,
|
||
|
|
"grad_norm": 0.22336951734405172,
|
||
|
|
"learning_rate": 2.6332677792301773e-06,
|
||
|
|
"loss": 0.163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06108919158577919,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 4240.5,
|
||
|
|
"valid_targets_min": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.993208828522921,
|
||
|
|
"grad_norm": 0.1915920301061591,
|
||
|
|
"learning_rate": 2.5091060949782664e-06,
|
||
|
|
"loss": 0.1333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03976128250360489,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 6280.5,
|
||
|
|
"valid_targets_min": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.01528013582343,
|
||
|
|
"grad_norm": 0.19317503708680775,
|
||
|
|
"learning_rate": 2.3877466318223698e-06,
|
||
|
|
"loss": 0.1525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.03881210461258888,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 5353.2,
|
||
|
|
"valid_targets_min": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.040747028862479,
|
||
|
|
"grad_norm": 0.20764541418959367,
|
||
|
|
"learning_rate": 2.2692088327966655e-06,
|
||
|
|
"loss": 0.1467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044896483421325684,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 5634.2,
|
||
|
|
"valid_targets_min": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.066213921901528,
|
||
|
|
"grad_norm": 0.20593165601992494,
|
||
|
|
"learning_rate": 2.153511688875707e-06,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04371983930468559,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 3858.5,
|
||
|
|
"valid_targets_min": 738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0916808149405774,
|
||
|
|
"grad_norm": 0.21297043486058082,
|
||
|
|
"learning_rate": 2.0406737359318797e-06,
|
||
|
|
"loss": 0.1432,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0521714873611927,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 4474.7,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.117147707979626,
|
||
|
|
"grad_norm": 0.21186564633702598,
|
||
|
|
"learning_rate": 1.930713051765776e-06,
|
||
|
|
"loss": 0.1475,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04589856415987015,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 4510.9,
|
||
|
|
"valid_targets_min": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.142614601018676,
|
||
|
|
"grad_norm": 0.1990233774591213,
|
||
|
|
"learning_rate": 1.8236472532099413e-06,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05373925343155861,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 4050.8,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.168081494057725,
|
||
|
|
"grad_norm": 0.18833278391984534,
|
||
|
|
"learning_rate": 1.7194934933064654e-06,
|
||
|
|
"loss": 0.1462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.044683780521154404,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 4603.1,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.193548387096774,
|
||
|
|
"grad_norm": 0.2206745205430478,
|
||
|
|
"learning_rate": 1.6182684585588981e-06,
|
||
|
|
"loss": 0.1547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.057726383209228516,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 4889.2,
|
||
|
|
"valid_targets_min": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.219015280135824,
|
||
|
|
"grad_norm": 0.18839205856507873,
|
||
|
|
"learning_rate": 1.5199883662588954e-06,
|
||
|
|
"loss": 0.1448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.049824222922325134,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 5002.8,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.244482173174872,
|
||
|
|
"grad_norm": 0.18511976328013788,
|
||
|
|
"learning_rate": 1.4246689618880472e-06,
|
||
|
|
"loss": 0.143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053400591015815735,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 5463.7,
|
||
|
|
"valid_targets_min": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.269949066213922,
|
||
|
|
"grad_norm": 0.1850428475008149,
|
||
|
|
"learning_rate": 1.3323255165952875e-06,
|
||
|
|
"loss": 0.1448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04526630416512489,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 5302.2,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2954159592529715,
|
||
|
|
"grad_norm": 0.1806475924766929,
|
||
|
|
"learning_rate": 1.2429728247502926e-06,
|
||
|
|
"loss": 0.156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04810335487127304,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 5395.3,
|
||
|
|
"valid_targets_min": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.32088285229202,
|
||
|
|
"grad_norm": 0.19446809319802125,
|
||
|
|
"learning_rate": 1.156625201573287e-06,
|
||
|
|
"loss": 0.138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04256840795278549,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 4533.1,
|
||
|
|
"valid_targets_min": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.34634974533107,
|
||
|
|
"grad_norm": 0.17412413386777287,
|
||
|
|
"learning_rate": 1.0732964808415792e-06,
|
||
|
|
"loss": 0.1422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.043165817856788635,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 5473.6,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.3718166383701185,
|
||
|
|
"grad_norm": 0.1906858840368389,
|
||
|
|
"learning_rate": 9.93000012673262e-07,
|
||
|
|
"loss": 0.145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04549487307667732,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 5095.8,
|
||
|
|
"valid_targets_min": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.397283531409168,
|
||
|
|
"grad_norm": 0.1937848545695707,
|
||
|
|
"learning_rate": 9.157486613883759e-07,
|
||
|
|
"loss": 0.1472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04257480800151825,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 4423.5,
|
||
|
|
"valid_targets_min": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.422750424448218,
|
||
|
|
"grad_norm": 0.17775123277009072,
|
||
|
|
"learning_rate": 8.415548034479215e-07,
|
||
|
|
"loss": 0.1489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05277522653341293,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 5918.0,
|
||
|
|
"valid_targets_min": 681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.448217317487266,
|
||
|
|
"grad_norm": 0.19966295683357554,
|
||
|
|
"learning_rate": 7.704303254710188e-07,
|
||
|
|
"loss": 0.1594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.047615669667720795,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 4504.9,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.473684210526316,
|
||
|
|
"grad_norm": 0.20669844700159937,
|
||
|
|
"learning_rate": 7.023866223305487e-07,
|
||
|
|
"loss": 0.1434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053897857666015625,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 5455.9,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.499151103565365,
|
||
|
|
"grad_norm": 0.18115259950945548,
|
||
|
|
"learning_rate": 6.374345953275773e-07,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048284128308296204,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 4987.2,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.524617996604414,
|
||
|
|
"grad_norm": 0.17791064676741664,
|
||
|
|
"learning_rate": 5.755846504448604e-07,
|
||
|
|
"loss": 0.1528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.053075529634952545,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 5521.2,
|
||
|
|
"valid_targets_min": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.550084889643464,
|
||
|
|
"grad_norm": 0.17384477446959934,
|
||
|
|
"learning_rate": 5.16846696679687e-07,
|
||
|
|
"loss": 0.1468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04560650885105133,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 5726.9,
|
||
|
|
"valid_targets_min": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5755517826825125,
|
||
|
|
"grad_norm": 0.17427350773632405,
|
||
|
|
"learning_rate": 4.6123014445636605e-07,
|
||
|
|
"loss": 0.1443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05331622064113617,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 5595.4,
|
||
|
|
"valid_targets_min": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.601018675721562,
|
||
|
|
"grad_norm": 0.1847039591416848,
|
||
|
|
"learning_rate": 4.087439041185781e-07,
|
||
|
|
"loss": 0.1427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04816567897796631,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 5271.9,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.626485568760611,
|
||
|
|
"grad_norm": 0.2075639627371849,
|
||
|
|
"learning_rate": 3.5939638450183776e-07,
|
||
|
|
"loss": 0.1513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.048930585384368896,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 3848.1,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.65195246179966,
|
||
|
|
"grad_norm": 0.22392832964076018,
|
||
|
|
"learning_rate": 3.1319549158632444e-07,
|
||
|
|
"loss": 0.1553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05328825116157532,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 4645.8,
|
||
|
|
"valid_targets_min": 726
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.67741935483871,
|
||
|
|
"grad_norm": 0.3070786462946522,
|
||
|
|
"learning_rate": 2.701486272302534e-07,
|
||
|
|
"loss": 0.1424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06487414985895157,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 5223.2,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.702886247877759,
|
||
|
|
"grad_norm": 0.18439549160916988,
|
||
|
|
"learning_rate": 2.302626879840353e-07,
|
||
|
|
"loss": 0.1512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04168329015374184,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 5075.8,
|
||
|
|
"valid_targets_min": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.728353140916808,
|
||
|
|
"grad_norm": 0.16908764146487906,
|
||
|
|
"learning_rate": 1.9354406398535363e-07,
|
||
|
|
"loss": 0.1408,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04210246726870537,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 5584.5,
|
||
|
|
"valid_targets_min": 658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.753820033955858,
|
||
|
|
"grad_norm": 0.17630235972167382,
|
||
|
|
"learning_rate": 1.599986379354257e-07,
|
||
|
|
"loss": 0.1396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.042337290942668915,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 5165.1,
|
||
|
|
"valid_targets_min": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7792869269949065,
|
||
|
|
"grad_norm": 0.20555657737356492,
|
||
|
|
"learning_rate": 1.29631784156512e-07,
|
||
|
|
"loss": 0.147,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.051874805241823196,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 5075.5,
|
||
|
|
"valid_targets_min": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.804753820033956,
|
||
|
|
"grad_norm": 0.2322688373131418,
|
||
|
|
"learning_rate": 1.0244836773091182e-07,
|
||
|
|
"loss": 0.1466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0678434669971466,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 4481.3,
|
||
|
|
"valid_targets_min": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.830220713073005,
|
||
|
|
"grad_norm": 0.2065623953027286,
|
||
|
|
"learning_rate": 7.845274372151767e-08,
|
||
|
|
"loss": 0.149,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05911225453019142,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 6274.2,
|
||
|
|
"valid_targets_min": 838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.855687606112054,
|
||
|
|
"grad_norm": 0.19220426007499147,
|
||
|
|
"learning_rate": 5.7648756474084636e-08,
|
||
|
|
"loss": 0.1505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.04873078316450119,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 3809.2,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.881154499151103,
|
||
|
|
"grad_norm": 0.21298725476050626,
|
||
|
|
"learning_rate": 4.003973900133851e-08,
|
||
|
|
"loss": 0.1499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05683305114507675,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 5734.7,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.906621392190153,
|
||
|
|
"grad_norm": 0.187496545143332,
|
||
|
|
"learning_rate": 2.5628512448987453e-08,
|
||
|
|
"loss": 0.1478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0421915203332901,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 5190.7,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.932088285229202,
|
||
|
|
"grad_norm": 0.16945065292116984,
|
||
|
|
"learning_rate": 1.4417385643741289e-08,
|
||
|
|
"loss": 0.1467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05242417752742767,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 6694.6,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.957555178268251,
|
||
|
|
"grad_norm": 0.1868609584998114,
|
||
|
|
"learning_rate": 6.408154723420712e-09,
|
||
|
|
"loss": 0.1474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06806820631027222,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 6651.1,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.983022071307301,
|
||
|
|
"grad_norm": 0.19090476524097702,
|
||
|
|
"learning_rate": 1.6021028491941538e-09,
|
||
|
|
"loss": 0.145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.05688754841685295,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 4995.7,
|
||
|
|
"valid_targets_min": 544
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 1379,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 300,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 5.507594515378078e+18,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|