9145 lines
254 KiB
JSON
9145 lines
254 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4137,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008460236886632826,
|
|
"grad_norm": 13.550565862560147,
|
|
"learning_rate": 3.8647342995169085e-07,
|
|
"loss": 0.5928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5940208435058594,
|
|
"step": 5,
|
|
"valid_targets_mean": 5987.2,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 0.01692047377326565,
|
|
"grad_norm": 13.271805758299354,
|
|
"learning_rate": 8.695652173913044e-07,
|
|
"loss": 0.5819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5992782115936279,
|
|
"step": 10,
|
|
"valid_targets_mean": 5222.0,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 0.025380710659898477,
|
|
"grad_norm": 11.723706949288397,
|
|
"learning_rate": 1.3526570048309178e-06,
|
|
"loss": 0.5925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5386878252029419,
|
|
"step": 15,
|
|
"valid_targets_mean": 4792.3,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 0.0338409475465313,
|
|
"grad_norm": 9.366396904837638,
|
|
"learning_rate": 1.8357487922705318e-06,
|
|
"loss": 0.5436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5105949640274048,
|
|
"step": 20,
|
|
"valid_targets_mean": 5064.2,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.04230118443316413,
|
|
"grad_norm": 6.491763750747841,
|
|
"learning_rate": 2.3188405797101453e-06,
|
|
"loss": 0.5333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4740247130393982,
|
|
"step": 25,
|
|
"valid_targets_mean": 4292.6,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 0.050761421319796954,
|
|
"grad_norm": 3.5702984393974617,
|
|
"learning_rate": 2.801932367149759e-06,
|
|
"loss": 0.4545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4139009416103363,
|
|
"step": 30,
|
|
"valid_targets_mean": 5059.7,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 0.05922165820642978,
|
|
"grad_norm": 2.276822707641667,
|
|
"learning_rate": 3.2850241545893724e-06,
|
|
"loss": 0.4466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4260575771331787,
|
|
"step": 35,
|
|
"valid_targets_mean": 5339.8,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 0.0676818950930626,
|
|
"grad_norm": 1.2455650373052085,
|
|
"learning_rate": 3.768115942028986e-06,
|
|
"loss": 0.3783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3827280104160309,
|
|
"step": 40,
|
|
"valid_targets_mean": 5314.4,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 0.07614213197969544,
|
|
"grad_norm": 1.0556970052613233,
|
|
"learning_rate": 4.251207729468599e-06,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3635188937187195,
|
|
"step": 45,
|
|
"valid_targets_mean": 4748.6,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 0.08460236886632826,
|
|
"grad_norm": 0.9821921297357231,
|
|
"learning_rate": 4.7342995169082125e-06,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3691825270652771,
|
|
"step": 50,
|
|
"valid_targets_mean": 4049.0,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 0.09306260575296109,
|
|
"grad_norm": 0.7860553663863065,
|
|
"learning_rate": 5.2173913043478265e-06,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34921014308929443,
|
|
"step": 55,
|
|
"valid_targets_mean": 5323.3,
|
|
"valid_targets_min": 1863
|
|
},
|
|
{
|
|
"epoch": 0.10152284263959391,
|
|
"grad_norm": 0.6315893309077825,
|
|
"learning_rate": 5.70048309178744e-06,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3394542336463928,
|
|
"step": 60,
|
|
"valid_targets_mean": 5377.1,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 0.10998307952622674,
|
|
"grad_norm": 0.581176866983876,
|
|
"learning_rate": 6.1835748792270535e-06,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3163953125476837,
|
|
"step": 65,
|
|
"valid_targets_mean": 4908.8,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 0.11844331641285956,
|
|
"grad_norm": 0.5831306663278869,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31604528427124023,
|
|
"step": 70,
|
|
"valid_targets_mean": 5108.1,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 0.12690355329949238,
|
|
"grad_norm": 0.44812791974279287,
|
|
"learning_rate": 7.149758454106281e-06,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836999297142029,
|
|
"step": 75,
|
|
"valid_targets_mean": 5606.1,
|
|
"valid_targets_min": 2358
|
|
},
|
|
{
|
|
"epoch": 0.1353637901861252,
|
|
"grad_norm": 0.5485412692172336,
|
|
"learning_rate": 7.632850241545895e-06,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3379978537559509,
|
|
"step": 80,
|
|
"valid_targets_mean": 4742.0,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 0.14382402707275804,
|
|
"grad_norm": 0.5061914932876679,
|
|
"learning_rate": 8.115942028985508e-06,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32806557416915894,
|
|
"step": 85,
|
|
"valid_targets_mean": 5343.8,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 0.15228426395939088,
|
|
"grad_norm": 0.5141038306197633,
|
|
"learning_rate": 8.599033816425122e-06,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3256992697715759,
|
|
"step": 90,
|
|
"valid_targets_mean": 4635.7,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 0.16074450084602368,
|
|
"grad_norm": 0.49480466140928564,
|
|
"learning_rate": 9.082125603864736e-06,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31382113695144653,
|
|
"step": 95,
|
|
"valid_targets_mean": 5502.8,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 0.1692047377326565,
|
|
"grad_norm": 0.6073688242099473,
|
|
"learning_rate": 9.565217391304349e-06,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3620792627334595,
|
|
"step": 100,
|
|
"valid_targets_mean": 4349.2,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 0.17766497461928935,
|
|
"grad_norm": 0.545123801696058,
|
|
"learning_rate": 1.0048309178743962e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3439517617225647,
|
|
"step": 105,
|
|
"valid_targets_mean": 4780.7,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 0.18612521150592218,
|
|
"grad_norm": 0.4940064330602346,
|
|
"learning_rate": 1.0531400966183577e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2904033064842224,
|
|
"step": 110,
|
|
"valid_targets_mean": 4414.6,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.19458544839255498,
|
|
"grad_norm": 0.5189006564330562,
|
|
"learning_rate": 1.101449275362319e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25093141198158264,
|
|
"step": 115,
|
|
"valid_targets_mean": 4424.4,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 0.20304568527918782,
|
|
"grad_norm": 0.554838817657106,
|
|
"learning_rate": 1.1497584541062803e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708420753479004,
|
|
"step": 120,
|
|
"valid_targets_mean": 4469.4,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 0.21150592216582065,
|
|
"grad_norm": 0.4786506194853003,
|
|
"learning_rate": 1.1980676328502416e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803344130516052,
|
|
"step": 125,
|
|
"valid_targets_mean": 6088.1,
|
|
"valid_targets_min": 2952
|
|
},
|
|
{
|
|
"epoch": 0.21996615905245348,
|
|
"grad_norm": 0.5137957361473627,
|
|
"learning_rate": 1.2463768115942029e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31358495354652405,
|
|
"step": 130,
|
|
"valid_targets_mean": 5234.2,
|
|
"valid_targets_min": 2147
|
|
},
|
|
{
|
|
"epoch": 0.22842639593908629,
|
|
"grad_norm": 0.4534859058219339,
|
|
"learning_rate": 1.2946859903381644e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26073989272117615,
|
|
"step": 135,
|
|
"valid_targets_mean": 5635.8,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 0.23688663282571912,
|
|
"grad_norm": 0.48073621655414567,
|
|
"learning_rate": 1.3429951690821257e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24234168231487274,
|
|
"step": 140,
|
|
"valid_targets_mean": 5001.4,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 0.24534686971235195,
|
|
"grad_norm": 0.47032595853528714,
|
|
"learning_rate": 1.391304347826087e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26762863993644714,
|
|
"step": 145,
|
|
"valid_targets_mean": 5087.2,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 0.25380710659898476,
|
|
"grad_norm": 0.5512442137462736,
|
|
"learning_rate": 1.4396135265700483e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28231728076934814,
|
|
"step": 150,
|
|
"valid_targets_mean": 3779.8,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 0.2622673434856176,
|
|
"grad_norm": 0.5103795542409397,
|
|
"learning_rate": 1.4879227053140098e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566998600959778,
|
|
"step": 155,
|
|
"valid_targets_mean": 5426.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.2707275803722504,
|
|
"grad_norm": 0.6440162443013583,
|
|
"learning_rate": 1.536231884057971e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27988994121551514,
|
|
"step": 160,
|
|
"valid_targets_mean": 5427.8,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 0.27918781725888325,
|
|
"grad_norm": 0.44193890375183603,
|
|
"learning_rate": 1.5845410628019324e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581903040409088,
|
|
"step": 165,
|
|
"valid_targets_mean": 5781.6,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 0.2876480541455161,
|
|
"grad_norm": 0.5000816103771775,
|
|
"learning_rate": 1.6328502415458937e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28430601954460144,
|
|
"step": 170,
|
|
"valid_targets_mean": 5190.1,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 0.2961082910321489,
|
|
"grad_norm": 0.5549818857931835,
|
|
"learning_rate": 1.681159420289855e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27324509620666504,
|
|
"step": 175,
|
|
"valid_targets_mean": 4888.2,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 0.30456852791878175,
|
|
"grad_norm": 0.5700405090936757,
|
|
"learning_rate": 1.7294685990338163e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26071736216545105,
|
|
"step": 180,
|
|
"valid_targets_mean": 4880.3,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.3130287648054145,
|
|
"grad_norm": 0.5308165736015592,
|
|
"learning_rate": 1.7777777777777777e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689594626426697,
|
|
"step": 185,
|
|
"valid_targets_mean": 4553.6,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.32148900169204736,
|
|
"grad_norm": 0.5324602123317155,
|
|
"learning_rate": 1.8260869565217393e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672916054725647,
|
|
"step": 190,
|
|
"valid_targets_mean": 5158.1,
|
|
"valid_targets_min": 2363
|
|
},
|
|
{
|
|
"epoch": 0.3299492385786802,
|
|
"grad_norm": 0.5268444953814607,
|
|
"learning_rate": 1.8743961352657006e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24527263641357422,
|
|
"step": 195,
|
|
"valid_targets_mean": 4768.2,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.338409475465313,
|
|
"grad_norm": 0.5599652891514332,
|
|
"learning_rate": 1.922705314009662e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23499363660812378,
|
|
"step": 200,
|
|
"valid_targets_mean": 3776.0,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 0.34686971235194586,
|
|
"grad_norm": 0.5824144492975196,
|
|
"learning_rate": 1.9710144927536236e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30033043026924133,
|
|
"step": 205,
|
|
"valid_targets_mean": 5187.8,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 0.3553299492385787,
|
|
"grad_norm": 0.4939809531722893,
|
|
"learning_rate": 2.019323671497585e-05,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24299289286136627,
|
|
"step": 210,
|
|
"valid_targets_mean": 4991.9,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.3637901861252115,
|
|
"grad_norm": 0.5557598595749614,
|
|
"learning_rate": 2.067632850241546e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24369274079799652,
|
|
"step": 215,
|
|
"valid_targets_mean": 5662.2,
|
|
"valid_targets_min": 2816
|
|
},
|
|
{
|
|
"epoch": 0.37225042301184436,
|
|
"grad_norm": 0.5000259677052363,
|
|
"learning_rate": 2.1159420289855075e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25936776399612427,
|
|
"step": 220,
|
|
"valid_targets_mean": 5071.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 0.38071065989847713,
|
|
"grad_norm": 0.44476852551493634,
|
|
"learning_rate": 2.1642512077294685e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23750360310077667,
|
|
"step": 225,
|
|
"valid_targets_mean": 5994.4,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 0.38917089678510997,
|
|
"grad_norm": 1.2304528541794182,
|
|
"learning_rate": 2.21256038647343e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24853405356407166,
|
|
"step": 230,
|
|
"valid_targets_mean": 4282.9,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 0.3976311336717428,
|
|
"grad_norm": 0.6485654510574366,
|
|
"learning_rate": 2.2608695652173914e-05,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28122857213020325,
|
|
"step": 235,
|
|
"valid_targets_mean": 4597.8,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 0.40609137055837563,
|
|
"grad_norm": 0.5284641612748222,
|
|
"learning_rate": 2.3091787439613528e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2298058271408081,
|
|
"step": 240,
|
|
"valid_targets_mean": 4746.2,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.41455160744500846,
|
|
"grad_norm": 0.5032195140270087,
|
|
"learning_rate": 2.357487922705314e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25134193897247314,
|
|
"step": 245,
|
|
"valid_targets_mean": 5387.8,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.4230118443316413,
|
|
"grad_norm": 0.4677165351727786,
|
|
"learning_rate": 2.4057971014492757e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21930018067359924,
|
|
"step": 250,
|
|
"valid_targets_mean": 5013.1,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 0.43147208121827413,
|
|
"grad_norm": 0.45442025370906947,
|
|
"learning_rate": 2.4541062801932367e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23426589369773865,
|
|
"step": 255,
|
|
"valid_targets_mean": 5348.7,
|
|
"valid_targets_min": 2940
|
|
},
|
|
{
|
|
"epoch": 0.43993231810490696,
|
|
"grad_norm": 0.48002457868332377,
|
|
"learning_rate": 2.5024154589371983e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2222846895456314,
|
|
"step": 260,
|
|
"valid_targets_mean": 5255.3,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 0.44839255499153974,
|
|
"grad_norm": 0.5132497333788721,
|
|
"learning_rate": 2.5507246376811593e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21403613686561584,
|
|
"step": 265,
|
|
"valid_targets_mean": 4435.9,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.45685279187817257,
|
|
"grad_norm": 0.5703747284762789,
|
|
"learning_rate": 2.599033816425121e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27638232707977295,
|
|
"step": 270,
|
|
"valid_targets_mean": 4327.1,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 0.4653130287648054,
|
|
"grad_norm": 0.5249077640080672,
|
|
"learning_rate": 2.6473429951690826e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2443409115076065,
|
|
"step": 275,
|
|
"valid_targets_mean": 5012.4,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 0.47377326565143824,
|
|
"grad_norm": 0.4310085062237469,
|
|
"learning_rate": 2.6956521739130436e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21428534388542175,
|
|
"step": 280,
|
|
"valid_targets_mean": 5354.5,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 0.48223350253807107,
|
|
"grad_norm": 0.582036961354854,
|
|
"learning_rate": 2.7439613526570052e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25510644912719727,
|
|
"step": 285,
|
|
"valid_targets_mean": 4891.1,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 0.4906937394247039,
|
|
"grad_norm": 0.5087579960951107,
|
|
"learning_rate": 2.7922705314009665e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27939173579216003,
|
|
"step": 290,
|
|
"valid_targets_mean": 6010.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 0.49915397631133673,
|
|
"grad_norm": 0.542757863891318,
|
|
"learning_rate": 2.840579710144928e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2463659644126892,
|
|
"step": 295,
|
|
"valid_targets_mean": 4579.2,
|
|
"valid_targets_min": 2333
|
|
},
|
|
{
|
|
"epoch": 0.5076142131979695,
|
|
"grad_norm": 0.5328583284641185,
|
|
"learning_rate": 2.888888888888889e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23743540048599243,
|
|
"step": 300,
|
|
"valid_targets_mean": 4185.3,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.5160744500846024,
|
|
"grad_norm": 0.5475841443260192,
|
|
"learning_rate": 2.9371980676328508e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22340849041938782,
|
|
"step": 305,
|
|
"valid_targets_mean": 5538.2,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 0.5245346869712352,
|
|
"grad_norm": 0.6111602260466787,
|
|
"learning_rate": 2.9855072463768118e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502322793006897,
|
|
"step": 310,
|
|
"valid_targets_mean": 4369.8,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.5329949238578681,
|
|
"grad_norm": 0.4780513198121726,
|
|
"learning_rate": 3.0338164251207734e-05,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21728099882602692,
|
|
"step": 315,
|
|
"valid_targets_mean": 4700.3,
|
|
"valid_targets_min": 2610
|
|
},
|
|
{
|
|
"epoch": 0.5414551607445008,
|
|
"grad_norm": 0.5166273238752809,
|
|
"learning_rate": 3.0821256038647344e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2029610276222229,
|
|
"step": 320,
|
|
"valid_targets_mean": 4345.2,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 0.5499153976311336,
|
|
"grad_norm": 0.492895369845411,
|
|
"learning_rate": 3.130434782608696e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21461325883865356,
|
|
"step": 325,
|
|
"valid_targets_mean": 5366.8,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 0.5583756345177665,
|
|
"grad_norm": 0.7972003333851228,
|
|
"learning_rate": 3.178743961352657e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24187514185905457,
|
|
"step": 330,
|
|
"valid_targets_mean": 6370.7,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 0.5668358714043993,
|
|
"grad_norm": 0.5318420731993138,
|
|
"learning_rate": 3.227053140096619e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24010798335075378,
|
|
"step": 335,
|
|
"valid_targets_mean": 4513.4,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 0.5752961082910322,
|
|
"grad_norm": 0.6350384571203126,
|
|
"learning_rate": 3.2753623188405796e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2458498179912567,
|
|
"step": 340,
|
|
"valid_targets_mean": 5341.1,
|
|
"valid_targets_min": 2479
|
|
},
|
|
{
|
|
"epoch": 0.583756345177665,
|
|
"grad_norm": 0.5370004027655637,
|
|
"learning_rate": 3.323671497584541e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1987411081790924,
|
|
"step": 345,
|
|
"valid_targets_mean": 5804.2,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 0.5922165820642978,
|
|
"grad_norm": 0.5148831345337035,
|
|
"learning_rate": 3.371980676328502e-05,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24516241252422333,
|
|
"step": 350,
|
|
"valid_targets_mean": 5083.2,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 0.6006768189509306,
|
|
"grad_norm": 0.5616560369126067,
|
|
"learning_rate": 3.420289855072464e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25557953119277954,
|
|
"step": 355,
|
|
"valid_targets_mean": 4906.8,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 0.6091370558375635,
|
|
"grad_norm": 0.46740010348180594,
|
|
"learning_rate": 3.4685990338164256e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22764000296592712,
|
|
"step": 360,
|
|
"valid_targets_mean": 5779.9,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 0.6175972927241963,
|
|
"grad_norm": 0.5052255194932961,
|
|
"learning_rate": 3.5169082125603865e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2353506088256836,
|
|
"step": 365,
|
|
"valid_targets_mean": 5582.5,
|
|
"valid_targets_min": 2421
|
|
},
|
|
{
|
|
"epoch": 0.626057529610829,
|
|
"grad_norm": 0.4473982689564181,
|
|
"learning_rate": 3.565217391304348e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21068497002124786,
|
|
"step": 370,
|
|
"valid_targets_mean": 5522.8,
|
|
"valid_targets_min": 2207
|
|
},
|
|
{
|
|
"epoch": 0.6345177664974619,
|
|
"grad_norm": 0.46783750468093876,
|
|
"learning_rate": 3.61352657004831e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.220205157995224,
|
|
"step": 375,
|
|
"valid_targets_mean": 5413.2,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.6429780033840947,
|
|
"grad_norm": 0.5055865004661576,
|
|
"learning_rate": 3.661835748792271e-05,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2254999727010727,
|
|
"step": 380,
|
|
"valid_targets_mean": 4778.7,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.6514382402707276,
|
|
"grad_norm": 0.48862386851887035,
|
|
"learning_rate": 3.7101449275362325e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27320221066474915,
|
|
"step": 385,
|
|
"valid_targets_mean": 5421.1,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 0.6598984771573604,
|
|
"grad_norm": 0.5169343511868016,
|
|
"learning_rate": 3.7584541062801934e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24001258611679077,
|
|
"step": 390,
|
|
"valid_targets_mean": 4846.2,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 0.6683587140439933,
|
|
"grad_norm": 0.4665422242931767,
|
|
"learning_rate": 3.806763285024155e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23488229513168335,
|
|
"step": 395,
|
|
"valid_targets_mean": 5732.5,
|
|
"valid_targets_min": 1913
|
|
},
|
|
{
|
|
"epoch": 0.676818950930626,
|
|
"grad_norm": 0.6082000681675154,
|
|
"learning_rate": 3.855072463768116e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2453429251909256,
|
|
"step": 400,
|
|
"valid_targets_mean": 3878.6,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 0.6852791878172588,
|
|
"grad_norm": 0.4719845711895155,
|
|
"learning_rate": 3.903381642512078e-05,
|
|
"loss": 0.2164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21182672679424286,
|
|
"step": 405,
|
|
"valid_targets_mean": 5344.9,
|
|
"valid_targets_min": 2501
|
|
},
|
|
{
|
|
"epoch": 0.6937394247038917,
|
|
"grad_norm": 0.5343996608611009,
|
|
"learning_rate": 3.951690821256039e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502586245536804,
|
|
"step": 410,
|
|
"valid_targets_mean": 4388.1,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 0.7021996615905245,
|
|
"grad_norm": 0.4980400684307769,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21835613250732422,
|
|
"step": 415,
|
|
"valid_targets_mean": 5149.6,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 0.7106598984771574,
|
|
"grad_norm": 0.5064179186615263,
|
|
"learning_rate": 3.999982198645973e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22057250142097473,
|
|
"step": 420,
|
|
"valid_targets_mean": 4204.8,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 0.7191201353637902,
|
|
"grad_norm": 0.4605634806478446,
|
|
"learning_rate": 3.9999287949007806e-05,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1910340040922165,
|
|
"step": 425,
|
|
"valid_targets_mean": 5194.0,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 0.727580372250423,
|
|
"grad_norm": 0.5639144225300708,
|
|
"learning_rate": 3.999839789715081e-05,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21337902545928955,
|
|
"step": 430,
|
|
"valid_targets_mean": 5190.6,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 0.7360406091370558,
|
|
"grad_norm": 0.585265419764489,
|
|
"learning_rate": 3.999715184673287e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27958816289901733,
|
|
"step": 435,
|
|
"valid_targets_mean": 4706.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 0.7445008460236887,
|
|
"grad_norm": 0.5469927063279804,
|
|
"learning_rate": 3.999554981993538e-05,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2254151850938797,
|
|
"step": 440,
|
|
"valid_targets_mean": 5697.1,
|
|
"valid_targets_min": 2532
|
|
},
|
|
{
|
|
"epoch": 0.7529610829103215,
|
|
"grad_norm": 0.6853888063343746,
|
|
"learning_rate": 3.9993591845276585e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2301885038614273,
|
|
"step": 445,
|
|
"valid_targets_mean": 4598.3,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 0.7614213197969543,
|
|
"grad_norm": 0.5175880428745917,
|
|
"learning_rate": 3.9991277957611074e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20948538184165955,
|
|
"step": 450,
|
|
"valid_targets_mean": 4663.6,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.7698815566835872,
|
|
"grad_norm": 0.5258586413233737,
|
|
"learning_rate": 3.9988608198129186e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21092236042022705,
|
|
"step": 455,
|
|
"valid_targets_mean": 5536.7,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 0.7783417935702199,
|
|
"grad_norm": 0.4581221581900407,
|
|
"learning_rate": 3.9985582614356266e-05,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1957901120185852,
|
|
"step": 460,
|
|
"valid_targets_mean": 5439.0,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 0.7868020304568528,
|
|
"grad_norm": 0.5381480132849752,
|
|
"learning_rate": 3.998220126015178e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2263282984495163,
|
|
"step": 465,
|
|
"valid_targets_mean": 5475.1,
|
|
"valid_targets_min": 2266
|
|
},
|
|
{
|
|
"epoch": 0.7952622673434856,
|
|
"grad_norm": 0.5330032055118807,
|
|
"learning_rate": 3.997846419570843e-05,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.217029869556427,
|
|
"step": 470,
|
|
"valid_targets_mean": 5447.3,
|
|
"valid_targets_min": 1781
|
|
},
|
|
{
|
|
"epoch": 0.8037225042301185,
|
|
"grad_norm": 0.5493182898948761,
|
|
"learning_rate": 3.9974371487551015e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22503063082695007,
|
|
"step": 475,
|
|
"valid_targets_mean": 4483.1,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 0.8121827411167513,
|
|
"grad_norm": 0.5159430844590853,
|
|
"learning_rate": 3.9969923208535284e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2074272334575653,
|
|
"step": 480,
|
|
"valid_targets_mean": 5122.2,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 0.8206429780033841,
|
|
"grad_norm": 0.4459008299478174,
|
|
"learning_rate": 3.9965119437846624e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23702971637248993,
|
|
"step": 485,
|
|
"valid_targets_mean": 5876.9,
|
|
"valid_targets_min": 3038
|
|
},
|
|
{
|
|
"epoch": 0.8291032148900169,
|
|
"grad_norm": 0.5848983874191999,
|
|
"learning_rate": 3.995996026099866e-05,
|
|
"loss": 0.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23720139265060425,
|
|
"step": 490,
|
|
"valid_targets_mean": 4322.2,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 0.8375634517766497,
|
|
"grad_norm": 0.49905827233873506,
|
|
"learning_rate": 3.995444576983173e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2422059029340744,
|
|
"step": 495,
|
|
"valid_targets_mean": 4848.7,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 0.8460236886632826,
|
|
"grad_norm": 0.4942705016287664,
|
|
"learning_rate": 3.994857606251124e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1887921690940857,
|
|
"step": 500,
|
|
"valid_targets_mean": 4902.5,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 0.8544839255499154,
|
|
"grad_norm": 0.5101993208016913,
|
|
"learning_rate": 3.994235124352592e-05,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22199124097824097,
|
|
"step": 505,
|
|
"valid_targets_mean": 4131.5,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 0.8629441624365483,
|
|
"grad_norm": 0.5257126503418817,
|
|
"learning_rate": 3.993577142368598e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22355453670024872,
|
|
"step": 510,
|
|
"valid_targets_mean": 5359.0,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 0.871404399323181,
|
|
"grad_norm": 0.49294790290997303,
|
|
"learning_rate": 3.9928836720121135e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24246609210968018,
|
|
"step": 515,
|
|
"valid_targets_mean": 5644.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 0.8798646362098139,
|
|
"grad_norm": 0.49884718014658797,
|
|
"learning_rate": 3.992154725627849e-05,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23286427557468414,
|
|
"step": 520,
|
|
"valid_targets_mean": 5593.0,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 0.8883248730964467,
|
|
"grad_norm": 0.5063895951381985,
|
|
"learning_rate": 3.991390316192036e-05,
|
|
"loss": 0.2049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22068354487419128,
|
|
"step": 525,
|
|
"valid_targets_mean": 4883.7,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 0.8967851099830795,
|
|
"grad_norm": 0.6511023778632143,
|
|
"learning_rate": 3.990590457312199e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2216435670852661,
|
|
"step": 530,
|
|
"valid_targets_mean": 4864.8,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 0.9052453468697124,
|
|
"grad_norm": 0.45240463287828214,
|
|
"learning_rate": 3.9897551632269094e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20242667198181152,
|
|
"step": 535,
|
|
"valid_targets_mean": 5157.3,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 0.9137055837563451,
|
|
"grad_norm": 0.7042243248939775,
|
|
"learning_rate": 3.988884448805531e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20520509779453278,
|
|
"step": 540,
|
|
"valid_targets_mean": 4534.5,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 0.922165820642978,
|
|
"grad_norm": 0.4876983981590496,
|
|
"learning_rate": 3.9879783295479614e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22159594297409058,
|
|
"step": 545,
|
|
"valid_targets_mean": 4654.6,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 0.9306260575296108,
|
|
"grad_norm": 0.46422867641792875,
|
|
"learning_rate": 3.987036821584349e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19952347874641418,
|
|
"step": 550,
|
|
"valid_targets_mean": 5041.4,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 0.9390862944162437,
|
|
"grad_norm": 0.6573292552051783,
|
|
"learning_rate": 3.986059941674811e-05,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2387896329164505,
|
|
"step": 555,
|
|
"valid_targets_mean": 4162.5,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 0.9475465313028765,
|
|
"grad_norm": 0.5154514129465185,
|
|
"learning_rate": 3.985047707209133e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19981658458709717,
|
|
"step": 560,
|
|
"valid_targets_mean": 4430.3,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 0.9560067681895094,
|
|
"grad_norm": 0.4557363913410953,
|
|
"learning_rate": 3.984000136206458e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1911255121231079,
|
|
"step": 565,
|
|
"valid_targets_mean": 5058.5,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 0.9644670050761421,
|
|
"grad_norm": 0.5121021631902367,
|
|
"learning_rate": 3.982917247314969e-05,
|
|
"loss": 0.2002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20491699874401093,
|
|
"step": 570,
|
|
"valid_targets_mean": 4937.4,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 0.9729272419627749,
|
|
"grad_norm": 0.5077336225301499,
|
|
"learning_rate": 3.981799059811554e-05,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23445892333984375,
|
|
"step": 575,
|
|
"valid_targets_mean": 5470.0,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 0.9813874788494078,
|
|
"grad_norm": 0.4583823019751048,
|
|
"learning_rate": 3.980645593601465e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22711579501628876,
|
|
"step": 580,
|
|
"valid_targets_mean": 5523.4,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 0.9898477157360406,
|
|
"grad_norm": 0.4817560134634566,
|
|
"learning_rate": 3.979456869217962e-05,
|
|
"loss": 0.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19622814655303955,
|
|
"step": 585,
|
|
"valid_targets_mean": 4752.9,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 0.9983079526226735,
|
|
"grad_norm": 0.5037708561185498,
|
|
"learning_rate": 3.9782329078219494e-05,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20974299311637878,
|
|
"step": 590,
|
|
"valid_targets_mean": 4403.6,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.0067681895093064,
|
|
"grad_norm": 0.5427985005608751,
|
|
"learning_rate": 3.9769737312015964e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18172559142112732,
|
|
"step": 595,
|
|
"valid_targets_mean": 4998.5,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 1.015228426395939,
|
|
"grad_norm": 0.48026048199004173,
|
|
"learning_rate": 3.9756793617719525e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2016129493713379,
|
|
"step": 600,
|
|
"valid_targets_mean": 5866.0,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.023688663282572,
|
|
"grad_norm": 0.4916501302959123,
|
|
"learning_rate": 3.9743498225745466e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18868499994277954,
|
|
"step": 605,
|
|
"valid_targets_mean": 4387.6,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 1.0321489001692048,
|
|
"grad_norm": 0.5675574126464277,
|
|
"learning_rate": 3.972985137276975e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24072161316871643,
|
|
"step": 610,
|
|
"valid_targets_mean": 4429.9,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 1.0406091370558375,
|
|
"grad_norm": 0.49235686523160993,
|
|
"learning_rate": 3.971585330172485e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18050576746463776,
|
|
"step": 615,
|
|
"valid_targets_mean": 4421.5,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 1.0490693739424704,
|
|
"grad_norm": 0.44599061760328046,
|
|
"learning_rate": 3.970150426179538e-05,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2131604254245758,
|
|
"step": 620,
|
|
"valid_targets_mean": 5057.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.0575296108291032,
|
|
"grad_norm": 0.4700403253075101,
|
|
"learning_rate": 3.968680450841368e-05,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19388672709465027,
|
|
"step": 625,
|
|
"valid_targets_mean": 5343.1,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 1.0659898477157361,
|
|
"grad_norm": 0.4415807515527962,
|
|
"learning_rate": 3.967175430325527e-05,
|
|
"loss": 0.1967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18718497455120087,
|
|
"step": 630,
|
|
"valid_targets_mean": 5551.1,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.0744500846023688,
|
|
"grad_norm": 0.4779436757592297,
|
|
"learning_rate": 3.9656353914234176e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2019282877445221,
|
|
"step": 635,
|
|
"valid_targets_mean": 5005.2,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.0829103214890017,
|
|
"grad_norm": 0.6328843212631523,
|
|
"learning_rate": 3.964060361549817e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20527201890945435,
|
|
"step": 640,
|
|
"valid_targets_mean": 5494.0,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.0913705583756346,
|
|
"grad_norm": 0.5798337433170615,
|
|
"learning_rate": 3.96245036874239e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19272944331169128,
|
|
"step": 645,
|
|
"valid_targets_mean": 4591.1,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 1.0998307952622675,
|
|
"grad_norm": 0.49858652034600226,
|
|
"learning_rate": 3.9608054416611887e-05,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19531401991844177,
|
|
"step": 650,
|
|
"valid_targets_mean": 4984.9,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 1.1082910321489001,
|
|
"grad_norm": 0.5375188284299802,
|
|
"learning_rate": 3.959125609588143e-05,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2005653977394104,
|
|
"step": 655,
|
|
"valid_targets_mean": 4730.1,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 1.116751269035533,
|
|
"grad_norm": 0.5025344757352098,
|
|
"learning_rate": 3.957410902426537e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20729684829711914,
|
|
"step": 660,
|
|
"valid_targets_mean": 5334.9,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 1.125211505922166,
|
|
"grad_norm": 0.5056217634793785,
|
|
"learning_rate": 3.955661350700481e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20715957880020142,
|
|
"step": 665,
|
|
"valid_targets_mean": 5147.1,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 1.1336717428087986,
|
|
"grad_norm": 0.44406750868277767,
|
|
"learning_rate": 3.953876985554364e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20916059613227844,
|
|
"step": 670,
|
|
"valid_targets_mean": 5216.8,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.1421319796954315,
|
|
"grad_norm": 0.5196080639271501,
|
|
"learning_rate": 3.952057838752302e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19599583745002747,
|
|
"step": 675,
|
|
"valid_targets_mean": 4013.1,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 1.1505922165820643,
|
|
"grad_norm": 0.4349963753104149,
|
|
"learning_rate": 3.950203942677571e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17536047101020813,
|
|
"step": 680,
|
|
"valid_targets_mean": 5491.8,
|
|
"valid_targets_min": 2428
|
|
},
|
|
{
|
|
"epoch": 1.1590524534686972,
|
|
"grad_norm": 0.4769127458021984,
|
|
"learning_rate": 3.9483153303320316e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18676772713661194,
|
|
"step": 685,
|
|
"valid_targets_mean": 5066.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 1.16751269035533,
|
|
"grad_norm": 0.4575387320974584,
|
|
"learning_rate": 3.946392035335541e-05,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18085691332817078,
|
|
"step": 690,
|
|
"valid_targets_mean": 5194.2,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 1.1759729272419628,
|
|
"grad_norm": 0.5254757414263358,
|
|
"learning_rate": 3.944434091925354e-05,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20795777440071106,
|
|
"step": 695,
|
|
"valid_targets_mean": 5242.1,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 1.1844331641285957,
|
|
"grad_norm": 0.43948201571291107,
|
|
"learning_rate": 3.9424415349555145e-05,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16768401861190796,
|
|
"step": 700,
|
|
"valid_targets_mean": 6096.1,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 1.1928934010152283,
|
|
"grad_norm": 0.4501371644324456,
|
|
"learning_rate": 3.940414399896234e-05,
|
|
"loss": 0.1989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1981137990951538,
|
|
"step": 705,
|
|
"valid_targets_mean": 5631.0,
|
|
"valid_targets_min": 3215
|
|
},
|
|
{
|
|
"epoch": 1.2013536379018612,
|
|
"grad_norm": 0.5799595870401942,
|
|
"learning_rate": 3.9383527228332626e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20237240195274353,
|
|
"step": 710,
|
|
"valid_targets_mean": 5094.8,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 1.2098138747884941,
|
|
"grad_norm": 0.4351233658677141,
|
|
"learning_rate": 3.936256540467243e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17191343009471893,
|
|
"step": 715,
|
|
"valid_targets_mean": 5250.9,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 1.218274111675127,
|
|
"grad_norm": 0.4602533063445863,
|
|
"learning_rate": 3.9341258901130584e-05,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.186012864112854,
|
|
"step": 720,
|
|
"valid_targets_mean": 5371.9,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 1.2267343485617597,
|
|
"grad_norm": 0.4721651074027633,
|
|
"learning_rate": 3.931960809699172e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18820719420909882,
|
|
"step": 725,
|
|
"valid_targets_mean": 5016.1,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 1.2351945854483926,
|
|
"grad_norm": 0.40852490698168337,
|
|
"learning_rate": 3.929761337766946e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17621873319149017,
|
|
"step": 730,
|
|
"valid_targets_mean": 5527.6,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 1.2436548223350254,
|
|
"grad_norm": 0.49480802473568963,
|
|
"learning_rate": 3.927527513469958e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18733616173267365,
|
|
"step": 735,
|
|
"valid_targets_mean": 4506.0,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 1.252115059221658,
|
|
"grad_norm": 0.506483141577998,
|
|
"learning_rate": 3.925259376573307e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2057613730430603,
|
|
"step": 740,
|
|
"valid_targets_mean": 4166.4,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 1.260575296108291,
|
|
"grad_norm": 0.4147018934990673,
|
|
"learning_rate": 3.9229569674528985e-05,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17576982080936432,
|
|
"step": 745,
|
|
"valid_targets_mean": 6074.4,
|
|
"valid_targets_min": 2512
|
|
},
|
|
{
|
|
"epoch": 1.2690355329949239,
|
|
"grad_norm": 0.4610475183486664,
|
|
"learning_rate": 3.920620327094734e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20575100183486938,
|
|
"step": 750,
|
|
"valid_targets_mean": 4843.1,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 1.2774957698815568,
|
|
"grad_norm": 0.45866099262668036,
|
|
"learning_rate": 3.918249497094176e-05,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19560591876506805,
|
|
"step": 755,
|
|
"valid_targets_mean": 4433.0,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 1.2859560067681894,
|
|
"grad_norm": 0.4854573343661988,
|
|
"learning_rate": 3.915844519655208e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1879267394542694,
|
|
"step": 760,
|
|
"valid_targets_mean": 4572.7,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.2944162436548223,
|
|
"grad_norm": 0.5012660368442886,
|
|
"learning_rate": 3.913405437589685e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21660539507865906,
|
|
"step": 765,
|
|
"valid_targets_mean": 5289.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.3028764805414552,
|
|
"grad_norm": 0.44901651354395855,
|
|
"learning_rate": 3.910932294316571e-05,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16873972117900848,
|
|
"step": 770,
|
|
"valid_targets_mean": 5076.8,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 1.3113367174280879,
|
|
"grad_norm": 0.5187293417894832,
|
|
"learning_rate": 3.9084251338611626e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22978994250297546,
|
|
"step": 775,
|
|
"valid_targets_mean": 4863.4,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 1.3197969543147208,
|
|
"grad_norm": 0.563262321187526,
|
|
"learning_rate": 3.9058840008543136e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22565895318984985,
|
|
"step": 780,
|
|
"valid_targets_mean": 4367.1,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 1.3282571912013537,
|
|
"grad_norm": 0.475563327772266,
|
|
"learning_rate": 3.9033089405316304e-05,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16092956066131592,
|
|
"step": 785,
|
|
"valid_targets_mean": 5274.7,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 1.3367174280879865,
|
|
"grad_norm": 0.4961285389297545,
|
|
"learning_rate": 3.900699998732674e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18498799204826355,
|
|
"step": 790,
|
|
"valid_targets_mean": 4973.9,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.3451776649746192,
|
|
"grad_norm": 0.4765845811776378,
|
|
"learning_rate": 3.898057221900142e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21554306149482727,
|
|
"step": 795,
|
|
"valid_targets_mean": 5857.4,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 1.353637901861252,
|
|
"grad_norm": 0.5158170793979375,
|
|
"learning_rate": 3.895380657079038e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18528833985328674,
|
|
"step": 800,
|
|
"valid_targets_mean": 5465.2,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 1.362098138747885,
|
|
"grad_norm": 0.46117523617902056,
|
|
"learning_rate": 3.892670351915842e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17768925428390503,
|
|
"step": 805,
|
|
"valid_targets_mean": 5113.9,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 1.3705583756345177,
|
|
"grad_norm": 0.5115025374290036,
|
|
"learning_rate": 3.889926354657656e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17449840903282166,
|
|
"step": 810,
|
|
"valid_targets_mean": 5450.5,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 1.3790186125211505,
|
|
"grad_norm": 0.4882735542820637,
|
|
"learning_rate": 3.887148714151345e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181738018989563,
|
|
"step": 815,
|
|
"valid_targets_mean": 5074.0,
|
|
"valid_targets_min": 2679
|
|
},
|
|
{
|
|
"epoch": 1.3874788494077834,
|
|
"grad_norm": 0.5399097511757945,
|
|
"learning_rate": 3.884337479842672e-05,
|
|
"loss": 0.2053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21440020203590393,
|
|
"step": 820,
|
|
"valid_targets_mean": 4272.6,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.3959390862944163,
|
|
"grad_norm": 0.5911488508484037,
|
|
"learning_rate": 3.8814927017754136e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21045376360416412,
|
|
"step": 825,
|
|
"valid_targets_mean": 4733.8,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 1.404399323181049,
|
|
"grad_norm": 0.45604798422733656,
|
|
"learning_rate": 3.878614430590472e-05,
|
|
"loss": 0.1896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17496258020401,
|
|
"step": 830,
|
|
"valid_targets_mean": 4537.8,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.4128595600676819,
|
|
"grad_norm": 0.4737493055466367,
|
|
"learning_rate": 3.875702717524971e-05,
|
|
"loss": 0.173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19898968935012817,
|
|
"step": 835,
|
|
"valid_targets_mean": 5676.6,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 1.4213197969543148,
|
|
"grad_norm": 0.4828998463140644,
|
|
"learning_rate": 3.872757614411346e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21491539478302002,
|
|
"step": 840,
|
|
"valid_targets_mean": 4638.1,
|
|
"valid_targets_min": 2113
|
|
},
|
|
{
|
|
"epoch": 1.4297800338409474,
|
|
"grad_norm": 0.4772917800206705,
|
|
"learning_rate": 3.86977917367642e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17982393503189087,
|
|
"step": 845,
|
|
"valid_targets_mean": 4981.8,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 1.4382402707275803,
|
|
"grad_norm": 0.44964493966993735,
|
|
"learning_rate": 3.866767448340472e-05,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18098363280296326,
|
|
"step": 850,
|
|
"valid_targets_mean": 5042.6,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.4467005076142132,
|
|
"grad_norm": 0.5397105104605748,
|
|
"learning_rate": 3.8637224920162894e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19100229442119598,
|
|
"step": 855,
|
|
"valid_targets_mean": 4337.5,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 1.455160744500846,
|
|
"grad_norm": 0.4718134007565553,
|
|
"learning_rate": 3.8606443589082176e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17423993349075317,
|
|
"step": 860,
|
|
"valid_targets_mean": 5424.7,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 1.463620981387479,
|
|
"grad_norm": 0.42872130699004035,
|
|
"learning_rate": 3.857533103811196e-05,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.197713702917099,
|
|
"step": 865,
|
|
"valid_targets_mean": 5694.4,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 1.4720812182741116,
|
|
"grad_norm": 0.45840892635488045,
|
|
"learning_rate": 3.8543887821097754e-05,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1947043240070343,
|
|
"step": 870,
|
|
"valid_targets_mean": 4380.9,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 1.4805414551607445,
|
|
"grad_norm": 0.4898444892647538,
|
|
"learning_rate": 3.851211449777141e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17051412165164948,
|
|
"step": 875,
|
|
"valid_targets_mean": 4948.7,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 1.4890016920473772,
|
|
"grad_norm": 0.4854113879060671,
|
|
"learning_rate": 3.848001163374111e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1769687831401825,
|
|
"step": 880,
|
|
"valid_targets_mean": 4174.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.49746192893401,
|
|
"grad_norm": 0.46304528087972835,
|
|
"learning_rate": 3.844757980048129e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17404592037200928,
|
|
"step": 885,
|
|
"valid_targets_mean": 4889.9,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.505922165820643,
|
|
"grad_norm": 0.42834354358170396,
|
|
"learning_rate": 3.84148195753225e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1861925721168518,
|
|
"step": 890,
|
|
"valid_targets_mean": 5467.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 1.5143824027072759,
|
|
"grad_norm": 0.4656782999773032,
|
|
"learning_rate": 3.83817315414411e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1866692304611206,
|
|
"step": 895,
|
|
"valid_targets_mean": 4862.5,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 1.5228426395939088,
|
|
"grad_norm": 0.47056210078002325,
|
|
"learning_rate": 3.834831628784891e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16940632462501526,
|
|
"step": 900,
|
|
"valid_targets_mean": 4519.9,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 1.5313028764805414,
|
|
"grad_norm": 0.4252013660932989,
|
|
"learning_rate": 3.831457440938269e-05,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.174214705824852,
|
|
"step": 905,
|
|
"valid_targets_mean": 5083.5,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 1.5397631133671743,
|
|
"grad_norm": 0.47539514771634706,
|
|
"learning_rate": 3.8280506506693534e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19522397220134735,
|
|
"step": 910,
|
|
"valid_targets_mean": 5077.3,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 1.548223350253807,
|
|
"grad_norm": 0.5424144781443524,
|
|
"learning_rate": 3.8246113186236275e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1940397024154663,
|
|
"step": 915,
|
|
"valid_targets_mean": 4990.3,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 1.5566835871404399,
|
|
"grad_norm": 0.48562451117973277,
|
|
"learning_rate": 3.8211395060258565e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17793706059455872,
|
|
"step": 920,
|
|
"valid_targets_mean": 4522.7,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 1.5651438240270727,
|
|
"grad_norm": 0.5311528796208124,
|
|
"learning_rate": 3.817635274679006e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1961062252521515,
|
|
"step": 925,
|
|
"valid_targets_mean": 4307.6,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 1.5736040609137056,
|
|
"grad_norm": 0.47658510494076567,
|
|
"learning_rate": 3.8140986869631395e-05,
|
|
"loss": 0.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1828208863735199,
|
|
"step": 930,
|
|
"valid_targets_mean": 4432.1,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 1.5820642978003385,
|
|
"grad_norm": 0.560658002715923,
|
|
"learning_rate": 3.8105298058343064e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19097475707530975,
|
|
"step": 935,
|
|
"valid_targets_mean": 3703.4,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.5905245346869712,
|
|
"grad_norm": 0.5132720370773259,
|
|
"learning_rate": 3.806928694823423e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1904466450214386,
|
|
"step": 940,
|
|
"valid_targets_mean": 5068.7,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 1.598984771573604,
|
|
"grad_norm": 0.44557002654206584,
|
|
"learning_rate": 3.8032954180351415e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2225380539894104,
|
|
"step": 945,
|
|
"valid_targets_mean": 5685.8,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.6074450084602367,
|
|
"grad_norm": 0.56325366482875,
|
|
"learning_rate": 3.799630040146708e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19706985354423523,
|
|
"step": 950,
|
|
"valid_targets_mean": 3841.2,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 1.6159052453468696,
|
|
"grad_norm": 0.6201486790589118,
|
|
"learning_rate": 3.7959326264068124e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19432157278060913,
|
|
"step": 955,
|
|
"valid_targets_mean": 4467.6,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 1.6243654822335025,
|
|
"grad_norm": 0.5043179141382221,
|
|
"learning_rate": 3.792203242634426e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18448185920715332,
|
|
"step": 960,
|
|
"valid_targets_mean": 5777.9,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 1.6328257191201354,
|
|
"grad_norm": 0.45403607022743153,
|
|
"learning_rate": 3.7884419552176284e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17754754424095154,
|
|
"step": 965,
|
|
"valid_targets_mean": 6206.1,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.6412859560067683,
|
|
"grad_norm": 0.4826452741261295,
|
|
"learning_rate": 3.784648831112429e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14989638328552246,
|
|
"step": 970,
|
|
"valid_targets_mean": 4102.9,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 1.649746192893401,
|
|
"grad_norm": 0.4676201586139191,
|
|
"learning_rate": 3.7808239378415745e-05,
|
|
"loss": 0.1812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17905931174755096,
|
|
"step": 975,
|
|
"valid_targets_mean": 5080.6,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.6582064297800339,
|
|
"grad_norm": 0.47346083310338305,
|
|
"learning_rate": 3.7769673434933425e-05,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18687918782234192,
|
|
"step": 980,
|
|
"valid_targets_mean": 4323.2,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.4666156197188496,
|
|
"learning_rate": 3.773079116720334e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19744890928268433,
|
|
"step": 985,
|
|
"valid_targets_mean": 4728.0,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 1.6751269035532994,
|
|
"grad_norm": 0.5774562259181324,
|
|
"learning_rate": 3.769159326738251e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18352428078651428,
|
|
"step": 990,
|
|
"valid_targets_mean": 5721.6,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 1.6835871404399323,
|
|
"grad_norm": 0.43425054459170465,
|
|
"learning_rate": 3.7652080433246616e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18768930435180664,
|
|
"step": 995,
|
|
"valid_targets_mean": 5245.9,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 1.6920473773265652,
|
|
"grad_norm": 0.3966500805861816,
|
|
"learning_rate": 3.7612253368177626e-05,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17174136638641357,
|
|
"step": 1000,
|
|
"valid_targets_mean": 5759.6,
|
|
"valid_targets_min": 2972
|
|
},
|
|
{
|
|
"epoch": 1.700507614213198,
|
|
"grad_norm": 0.5171439476572143,
|
|
"learning_rate": 3.7572112781151224e-05,
|
|
"loss": 0.1985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19574686884880066,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4769.9,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 1.708967851099831,
|
|
"grad_norm": 0.4670811781342547,
|
|
"learning_rate": 3.7531659386724195e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18442121148109436,
|
|
"step": 1010,
|
|
"valid_targets_mean": 4399.9,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 1.7174280879864636,
|
|
"grad_norm": 0.422129911873122,
|
|
"learning_rate": 3.7490893905021736e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1851533055305481,
|
|
"step": 1015,
|
|
"valid_targets_mean": 5337.6,
|
|
"valid_targets_min": 2678
|
|
},
|
|
{
|
|
"epoch": 1.7258883248730963,
|
|
"grad_norm": 0.47734382146707427,
|
|
"learning_rate": 3.7449817061724634e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18302862346172333,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5057.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 1.7343485617597292,
|
|
"grad_norm": 0.43252727841234423,
|
|
"learning_rate": 3.740842958805631e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17234152555465698,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5871.3,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 1.742808798646362,
|
|
"grad_norm": 0.47812808405240226,
|
|
"learning_rate": 3.7366732220769824e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819082349538803,
|
|
"step": 1030,
|
|
"valid_targets_mean": 5243.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 1.751269035532995,
|
|
"grad_norm": 0.5203926707941086,
|
|
"learning_rate": 3.7324725702134796e-05,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20042525231838226,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4288.0,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 1.7597292724196278,
|
|
"grad_norm": 0.5982171968272748,
|
|
"learning_rate": 3.7282410779924114e-05,
|
|
"loss": 0.1967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21546810865402222,
|
|
"step": 1040,
|
|
"valid_targets_mean": 5145.4,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 1.7681895093062607,
|
|
"grad_norm": 0.4862322958113092,
|
|
"learning_rate": 3.723978820740071e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18794873356819153,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5054.8,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 1.7766497461928934,
|
|
"grad_norm": 0.49102441127503643,
|
|
"learning_rate": 3.719685874330407e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2122485339641571,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5380.0,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 1.785109983079526,
|
|
"grad_norm": 0.4326618779666083,
|
|
"learning_rate": 3.715362315183679e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15333399176597595,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5344.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.793570219966159,
|
|
"grad_norm": 0.44549574021472244,
|
|
"learning_rate": 3.711008220265093e-05,
|
|
"loss": 0.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1729809045791626,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5199.6,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 1.8020304568527918,
|
|
"grad_norm": 0.503740526068646,
|
|
"learning_rate": 3.706623667083435e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734774261713028,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4252.8,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 1.8104906937394247,
|
|
"grad_norm": 0.5229803547352467,
|
|
"learning_rate": 3.7022087336896896e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23427821695804596,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4270.0,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.8189509306260576,
|
|
"grad_norm": 0.5672081909344545,
|
|
"learning_rate": 3.6977634986756476e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655508577823639,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5403.0,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 1.8274111675126905,
|
|
"grad_norm": 0.475916275445546,
|
|
"learning_rate": 3.693288041172511e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17607484757900238,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4864.9,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 1.8358714043993232,
|
|
"grad_norm": 0.4484803008640689,
|
|
"learning_rate": 3.688782440849484e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18279041349887848,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4759.8,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 1.844331641285956,
|
|
"grad_norm": 0.43758369829135624,
|
|
"learning_rate": 3.684246777912353e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1620556116104126,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5347.7,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 1.8527918781725887,
|
|
"grad_norm": 0.45651542397223477,
|
|
"learning_rate": 3.67968113310206e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19337314367294312,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5591.7,
|
|
"valid_targets_min": 2188
|
|
},
|
|
{
|
|
"epoch": 1.8612521150592216,
|
|
"grad_norm": 0.45342047440526156,
|
|
"learning_rate": 3.675085587693263e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17368710041046143,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5225.9,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 1.8697123519458545,
|
|
"grad_norm": 0.44445444493689296,
|
|
"learning_rate": 3.670460223492895e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19034744799137115,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5756.8,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 1.8781725888324874,
|
|
"grad_norm": 0.46466065508312604,
|
|
"learning_rate": 3.6658051228387005e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20202375948429108,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4874.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.8866328257191203,
|
|
"grad_norm": 0.40825318876258565,
|
|
"learning_rate": 3.661120368597774e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17643505334854126,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5820.1,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 1.895093062605753,
|
|
"grad_norm": 0.4480921922277971,
|
|
"learning_rate": 3.656406044165085e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18309161067008972,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 1.9035532994923858,
|
|
"grad_norm": 0.44405937850826077,
|
|
"learning_rate": 3.651662233461991e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15674394369125366,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4938.8,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 1.9120135363790185,
|
|
"grad_norm": 0.4703798571936935,
|
|
"learning_rate": 3.646889020934746e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17999885976314545,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4544.7,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 1.9204737732656514,
|
|
"grad_norm": 0.4500289954011104,
|
|
"learning_rate": 3.6420864915529964e-05,
|
|
"loss": 0.1812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1868266761302948,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4898.5,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 1.9289340101522843,
|
|
"grad_norm": 0.44762203107992843,
|
|
"learning_rate": 3.637254730808268e-05,
|
|
"loss": 0.1853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16608929634094238,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5071.6,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 1.9373942470389172,
|
|
"grad_norm": 0.4461993303395474,
|
|
"learning_rate": 3.632393824712444e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20071442425251007,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5984.9,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 1.94585448392555,
|
|
"grad_norm": 0.4816384165112368,
|
|
"learning_rate": 3.6275038597962345e-05,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18033169209957123,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5072.4,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.9543147208121827,
|
|
"grad_norm": 0.4330770286736242,
|
|
"learning_rate": 3.6225849231076363e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20190589129924774,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5525.6,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 1.9627749576988156,
|
|
"grad_norm": 0.45319896720072145,
|
|
"learning_rate": 3.6176371022103835e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18416467308998108,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4817.2,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 1.9712351945854483,
|
|
"grad_norm": 0.53318192272576,
|
|
"learning_rate": 3.612660485182387e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.188938707113266,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3981.8,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 1.9796954314720812,
|
|
"grad_norm": 0.44192560670231057,
|
|
"learning_rate": 3.607655160614168e-05,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17359186708927155,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5168.6,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 1.988155668358714,
|
|
"grad_norm": 0.42981105011430426,
|
|
"learning_rate": 3.602621217607282e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17031371593475342,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5643.2,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 1.996615905245347,
|
|
"grad_norm": 0.47529237499014876,
|
|
"learning_rate": 3.59755874577273e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17048805952072144,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4990.6,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 2.00507614213198,
|
|
"grad_norm": 0.40192188351761127,
|
|
"learning_rate": 3.592467835229366e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1483587920665741,
|
|
"step": 1185,
|
|
"valid_targets_mean": 6308.6,
|
|
"valid_targets_min": 3419
|
|
},
|
|
{
|
|
"epoch": 2.0135363790186127,
|
|
"grad_norm": 0.4500189688550047,
|
|
"learning_rate": 3.58734857660229e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13474039733409882,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4726.1,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 2.021996615905245,
|
|
"grad_norm": 0.5420495176514685,
|
|
"learning_rate": 3.582201061021238e-05,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15263521671295166,
|
|
"step": 1195,
|
|
"valid_targets_mean": 5371.6,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 2.030456852791878,
|
|
"grad_norm": 0.5414401944590708,
|
|
"learning_rate": 3.577025380118956e-05,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646997183561325,
|
|
"step": 1200,
|
|
"valid_targets_mean": 4584.0,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 2.038917089678511,
|
|
"grad_norm": 0.4053407356009929,
|
|
"learning_rate": 3.571821626029574e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13890305161476135,
|
|
"step": 1205,
|
|
"valid_targets_mean": 6241.1,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 2.047377326565144,
|
|
"grad_norm": 0.5735464703165732,
|
|
"learning_rate": 3.5665898913869594e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16589976847171783,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4700.9,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 2.0558375634517767,
|
|
"grad_norm": 0.3798680736107315,
|
|
"learning_rate": 3.5613302693230736e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15606409311294556,
|
|
"step": 1215,
|
|
"valid_targets_mean": 6716.2,
|
|
"valid_targets_min": 3892
|
|
},
|
|
{
|
|
"epoch": 2.0642978003384096,
|
|
"grad_norm": 0.47534256347321013,
|
|
"learning_rate": 3.5560428534663096e-05,
|
|
"loss": 0.1619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15152978897094727,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5016.1,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 2.0727580372250425,
|
|
"grad_norm": 0.445308954959373,
|
|
"learning_rate": 3.5507277379398304e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1713963896036148,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5435.4,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 2.081218274111675,
|
|
"grad_norm": 0.41305044782343964,
|
|
"learning_rate": 3.545385017359889e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13976427912712097,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5201.6,
|
|
"valid_targets_min": 2128
|
|
},
|
|
{
|
|
"epoch": 2.089678510998308,
|
|
"grad_norm": 0.49000777335587264,
|
|
"learning_rate": 3.540014786834144e-05,
|
|
"loss": 0.1551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1551114320755005,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4572.3,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 2.0981387478849407,
|
|
"grad_norm": 0.43934341625870066,
|
|
"learning_rate": 3.534617141959973e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14957617223262787,
|
|
"step": 1240,
|
|
"valid_targets_mean": 5047.8,
|
|
"valid_targets_min": 2636
|
|
},
|
|
{
|
|
"epoch": 2.1065989847715736,
|
|
"grad_norm": 0.4586337156988163,
|
|
"learning_rate": 3.529192178822762e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19370266795158386,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5811.9,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 2.1150592216582065,
|
|
"grad_norm": 0.4590426646045105,
|
|
"learning_rate": 3.5237399939942006e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16032487154006958,
|
|
"step": 1250,
|
|
"valid_targets_mean": 5130.4,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 2.1235194585448394,
|
|
"grad_norm": 0.498059838292537,
|
|
"learning_rate": 3.5182606845305604e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15795816481113434,
|
|
"step": 1255,
|
|
"valid_targets_mean": 5266.6,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 2.1319796954314723,
|
|
"grad_norm": 0.4503224670922865,
|
|
"learning_rate": 3.5127543479709706e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15011468529701233,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4330.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 2.1404399323181047,
|
|
"grad_norm": 0.48742252715744816,
|
|
"learning_rate": 3.507221082335676e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16891446709632874,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4589.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 2.1489001692047376,
|
|
"grad_norm": 0.5026027232101132,
|
|
"learning_rate": 3.501660986124297e-05,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19233793020248413,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4669.2,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 2.1573604060913705,
|
|
"grad_norm": 0.42099338197816233,
|
|
"learning_rate": 3.496074158314077e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375967562198639,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5771.2,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 2.1658206429780034,
|
|
"grad_norm": 0.5910992123830302,
|
|
"learning_rate": 3.490460698358114e-05,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655455082654953,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4519.9,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 2.1742808798646363,
|
|
"grad_norm": 0.5247329603920639,
|
|
"learning_rate": 3.4848207061835955e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16585072875022888,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4548.4,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.182741116751269,
|
|
"grad_norm": 0.4710422999225154,
|
|
"learning_rate": 3.4791542821900194e-05,
|
|
"loss": 0.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18040716648101807,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5505.4,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 2.191201353637902,
|
|
"grad_norm": 0.47233916315046487,
|
|
"learning_rate": 3.4734615272474065e-05,
|
|
"loss": 0.16,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609891951084137,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4832.8,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 2.199661590524535,
|
|
"grad_norm": 0.46689897612221704,
|
|
"learning_rate": 3.4677425426945015e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15596657991409302,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4856.8,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 2.2081218274111674,
|
|
"grad_norm": 0.4675196212900623,
|
|
"learning_rate": 3.4619974303369735e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1661664843559265,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4841.8,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 2.2165820642978002,
|
|
"grad_norm": 0.44888989413387514,
|
|
"learning_rate": 3.4562262924456016e-05,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17136889696121216,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5180.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 2.225042301184433,
|
|
"grad_norm": 0.4491045644746825,
|
|
"learning_rate": 3.450429231754454e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16562819480895996,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5433.8,
|
|
"valid_targets_min": 2110
|
|
},
|
|
{
|
|
"epoch": 2.233502538071066,
|
|
"grad_norm": 0.5176652632999197,
|
|
"learning_rate": 3.444606351459061e-05,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1932157278060913,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4794.9,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 2.241962774957699,
|
|
"grad_norm": 0.4427324099708239,
|
|
"learning_rate": 3.4387577552145765e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16027715802192688,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4999.9,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 2.250423011844332,
|
|
"grad_norm": 0.4700949723891161,
|
|
"learning_rate": 3.4328835471339315e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15684500336647034,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4705.4,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 2.2588832487309647,
|
|
"grad_norm": 0.4001168306773032,
|
|
"learning_rate": 3.426983831785985e-05,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15065252780914307,
|
|
"step": 1335,
|
|
"valid_targets_mean": 5587.9,
|
|
"valid_targets_min": 2572
|
|
},
|
|
{
|
|
"epoch": 2.267343485617597,
|
|
"grad_norm": 0.8831603555514999,
|
|
"learning_rate": 3.4210587141936584e-05,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14559999108314514,
|
|
"step": 1340,
|
|
"valid_targets_mean": 5556.8,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 2.27580372250423,
|
|
"grad_norm": 0.41870042181340816,
|
|
"learning_rate": 3.415108299832067e-05,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1594214141368866,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5154.6,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 2.284263959390863,
|
|
"grad_norm": 0.4344455260410754,
|
|
"learning_rate": 3.409132694626643e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480238139629364,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5170.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 2.292724196277496,
|
|
"grad_norm": 0.4161087021696222,
|
|
"learning_rate": 3.403132004951253e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13673090934753418,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5008.3,
|
|
"valid_targets_min": 2136
|
|
},
|
|
{
|
|
"epoch": 2.3011844331641287,
|
|
"grad_norm": 0.4406394719758189,
|
|
"learning_rate": 3.397106337626294e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16267988085746765,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4902.1,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 2.3096446700507616,
|
|
"grad_norm": 0.5717184946375302,
|
|
"learning_rate": 3.391055799916806e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2242920696735382,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4307.5,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 2.3181049069373945,
|
|
"grad_norm": 0.4538753259746753,
|
|
"learning_rate": 3.3849804995305536e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17751973867416382,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4930.9,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 2.326565143824027,
|
|
"grad_norm": 0.42260404254792383,
|
|
"learning_rate": 3.378880544616108e-05,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14068442583084106,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5997.0,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 2.33502538071066,
|
|
"grad_norm": 0.4611806627086395,
|
|
"learning_rate": 3.372756043760926e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15757374465465546,
|
|
"step": 1380,
|
|
"valid_targets_mean": 5262.3,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.3434856175972927,
|
|
"grad_norm": 0.542780758066831,
|
|
"learning_rate": 3.366607105989417e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19393837451934814,
|
|
"step": 1385,
|
|
"valid_targets_mean": 5297.1,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 2.3519458544839256,
|
|
"grad_norm": 0.4772128678517535,
|
|
"learning_rate": 3.3604338407609984e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1424546241760254,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4186.8,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 2.3604060913705585,
|
|
"grad_norm": 0.4676504529666525,
|
|
"learning_rate": 3.3542363579681503e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16771553456783295,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5114.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 2.3688663282571913,
|
|
"grad_norm": 0.4102916711196719,
|
|
"learning_rate": 3.3480147679344574e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14777728915214539,
|
|
"step": 1400,
|
|
"valid_targets_mean": 5539.8,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 2.3773265651438242,
|
|
"grad_norm": 0.45578209518142226,
|
|
"learning_rate": 3.341769181412647e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14805658161640167,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4820.8,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 2.3857868020304567,
|
|
"grad_norm": 0.4237919016289559,
|
|
"learning_rate": 3.335499709582616e-05,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.148662269115448,
|
|
"step": 1410,
|
|
"valid_targets_mean": 6033.6,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 2.3942470389170896,
|
|
"grad_norm": 0.4970168997684864,
|
|
"learning_rate": 3.3292064640494515e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18160566687583923,
|
|
"step": 1415,
|
|
"valid_targets_mean": 5025.6,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 2.4027072758037225,
|
|
"grad_norm": 0.5093165904407829,
|
|
"learning_rate": 3.322889556841445e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15800192952156067,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4659.3,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 2.4111675126903553,
|
|
"grad_norm": 0.47588156906320306,
|
|
"learning_rate": 3.316549100408099e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18586465716362,
|
|
"step": 1425,
|
|
"valid_targets_mean": 5206.2,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 2.4196277495769882,
|
|
"grad_norm": 0.5390699552699545,
|
|
"learning_rate": 3.310185207618123e-05,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1907520443201065,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5165.6,
|
|
"valid_targets_min": 1461
|
|
},
|
|
{
|
|
"epoch": 2.428087986463621,
|
|
"grad_norm": 0.3998709815748944,
|
|
"learning_rate": 3.303797991757425e-05,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15011313557624817,
|
|
"step": 1435,
|
|
"valid_targets_mean": 6055.0,
|
|
"valid_targets_min": 2892
|
|
},
|
|
{
|
|
"epoch": 2.436548223350254,
|
|
"grad_norm": 0.4968987511798135,
|
|
"learning_rate": 3.2973875665270964e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13671809434890747,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4044.3,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 2.4450084602368864,
|
|
"grad_norm": 0.5562381335679685,
|
|
"learning_rate": 3.2909540460413856e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16330832242965698,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4217.7,
|
|
"valid_targets_min": 2112
|
|
},
|
|
{
|
|
"epoch": 2.4534686971235193,
|
|
"grad_norm": 0.4570823120695683,
|
|
"learning_rate": 3.2844975448256686e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16085098683834076,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5949.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 2.4619289340101522,
|
|
"grad_norm": 0.4878073015643177,
|
|
"learning_rate": 3.278018177814409e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1659071445465088,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5214.1,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 2.470389170896785,
|
|
"grad_norm": 0.4566735554184396,
|
|
"learning_rate": 3.271516060349114e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17824187874794006,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5024.1,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.478849407783418,
|
|
"grad_norm": 0.4515804240526931,
|
|
"learning_rate": 3.264991308176277e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14254231750965118,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4545.5,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.487309644670051,
|
|
"grad_norm": 0.4197426739156812,
|
|
"learning_rate": 3.2584440374453224e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14782416820526123,
|
|
"step": 1470,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 2.495769881556684,
|
|
"grad_norm": 0.519132560015327,
|
|
"learning_rate": 3.251874364706534e-05,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15622010827064514,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4514.0,
|
|
"valid_targets_min": 1964
|
|
},
|
|
{
|
|
"epoch": 2.504230118443316,
|
|
"grad_norm": 0.41874027418735116,
|
|
"learning_rate": 3.245282406908982e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1485244482755661,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5170.4,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 2.512690355329949,
|
|
"grad_norm": 0.5319462605076105,
|
|
"learning_rate": 3.2386682813984415e-05,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16047444939613342,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5305.9,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 2.521150592216582,
|
|
"grad_norm": 0.4422066534793984,
|
|
"learning_rate": 3.232032105915301e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15670092403888702,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4885.6,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 2.529610829103215,
|
|
"grad_norm": 0.5353046402901728,
|
|
"learning_rate": 3.225373998592471e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1738118678331375,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4288.8,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 2.5380710659898478,
|
|
"grad_norm": 0.42462616772811707,
|
|
"learning_rate": 3.2186940779532774e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15645335614681244,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5540.3,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 2.5465313028764807,
|
|
"grad_norm": 0.5196187140574408,
|
|
"learning_rate": 3.21199246290935e-05,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15462103486061096,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3787.6,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 2.5549915397631136,
|
|
"grad_norm": 0.5203143821814524,
|
|
"learning_rate": 3.205269272758513e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15906576812267303,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4676.1,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 2.563451776649746,
|
|
"grad_norm": 0.4927952478362897,
|
|
"learning_rate": 3.198524627182653e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.190546914935112,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5240.3,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 2.571912013536379,
|
|
"grad_norm": 0.4817788580694931,
|
|
"learning_rate": 3.191758646245596e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15032008290290833,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4996.0,
|
|
"valid_targets_min": 2390
|
|
},
|
|
{
|
|
"epoch": 2.5803722504230118,
|
|
"grad_norm": 0.5206450778515485,
|
|
"learning_rate": 3.184971450390961e-05,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1696622371673584,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4581.5,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 2.5888324873096447,
|
|
"grad_norm": 0.5327414483315036,
|
|
"learning_rate": 3.178163160440027e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15347623825073242,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4547.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.5972927241962775,
|
|
"grad_norm": 0.5446671418589988,
|
|
"learning_rate": 3.171333897589572e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16811826825141907,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3864.3,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 2.6057529610829104,
|
|
"grad_norm": 0.4441931794450234,
|
|
"learning_rate": 3.164483783409722e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14580821990966797,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5266.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 2.6142131979695433,
|
|
"grad_norm": 0.5194674748570998,
|
|
"learning_rate": 3.157612939841785e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1813579797744751,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4871.1,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 2.6226734348561758,
|
|
"grad_norm": 0.4615056486597965,
|
|
"learning_rate": 3.1507214891960795e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15680788457393646,
|
|
"step": 1550,
|
|
"valid_targets_mean": 5749.0,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 2.6311336717428087,
|
|
"grad_norm": 0.48448303917353747,
|
|
"learning_rate": 3.143809554149758e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1652015745639801,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4527.9,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 2.6395939086294415,
|
|
"grad_norm": 0.4745940322071671,
|
|
"learning_rate": 3.136877257744624e-05,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15696007013320923,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4827.1,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 2.6480541455160744,
|
|
"grad_norm": 0.47029163251653167,
|
|
"learning_rate": 3.12992472338494e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16431154310703278,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5540.9,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 2.6565143824027073,
|
|
"grad_norm": 0.46576483623439563,
|
|
"learning_rate": 3.1229520748352306e-05,
|
|
"loss": 0.1529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1618572473526001,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5083.0,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.66497461928934,
|
|
"grad_norm": 0.4097292049098021,
|
|
"learning_rate": 3.1159594362180815e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15929672122001648,
|
|
"step": 1575,
|
|
"valid_targets_mean": 5883.1,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 2.673434856175973,
|
|
"grad_norm": 0.428841605344436,
|
|
"learning_rate": 3.108946932011929e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15604759752750397,
|
|
"step": 1580,
|
|
"valid_targets_mean": 5249.9,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 2.6818950930626055,
|
|
"grad_norm": 0.8398743556425554,
|
|
"learning_rate": 3.101914687048843e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15508058667182922,
|
|
"step": 1585,
|
|
"valid_targets_mean": 5358.6,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 2.6903553299492384,
|
|
"grad_norm": 0.47135710948959575,
|
|
"learning_rate": 3.094862826512304e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14281907677650452,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4444.8,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.6988155668358713,
|
|
"grad_norm": 0.5591016014523688,
|
|
"learning_rate": 3.08779147593498e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14947618544101715,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4639.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.707275803722504,
|
|
"grad_norm": 0.4656811276701209,
|
|
"learning_rate": 3.0807007611964856e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1678958684206009,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4876.5,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 2.715736040609137,
|
|
"grad_norm": 0.503471145562384,
|
|
"learning_rate": 3.073590808521144e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15936635434627533,
|
|
"step": 1605,
|
|
"valid_targets_mean": 5706.6,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 2.72419627749577,
|
|
"grad_norm": 0.40803651312929234,
|
|
"learning_rate": 3.066461744475739e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14386966824531555,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5556.6,
|
|
"valid_targets_min": 2600
|
|
},
|
|
{
|
|
"epoch": 2.732656514382403,
|
|
"grad_norm": 0.4546766343835229,
|
|
"learning_rate": 3.059313695967265e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16490095853805542,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4873.2,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 2.7411167512690353,
|
|
"grad_norm": 0.5023169367381617,
|
|
"learning_rate": 3.052146790240663e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16127008199691772,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4445.9,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 2.749576988155668,
|
|
"grad_norm": 0.588420303846056,
|
|
"learning_rate": 3.0449611548765607e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15663179755210876,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4741.3,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 2.758037225042301,
|
|
"grad_norm": 0.5007663614491175,
|
|
"learning_rate": 3.0377569177889943e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16994991898536682,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5396.6,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 2.766497461928934,
|
|
"grad_norm": 0.5322331247472983,
|
|
"learning_rate": 3.0305342072231413e-05,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1510058045387268,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5534.1,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 2.774957698815567,
|
|
"grad_norm": 0.4813109292393679,
|
|
"learning_rate": 3.0232931517530285e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16733938455581665,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5360.2,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 2.7834179357021998,
|
|
"grad_norm": 0.3924333567466101,
|
|
"learning_rate": 3.016033880279248e-05,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14490291476249695,
|
|
"step": 1645,
|
|
"valid_targets_mean": 6093.1,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.7918781725888326,
|
|
"grad_norm": 0.4836244481309287,
|
|
"learning_rate": 3.0087565220266612e-05,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16277191042900085,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5388.1,
|
|
"valid_targets_min": 2853
|
|
},
|
|
{
|
|
"epoch": 2.800338409475465,
|
|
"grad_norm": 0.47452798918282857,
|
|
"learning_rate": 3.001461206542098e-05,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14367181062698364,
|
|
"step": 1655,
|
|
"valid_targets_mean": 5332.6,
|
|
"valid_targets_min": 1904
|
|
},
|
|
{
|
|
"epoch": 2.808798646362098,
|
|
"grad_norm": 0.4590702057091949,
|
|
"learning_rate": 2.9941480636920543e-05,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13985106348991394,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4860.8,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 2.817258883248731,
|
|
"grad_norm": 0.5249408169092435,
|
|
"learning_rate": 2.9868172236603726e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16879092156887054,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4939.3,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 2.8257191201353637,
|
|
"grad_norm": 0.41127142343692735,
|
|
"learning_rate": 2.9794688169459327e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1544240117073059,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5823.6,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 2.8341793570219966,
|
|
"grad_norm": 0.4708383589499081,
|
|
"learning_rate": 2.972102974360324e-05,
|
|
"loss": 0.1516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1473483443260193,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4595.3,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 2.8426395939086295,
|
|
"grad_norm": 0.4953875805477632,
|
|
"learning_rate": 2.9647198270255183e-05,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1636551022529602,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4507.3,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 2.8510998307952624,
|
|
"grad_norm": 0.4371656399998726,
|
|
"learning_rate": 2.9573195063715343e-05,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15377205610275269,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5378.6,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 2.859560067681895,
|
|
"grad_norm": 0.4725284308990612,
|
|
"learning_rate": 2.9499021441341012e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15661120414733887,
|
|
"step": 1690,
|
|
"valid_targets_mean": 5546.1,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 2.868020304568528,
|
|
"grad_norm": 0.43020093910830515,
|
|
"learning_rate": 2.9424678723523085e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15594634413719177,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5153.7,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 2.8764805414551606,
|
|
"grad_norm": 0.45438777570782185,
|
|
"learning_rate": 2.9350168233662612e-05,
|
|
"loss": 0.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17989277839660645,
|
|
"step": 1700,
|
|
"valid_targets_mean": 5074.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.8849407783417935,
|
|
"grad_norm": 0.45448336515941723,
|
|
"learning_rate": 2.9275491298147205e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1612529456615448,
|
|
"step": 1705,
|
|
"valid_targets_mean": 5661.2,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 2.8934010152284264,
|
|
"grad_norm": 0.5535914332084444,
|
|
"learning_rate": 2.920064924632742e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20866410434246063,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3731.2,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 2.9018612521150593,
|
|
"grad_norm": 0.5013715414322812,
|
|
"learning_rate": 2.9125643410493127e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18296000361442566,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4993.7,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 2.910321489001692,
|
|
"grad_norm": 0.4953445658022114,
|
|
"learning_rate": 2.9050475125849757e-05,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1697656661272049,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4565.5,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 2.9187817258883246,
|
|
"grad_norm": 0.48938562462741003,
|
|
"learning_rate": 2.8975145730494562e-05,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15301690995693207,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5422.2,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 2.927241962774958,
|
|
"grad_norm": 0.4976865295645525,
|
|
"learning_rate": 2.889965656539277e-05,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16401198506355286,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5847.1,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 2.9357021996615904,
|
|
"grad_norm": 0.5377996148626777,
|
|
"learning_rate": 2.8824008974353745e-05,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1667400300502777,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4009.6,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 2.9441624365482233,
|
|
"grad_norm": 0.46734280029079045,
|
|
"learning_rate": 2.874820430400702e-05,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15590521693229675,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4965.5,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 2.952622673434856,
|
|
"grad_norm": 0.48170984485486423,
|
|
"learning_rate": 2.8672243903778385e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15455186367034912,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4198.9,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 2.961082910321489,
|
|
"grad_norm": 0.49096268058066084,
|
|
"learning_rate": 2.859612912586581e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16107434034347534,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4709.2,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 2.969543147208122,
|
|
"grad_norm": 0.4167285084153793,
|
|
"learning_rate": 2.8519861325215408e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1771593987941742,
|
|
"step": 1755,
|
|
"valid_targets_mean": 6754.7,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 2.9780033840947544,
|
|
"grad_norm": 0.45288360274937667,
|
|
"learning_rate": 2.8443441859497293e-05,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1530476063489914,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5220.6,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 2.9864636209813877,
|
|
"grad_norm": 0.42408020266101604,
|
|
"learning_rate": 2.8366872089081426e-05,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13717344403266907,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5217.3,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 2.99492385786802,
|
|
"grad_norm": 0.5159903532951695,
|
|
"learning_rate": 2.8290153377013406e-05,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19514843821525574,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4551.5,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.003384094754653,
|
|
"grad_norm": 0.4420723449521131,
|
|
"learning_rate": 2.8213287088990184e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15232232213020325,
|
|
"step": 1775,
|
|
"valid_targets_mean": 5349.9,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 3.011844331641286,
|
|
"grad_norm": 0.5116220893657077,
|
|
"learning_rate": 2.8136274593335767e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11911862343549728,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5551.9,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 3.020304568527919,
|
|
"grad_norm": 0.4113239399201233,
|
|
"learning_rate": 2.8059117260976848e-05,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12352170795202255,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5401.5,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 3.0287648054145517,
|
|
"grad_norm": 0.5064832828121603,
|
|
"learning_rate": 2.7981816465418425e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15855196118354797,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4811.9,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 3.0372250423011846,
|
|
"grad_norm": 0.5331900860425771,
|
|
"learning_rate": 2.790437358271932e-05,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1453004777431488,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4946.8,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.045685279187817,
|
|
"grad_norm": 0.4898183986678095,
|
|
"learning_rate": 2.7826789991467707e-05,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13452716171741486,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4563.1,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 3.05414551607445,
|
|
"grad_norm": 0.5046367333644931,
|
|
"learning_rate": 2.7749067072756567e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1186387687921524,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4541.8,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 3.062605752961083,
|
|
"grad_norm": 0.49914204598820533,
|
|
"learning_rate": 2.767120621015908e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13256406784057617,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4427.9,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 3.0710659898477157,
|
|
"grad_norm": 0.4746853379724344,
|
|
"learning_rate": 2.7593208789704037e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13574594259262085,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4824.0,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 3.0795262267343486,
|
|
"grad_norm": 0.4768440214667582,
|
|
"learning_rate": 2.7515076199851127e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262766420841217,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5187.0,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 3.0879864636209815,
|
|
"grad_norm": 0.49743501352979713,
|
|
"learning_rate": 2.743680983146624e-05,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14144861698150635,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4679.8,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 3.0964467005076144,
|
|
"grad_norm": 0.5126807374285746,
|
|
"learning_rate": 2.735841107779671e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13649937510490417,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4321.4,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 3.104906937394247,
|
|
"grad_norm": 0.44685699972509113,
|
|
"learning_rate": 2.7279881334446508e-05,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15724554657936096,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5814.1,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 3.1133671742808797,
|
|
"grad_norm": 0.5321519466654511,
|
|
"learning_rate": 2.7201221999351406e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13165681064128876,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5124.4,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 3.1218274111675126,
|
|
"grad_norm": 0.48498321839546954,
|
|
"learning_rate": 2.7122434472754058e-05,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13917747139930725,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5066.8,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 3.1302876480541455,
|
|
"grad_norm": 0.42767134499991216,
|
|
"learning_rate": 2.704352015717913e-05,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11487990617752075,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5800.7,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 3.1387478849407784,
|
|
"grad_norm": 0.4572224669358456,
|
|
"learning_rate": 2.6964480457408286e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12481475621461868,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5419.2,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.1472081218274113,
|
|
"grad_norm": 0.48662962848726266,
|
|
"learning_rate": 2.6885316780455208e-05,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13712885975837708,
|
|
"step": 1860,
|
|
"valid_targets_mean": 5268.6,
|
|
"valid_targets_min": 2790
|
|
},
|
|
{
|
|
"epoch": 3.155668358714044,
|
|
"grad_norm": 0.5683414150256999,
|
|
"learning_rate": 2.6806030535540527e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15892931818962097,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4186.8,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 3.164128595600677,
|
|
"grad_norm": 0.4730723850712263,
|
|
"learning_rate": 2.6726623134066768e-05,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15399762988090515,
|
|
"step": 1870,
|
|
"valid_targets_mean": 5436.8,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 3.1725888324873095,
|
|
"grad_norm": 0.4906999099489448,
|
|
"learning_rate": 2.6647095989593194e-05,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12929117679595947,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4812.7,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.1810490693739424,
|
|
"grad_norm": 0.47148731590299137,
|
|
"learning_rate": 2.6567450517810657e-05,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394195705652237,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5537.2,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 3.1895093062605753,
|
|
"grad_norm": 0.4421266686486959,
|
|
"learning_rate": 2.64876881365164e-05,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13396847248077393,
|
|
"step": 1885,
|
|
"valid_targets_mean": 5977.9,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 3.197969543147208,
|
|
"grad_norm": 0.44404638890811393,
|
|
"learning_rate": 2.6407810265588806e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13543297350406647,
|
|
"step": 1890,
|
|
"valid_targets_mean": 6234.9,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 3.206429780033841,
|
|
"grad_norm": 0.5715461852108062,
|
|
"learning_rate": 2.632781832696214e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14457839727401733,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4201.9,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 3.214890016920474,
|
|
"grad_norm": 0.4983355321351684,
|
|
"learning_rate": 2.6247713744601214e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15222328901290894,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4955.2,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 3.223350253807107,
|
|
"grad_norm": 0.5253993796445995,
|
|
"learning_rate": 2.6167497944476056e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17441003024578094,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4202.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.2318104906937393,
|
|
"grad_norm": 0.5028020028850634,
|
|
"learning_rate": 2.6087172354536536e-05,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1370972990989685,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4758.9,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 3.240270727580372,
|
|
"grad_norm": 0.44029838413486516,
|
|
"learning_rate": 2.6006738404686908e-05,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251326948404312,
|
|
"step": 1915,
|
|
"valid_targets_mean": 5456.8,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 3.248730964467005,
|
|
"grad_norm": 0.4751826929870349,
|
|
"learning_rate": 2.592619752676039e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16267669200897217,
|
|
"step": 1920,
|
|
"valid_targets_mean": 5672.6,
|
|
"valid_targets_min": 1868
|
|
},
|
|
{
|
|
"epoch": 3.257191201353638,
|
|
"grad_norm": 0.4202586065675278,
|
|
"learning_rate": 2.5845551154493668e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1344599574804306,
|
|
"step": 1925,
|
|
"valid_targets_mean": 6683.9,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 3.265651438240271,
|
|
"grad_norm": 0.4604806922435822,
|
|
"learning_rate": 2.576480072350135e-05,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11901302635669708,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4499.6,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 3.2741116751269037,
|
|
"grad_norm": 0.499436455119448,
|
|
"learning_rate": 2.5683947671250465e-05,
|
|
"loss": 0.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14189772307872772,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5042.9,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 3.2825719120135366,
|
|
"grad_norm": 0.47832416595410066,
|
|
"learning_rate": 2.5602993437034814e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11368954181671143,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4791.8,
|
|
"valid_targets_min": 2304
|
|
},
|
|
{
|
|
"epoch": 3.291032148900169,
|
|
"grad_norm": 0.4552328895260579,
|
|
"learning_rate": 2.5521939461949384e-05,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13419240713119507,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4862.1,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 3.299492385786802,
|
|
"grad_norm": 0.45606634000885626,
|
|
"learning_rate": 2.5440787188864667e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12509828805923462,
|
|
"step": 1950,
|
|
"valid_targets_mean": 5235.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.307952622673435,
|
|
"grad_norm": 0.4955270069523444,
|
|
"learning_rate": 2.5359538062401016e-05,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13792753219604492,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4246.7,
|
|
"valid_targets_min": 2047
|
|
},
|
|
{
|
|
"epoch": 3.3164128595600677,
|
|
"grad_norm": 0.4939975716309515,
|
|
"learning_rate": 2.5278193528902902e-05,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382458508014679,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4742.6,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 3.3248730964467006,
|
|
"grad_norm": 0.44798906149918344,
|
|
"learning_rate": 2.5196755036413153e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13484597206115723,
|
|
"step": 1965,
|
|
"valid_targets_mean": 5432.9,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.5175524411244139,
|
|
"learning_rate": 2.5115224034647207e-05,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13900785148143768,
|
|
"step": 1970,
|
|
"valid_targets_mean": 5206.4,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 3.3417935702199664,
|
|
"grad_norm": 0.43812178995985585,
|
|
"learning_rate": 2.5033601974967302e-05,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10131923854351044,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4715.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 3.350253807106599,
|
|
"grad_norm": 0.6419217947846552,
|
|
"learning_rate": 2.4951890310356602e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1283770054578781,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3841.7,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 3.3587140439932317,
|
|
"grad_norm": 0.46782174558639844,
|
|
"learning_rate": 2.4870090495393386e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442975252866745,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5235.8,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 3.3671742808798646,
|
|
"grad_norm": 0.4333200024710141,
|
|
"learning_rate": 2.4788203986225115e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13630858063697815,
|
|
"step": 1990,
|
|
"valid_targets_mean": 5427.7,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 3.3756345177664975,
|
|
"grad_norm": 0.4599924703555542,
|
|
"learning_rate": 2.4706232240542532e-05,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1525718867778778,
|
|
"step": 1995,
|
|
"valid_targets_mean": 5828.1,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 3.3840947546531304,
|
|
"grad_norm": 0.45305962341259265,
|
|
"learning_rate": 2.4624176717553704e-05,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11804615706205368,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4663.2,
|
|
"valid_targets_min": 2879
|
|
},
|
|
{
|
|
"epoch": 3.3925549915397633,
|
|
"grad_norm": 0.44307148304977667,
|
|
"learning_rate": 2.4542038877958046e-05,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1325203776359558,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5747.2,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 3.401015228426396,
|
|
"grad_norm": 0.42800799155663205,
|
|
"learning_rate": 2.445982018392032e-05,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213143840432167,
|
|
"step": 2010,
|
|
"valid_targets_mean": 5969.4,
|
|
"valid_targets_min": 2512
|
|
},
|
|
{
|
|
"epoch": 3.4094754653130286,
|
|
"grad_norm": 0.4380602733849998,
|
|
"learning_rate": 2.437752209904459e-05,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12374117225408554,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5541.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 3.4179357021996615,
|
|
"grad_norm": 0.47745340566019395,
|
|
"learning_rate": 2.4295146088348226e-05,
|
|
"loss": 0.1283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11294626444578171,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4372.5,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 3.4263959390862944,
|
|
"grad_norm": 0.4421330749700307,
|
|
"learning_rate": 2.4212693618235742e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12031947076320648,
|
|
"step": 2025,
|
|
"valid_targets_mean": 5296.8,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 3.4348561759729273,
|
|
"grad_norm": 0.4503060634226028,
|
|
"learning_rate": 2.413016615647275e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12349995970726013,
|
|
"step": 2030,
|
|
"valid_targets_mean": 5364.5,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 3.44331641285956,
|
|
"grad_norm": 0.5096703479780633,
|
|
"learning_rate": 2.4047565172159817e-05,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14142143726348877,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5079.9,
|
|
"valid_targets_min": 1749
|
|
},
|
|
{
|
|
"epoch": 3.451776649746193,
|
|
"grad_norm": 0.4122613990443822,
|
|
"learning_rate": 2.3964892135706316e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10684022307395935,
|
|
"step": 2040,
|
|
"valid_targets_mean": 5627.8,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 3.460236886632826,
|
|
"grad_norm": 0.47847548842152565,
|
|
"learning_rate": 2.3882148518804215e-05,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12360725551843643,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4422.4,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.4686971235194584,
|
|
"grad_norm": 0.4843211061066756,
|
|
"learning_rate": 2.3799335794401955e-05,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13680878281593323,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4725.1,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 3.4771573604060912,
|
|
"grad_norm": 0.47201690143953146,
|
|
"learning_rate": 2.3716455436678142e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13971850275993347,
|
|
"step": 2055,
|
|
"valid_targets_mean": 5484.4,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 3.485617597292724,
|
|
"grad_norm": 0.520507148576224,
|
|
"learning_rate": 2.3633508921015382e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12103571742773056,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4525.1,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 3.494077834179357,
|
|
"grad_norm": 0.4826254385683438,
|
|
"learning_rate": 2.3550497723973957e-05,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13892921805381775,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4760.8,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 3.50253807106599,
|
|
"grad_norm": 0.5435007594935469,
|
|
"learning_rate": 2.346742332326558e-05,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13652250170707703,
|
|
"step": 2070,
|
|
"valid_targets_mean": 5215.1,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 3.510998307952623,
|
|
"grad_norm": 0.4855585609161836,
|
|
"learning_rate": 2.338428719772706e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12285912036895752,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5098.3,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 3.5194585448392557,
|
|
"grad_norm": 0.5433031037470556,
|
|
"learning_rate": 2.3301090827294002e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13296103477478027,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5721.0,
|
|
"valid_targets_min": 2214
|
|
},
|
|
{
|
|
"epoch": 3.527918781725888,
|
|
"grad_norm": 0.4434397450836832,
|
|
"learning_rate": 2.3217835692974458e-05,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1097981333732605,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4910.5,
|
|
"valid_targets_min": 2208
|
|
},
|
|
{
|
|
"epoch": 3.536379018612521,
|
|
"grad_norm": 0.5399741849182257,
|
|
"learning_rate": 2.3134523276822537e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14059670269489288,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4348.1,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 3.544839255499154,
|
|
"grad_norm": 0.5235459802233994,
|
|
"learning_rate": 2.3051155061912058e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14942292869091034,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4546.6,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 3.553299492385787,
|
|
"grad_norm": 0.495855458039366,
|
|
"learning_rate": 2.2967732532310135e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14421787858009338,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4963.6,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 3.5617597292724197,
|
|
"grad_norm": 0.4545487320208881,
|
|
"learning_rate": 2.288425717305075e-05,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13516095280647278,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5848.1,
|
|
"valid_targets_min": 1857
|
|
},
|
|
{
|
|
"epoch": 3.5702199661590526,
|
|
"grad_norm": 0.417066795593777,
|
|
"learning_rate": 2.280073047010832e-05,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398180574178696,
|
|
"step": 2110,
|
|
"valid_targets_mean": 5640.4,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 3.5786802030456855,
|
|
"grad_norm": 0.500353582370106,
|
|
"learning_rate": 2.271715391037126e-05,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12354753911495209,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4960.4,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 3.587140439932318,
|
|
"grad_norm": 0.5201037688561199,
|
|
"learning_rate": 2.2633528981615484e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12576597929000854,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4996.9,
|
|
"valid_targets_min": 2238
|
|
},
|
|
{
|
|
"epoch": 3.595600676818951,
|
|
"grad_norm": 0.4373009336253946,
|
|
"learning_rate": 2.2549857172477973e-05,
|
|
"loss": 0.1519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12523704767227173,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5787.6,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 3.6040609137055837,
|
|
"grad_norm": 0.479334573884982,
|
|
"learning_rate": 2.246613997243022e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16822044551372528,
|
|
"step": 2130,
|
|
"valid_targets_mean": 5430.7,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 3.6125211505922166,
|
|
"grad_norm": 0.46079870170460563,
|
|
"learning_rate": 2.2382378871751734e-05,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.135625422000885,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5553.2,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 3.6209813874788495,
|
|
"grad_norm": 0.4582839345617985,
|
|
"learning_rate": 2.2298575361503526e-05,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14283427596092224,
|
|
"step": 2140,
|
|
"valid_targets_mean": 5928.2,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 3.6294416243654823,
|
|
"grad_norm": 0.47028611977591034,
|
|
"learning_rate": 2.2214730933501555e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13672372698783875,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5304.9,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 3.6379018612521152,
|
|
"grad_norm": 0.4770926794366849,
|
|
"learning_rate": 2.2130847080290155e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376904398202896,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5570.2,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 3.6463620981387477,
|
|
"grad_norm": 0.5196669157161529,
|
|
"learning_rate": 2.2046925295115504e-05,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.156345933675766,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4431.1,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 3.6548223350253806,
|
|
"grad_norm": 0.4453491708787737,
|
|
"learning_rate": 2.1962967071899005e-05,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11248328536748886,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5359.1,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 3.6632825719120135,
|
|
"grad_norm": 0.4511929100146715,
|
|
"learning_rate": 2.1878973905210722e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11784185469150543,
|
|
"step": 2165,
|
|
"valid_targets_mean": 5286.0,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 3.6717428087986463,
|
|
"grad_norm": 0.48315355298362633,
|
|
"learning_rate": 2.1794947290242747e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13470733165740967,
|
|
"step": 2170,
|
|
"valid_targets_mean": 6099.7,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 3.6802030456852792,
|
|
"grad_norm": 0.4613759744314735,
|
|
"learning_rate": 2.171088872278259e-05,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10934378206729889,
|
|
"step": 2175,
|
|
"valid_targets_mean": 5320.7,
|
|
"valid_targets_min": 1728
|
|
},
|
|
{
|
|
"epoch": 3.688663282571912,
|
|
"grad_norm": 0.48872660827407277,
|
|
"learning_rate": 2.1626799699186577e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13354673981666565,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5293.0,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 3.697123519458545,
|
|
"grad_norm": 0.4373717598235291,
|
|
"learning_rate": 2.154268171635319e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1157350093126297,
|
|
"step": 2185,
|
|
"valid_targets_mean": 6068.4,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 3.7055837563451774,
|
|
"grad_norm": 0.5032599546326298,
|
|
"learning_rate": 2.1458536271696422e-05,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13225588202476501,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4395.2,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 3.7140439932318103,
|
|
"grad_norm": 0.5500219948166671,
|
|
"learning_rate": 2.1374364863119124e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1534668207168579,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4257.9,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 3.7225042301184432,
|
|
"grad_norm": 0.5905876786620081,
|
|
"learning_rate": 2.1290168988986332e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13136783242225647,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4862.8,
|
|
"valid_targets_min": 2489
|
|
},
|
|
{
|
|
"epoch": 3.730964467005076,
|
|
"grad_norm": 0.5017115386747313,
|
|
"learning_rate": 2.1205950148098608e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14224137365818024,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4645.6,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 3.739424703891709,
|
|
"grad_norm": 0.5554381336293088,
|
|
"learning_rate": 2.1121709839665363e-05,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17350545525550842,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3921.4,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 3.747884940778342,
|
|
"grad_norm": 0.4492160673895607,
|
|
"learning_rate": 2.1037449563278147e-05,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1170819029211998,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5969.1,
|
|
"valid_targets_min": 2570
|
|
},
|
|
{
|
|
"epoch": 3.7563451776649748,
|
|
"grad_norm": 0.5190233300452124,
|
|
"learning_rate": 2.0953170818883968e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1592286378145218,
|
|
"step": 2220,
|
|
"valid_targets_mean": 5341.8,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 3.764805414551607,
|
|
"grad_norm": 0.7205459936063536,
|
|
"learning_rate": 2.086887510675859e-05,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13329637050628662,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4844.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 3.77326565143824,
|
|
"grad_norm": 0.402247717714784,
|
|
"learning_rate": 2.0784563927479837e-05,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11404536664485931,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6565.1,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 3.781725888324873,
|
|
"grad_norm": 0.49605905704017556,
|
|
"learning_rate": 2.0700238781900856e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11886497586965561,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5184.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 3.790186125211506,
|
|
"grad_norm": 0.48384905092072333,
|
|
"learning_rate": 2.061590117112341e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1490713357925415,
|
|
"step": 2240,
|
|
"valid_targets_mean": 5845.1,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 3.7986463620981388,
|
|
"grad_norm": 0.45625631687966317,
|
|
"learning_rate": 2.0531552596471173e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14283062517642975,
|
|
"step": 2245,
|
|
"valid_targets_mean": 5249.1,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 3.8071065989847717,
|
|
"grad_norm": 0.5195270633096154,
|
|
"learning_rate": 2.0447194559462978e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1424553096294403,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4908.0,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 3.8155668358714045,
|
|
"grad_norm": 0.47333644812425013,
|
|
"learning_rate": 2.0362828561786116e-05,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630922108888626,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5195.7,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 3.824027072758037,
|
|
"grad_norm": 0.5140508410561418,
|
|
"learning_rate": 2.027845610526958e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12374693155288696,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4438.9,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 3.8324873096446703,
|
|
"grad_norm": 0.48323118502889284,
|
|
"learning_rate": 2.0194078691857325e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1483040153980255,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4945.2,
|
|
"valid_targets_min": 2276
|
|
},
|
|
{
|
|
"epoch": 3.8409475465313028,
|
|
"grad_norm": 0.45914993417346833,
|
|
"learning_rate": 2.010969782358157e-05,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12424896657466888,
|
|
"step": 2270,
|
|
"valid_targets_mean": 5445.5,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 3.8494077834179357,
|
|
"grad_norm": 0.4711572825401971,
|
|
"learning_rate": 2.002531500253602e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11576488614082336,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4892.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.8578680203045685,
|
|
"grad_norm": 0.5211007483347162,
|
|
"learning_rate": 1.9940931730849143e-05,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14363281428813934,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4537.1,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 3.8663282571912014,
|
|
"grad_norm": 0.4434502989678565,
|
|
"learning_rate": 1.9856549510657447e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12276516109704971,
|
|
"step": 2285,
|
|
"valid_targets_mean": 5147.1,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 3.8747884940778343,
|
|
"grad_norm": 0.5302181869365117,
|
|
"learning_rate": 1.9772169844078688e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1536346673965454,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4576.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 3.8832487309644668,
|
|
"grad_norm": 0.4594207829548666,
|
|
"learning_rate": 1.96877942331852e-05,
|
|
"loss": 0.1262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11715570092201233,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5011.7,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 3.8917089678511,
|
|
"grad_norm": 0.4765293330295309,
|
|
"learning_rate": 1.9603424179977095e-05,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1369125097990036,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5119.8,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 3.9001692047377325,
|
|
"grad_norm": 0.5720619185220353,
|
|
"learning_rate": 1.9519061186355554e-05,
|
|
"loss": 0.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410321295261383,
|
|
"step": 2305,
|
|
"valid_targets_mean": 5052.2,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 3.9086294416243654,
|
|
"grad_norm": 0.50387564585181,
|
|
"learning_rate": 1.943470675409611e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13722364604473114,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4413.6,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 3.9170896785109983,
|
|
"grad_norm": 0.4689184284495773,
|
|
"learning_rate": 1.9350362384821857e-05,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14235243201255798,
|
|
"step": 2315,
|
|
"valid_targets_mean": 5515.7,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 3.925549915397631,
|
|
"grad_norm": 0.4503232145936985,
|
|
"learning_rate": 1.9266029579976782e-05,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11667802184820175,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5112.9,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 3.934010152284264,
|
|
"grad_norm": 0.4409074798098301,
|
|
"learning_rate": 1.9181709840799006e-05,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1168362945318222,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5720.8,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 3.9424703891708965,
|
|
"grad_norm": 0.46524840283897406,
|
|
"learning_rate": 1.9097404668294046e-05,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13940882682800293,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5288.9,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.95093062605753,
|
|
"grad_norm": 0.5013141924287504,
|
|
"learning_rate": 1.9013115563208135e-05,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12259677052497864,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5285.1,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 3.9593908629441623,
|
|
"grad_norm": 0.4675103907065486,
|
|
"learning_rate": 1.8928844026001456e-05,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12828120589256287,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5590.6,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 3.967851099830795,
|
|
"grad_norm": 0.47452040367475135,
|
|
"learning_rate": 1.884459155682149e-05,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308564841747284,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4980.2,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 3.976311336717428,
|
|
"grad_norm": 0.4359966765373313,
|
|
"learning_rate": 1.8760359655476275e-05,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11558710038661957,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5570.9,
|
|
"valid_targets_min": 2615
|
|
},
|
|
{
|
|
"epoch": 3.984771573604061,
|
|
"grad_norm": 0.48973588503589366,
|
|
"learning_rate": 1.867614982140769e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15834450721740723,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5409.0,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 3.993231810490694,
|
|
"grad_norm": 0.4774362922878023,
|
|
"learning_rate": 1.8591963553664817e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13099083304405212,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5510.8,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 4.001692047377326,
|
|
"grad_norm": 0.47511799533399457,
|
|
"learning_rate": 1.8507802350877213e-05,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1106976717710495,
|
|
"step": 2365,
|
|
"valid_targets_mean": 5321.2,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 4.01015228426396,
|
|
"grad_norm": 0.4973582417346882,
|
|
"learning_rate": 1.842366771122823e-05,
|
|
"loss": 0.1114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10673023760318756,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4648.1,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 4.018612521150592,
|
|
"grad_norm": 0.45512250255628767,
|
|
"learning_rate": 1.8339561132428393e-05,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10182854533195496,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5824.0,
|
|
"valid_targets_min": 2261
|
|
},
|
|
{
|
|
"epoch": 4.027072758037225,
|
|
"grad_norm": 0.4549504038329006,
|
|
"learning_rate": 1.8255484111688668e-05,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10221542418003082,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5396.1,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 4.035532994923858,
|
|
"grad_norm": 0.5386419794169544,
|
|
"learning_rate": 1.817143814569388e-05,
|
|
"loss": 0.1157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12394209206104279,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5163.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 4.04399323181049,
|
|
"grad_norm": 0.47097843189271993,
|
|
"learning_rate": 1.8087424730576025e-05,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10171864181756973,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4764.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 4.052453468697124,
|
|
"grad_norm": 0.5054255577126322,
|
|
"learning_rate": 1.800344536188764e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09796445071697235,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5156.1,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 4.060913705583756,
|
|
"grad_norm": 0.5611900229361249,
|
|
"learning_rate": 1.791950153457521e-05,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12867556512355804,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4955.1,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 4.069373942470389,
|
|
"grad_norm": 0.5546802332285454,
|
|
"learning_rate": 1.7835594742952506e-05,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11180967092514038,
|
|
"step": 2405,
|
|
"valid_targets_mean": 5189.2,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 4.077834179357022,
|
|
"grad_norm": 0.43339175100272426,
|
|
"learning_rate": 1.775172648067404e-05,
|
|
"loss": 0.1096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10482772439718246,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5720.9,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 4.086294416243655,
|
|
"grad_norm": 0.6826577244941917,
|
|
"learning_rate": 1.766789824070845e-05,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1264183670282364,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5412.3,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 4.094754653130288,
|
|
"grad_norm": 0.5344505099277075,
|
|
"learning_rate": 1.7584111515311894e-05,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11256110668182373,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4065.8,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 4.10321489001692,
|
|
"grad_norm": 0.5662946737105445,
|
|
"learning_rate": 1.750036779600155e-05,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13470518589019775,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4611.9,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 4.111675126903553,
|
|
"grad_norm": 0.48389224678142545,
|
|
"learning_rate": 1.7416668573529012e-05,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12454604357481003,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5002.1,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 4.120135363790186,
|
|
"grad_norm": 0.47339072112461805,
|
|
"learning_rate": 1.7333015337853754e-05,
|
|
"loss": 0.1013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10720397531986237,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5257.8,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 4.128595600676819,
|
|
"grad_norm": 0.48084269886785075,
|
|
"learning_rate": 1.7249409578116662e-05,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10184866189956665,
|
|
"step": 2440,
|
|
"valid_targets_mean": 5349.6,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.137055837563452,
|
|
"grad_norm": 0.457809696872409,
|
|
"learning_rate": 1.7165852782613442e-05,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09542272239923477,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4649.6,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 4.145516074450085,
|
|
"grad_norm": 0.4931433927951892,
|
|
"learning_rate": 1.7082346438768206e-05,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11491983383893967,
|
|
"step": 2450,
|
|
"valid_targets_mean": 5240.4,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.153976311336717,
|
|
"grad_norm": 0.4883201900943464,
|
|
"learning_rate": 1.699889203310695e-05,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12604977190494537,
|
|
"step": 2455,
|
|
"valid_targets_mean": 5009.7,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 4.16243654822335,
|
|
"grad_norm": 0.5937666858107894,
|
|
"learning_rate": 1.691549105123108e-05,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275440752506256,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4117.4,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 4.170896785109983,
|
|
"grad_norm": 0.5820053835179225,
|
|
"learning_rate": 1.6832144977791004e-05,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11013667285442352,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4623.8,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 4.179357021996616,
|
|
"grad_norm": 0.8287199744593868,
|
|
"learning_rate": 1.674885529645969e-05,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.111517995595932,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4911.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 4.187817258883249,
|
|
"grad_norm": 0.49925653973724027,
|
|
"learning_rate": 1.6665623489906233e-05,
|
|
"loss": 0.1077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1012364998459816,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4529.9,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 4.196277495769881,
|
|
"grad_norm": 0.5340103773552424,
|
|
"learning_rate": 1.65824510397695e-05,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13323643803596497,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4731.4,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 4.204737732656515,
|
|
"grad_norm": 0.5356469170925333,
|
|
"learning_rate": 1.6499339426631698e-05,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12146926671266556,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4534.4,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 4.213197969543147,
|
|
"grad_norm": 0.5169422948601681,
|
|
"learning_rate": 1.64162901299921e-05,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11951170861721039,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5522.2,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 4.22165820642978,
|
|
"grad_norm": 0.7144964515290791,
|
|
"learning_rate": 1.6333304628240633e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12838444113731384,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4277.6,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 4.230118443316413,
|
|
"grad_norm": 0.4448636904789219,
|
|
"learning_rate": 1.6250384398631576e-05,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10107284784317017,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5629.9,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 4.238578680203045,
|
|
"grad_norm": 0.5323367725712128,
|
|
"learning_rate": 1.616753091725731e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12738008797168732,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4750.5,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 4.247038917089679,
|
|
"grad_norm": 0.4790362127026047,
|
|
"learning_rate": 1.6084745659021982e-05,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11164727061986923,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4855.5,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 4.255499153976311,
|
|
"grad_norm": 0.5134821292040456,
|
|
"learning_rate": 1.600203009761528e-05,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1107853502035141,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4909.9,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 4.2639593908629445,
|
|
"grad_norm": 0.5333977590608937,
|
|
"learning_rate": 1.5919385705486208e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10620379447937012,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4985.3,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 4.272419627749577,
|
|
"grad_norm": 0.5555872936185655,
|
|
"learning_rate": 1.5836813953816835e-05,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13448113203048706,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4878.6,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.280879864636209,
|
|
"grad_norm": 0.43110792424977085,
|
|
"learning_rate": 1.575431631249615e-05,
|
|
"loss": 0.114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11432795971632004,
|
|
"step": 2530,
|
|
"valid_targets_mean": 6472.2,
|
|
"valid_targets_min": 1954
|
|
},
|
|
{
|
|
"epoch": 4.289340101522843,
|
|
"grad_norm": 0.4676123182962347,
|
|
"learning_rate": 1.567189425009388e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12004925310611725,
|
|
"step": 2535,
|
|
"valid_targets_mean": 6090.7,
|
|
"valid_targets_min": 3566
|
|
},
|
|
{
|
|
"epoch": 4.297800338409475,
|
|
"grad_norm": 0.49264099429229224,
|
|
"learning_rate": 1.558954923383432e-05,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11778266727924347,
|
|
"step": 2540,
|
|
"valid_targets_mean": 6076.6,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 4.3062605752961085,
|
|
"grad_norm": 0.486742886643022,
|
|
"learning_rate": 1.5507282729570278e-05,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13285931944847107,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5412.8,
|
|
"valid_targets_min": 2030
|
|
},
|
|
{
|
|
"epoch": 4.314720812182741,
|
|
"grad_norm": 0.49455021656632203,
|
|
"learning_rate": 1.5425096201756897e-05,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1490100622177124,
|
|
"step": 2550,
|
|
"valid_targets_mean": 6479.6,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.323181049069374,
|
|
"grad_norm": 0.4519587399557808,
|
|
"learning_rate": 1.5342991113425674e-05,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11015860736370087,
|
|
"step": 2555,
|
|
"valid_targets_mean": 5501.5,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 4.331641285956007,
|
|
"grad_norm": 0.6089522275741038,
|
|
"learning_rate": 1.5260968926158355e-05,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12057401239871979,
|
|
"step": 2560,
|
|
"valid_targets_mean": 4213.7,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 4.340101522842639,
|
|
"grad_norm": 0.482720344329213,
|
|
"learning_rate": 1.5179031100060917e-05,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10619397461414337,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4520.8,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 4.3485617597292725,
|
|
"grad_norm": 0.6113046164135808,
|
|
"learning_rate": 1.5097179093737629e-05,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10891176760196686,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3906.1,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 4.357021996615905,
|
|
"grad_norm": 0.5126032644586281,
|
|
"learning_rate": 1.5015414364265013e-05,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13515430688858032,
|
|
"step": 2575,
|
|
"valid_targets_mean": 5212.6,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 4.365482233502538,
|
|
"grad_norm": 0.5475923416245554,
|
|
"learning_rate": 1.493373836716598e-05,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11125195026397705,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4739.6,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 4.373942470389171,
|
|
"grad_norm": 0.4997861590253254,
|
|
"learning_rate": 1.4852152556383871e-05,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09848365187644958,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4422.0,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 4.382402707275804,
|
|
"grad_norm": 0.4717434553403031,
|
|
"learning_rate": 1.477065838425658e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11205875873565674,
|
|
"step": 2590,
|
|
"valid_targets_mean": 5345.7,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 4.3908629441624365,
|
|
"grad_norm": 0.5004969533657186,
|
|
"learning_rate": 1.4689257301490716e-05,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0990268886089325,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4260.4,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 4.39932318104907,
|
|
"grad_norm": 0.5561598799396039,
|
|
"learning_rate": 1.4607950757135783e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14237508177757263,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4306.9,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 4.407783417935702,
|
|
"grad_norm": 0.603139106200442,
|
|
"learning_rate": 1.4526740198558345e-05,
|
|
"loss": 0.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12600648403167725,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4762.4,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 4.416243654822335,
|
|
"grad_norm": 0.5356893456517208,
|
|
"learning_rate": 1.4445627071416319e-05,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11970909684896469,
|
|
"step": 2610,
|
|
"valid_targets_mean": 5642.7,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 4.424703891708968,
|
|
"grad_norm": 0.5422033861064612,
|
|
"learning_rate": 1.4364612819633187e-05,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12544970214366913,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4757.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 4.4331641285956005,
|
|
"grad_norm": 0.5142963265683194,
|
|
"learning_rate": 1.4283698885372336e-05,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11739353835582733,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5121.6,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 4.441624365482234,
|
|
"grad_norm": 0.5166935455197309,
|
|
"learning_rate": 1.4202886709011357e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09951464831829071,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4890.1,
|
|
"valid_targets_min": 2302
|
|
},
|
|
{
|
|
"epoch": 4.450084602368866,
|
|
"grad_norm": 0.5041182527742695,
|
|
"learning_rate": 1.4122177729116395e-05,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12261448800563812,
|
|
"step": 2630,
|
|
"valid_targets_mean": 5114.2,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 4.458544839255499,
|
|
"grad_norm": 0.48015662580083257,
|
|
"learning_rate": 1.4041573382416584e-05,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12138631939888,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5326.0,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 4.467005076142132,
|
|
"grad_norm": 0.5060790005409431,
|
|
"learning_rate": 1.3961075103778447e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11979687958955765,
|
|
"step": 2640,
|
|
"valid_targets_mean": 5634.1,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 4.4754653130287645,
|
|
"grad_norm": 0.5043235941827582,
|
|
"learning_rate": 1.3880684326180323e-05,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08686380833387375,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5787.6,
|
|
"valid_targets_min": 3117
|
|
},
|
|
{
|
|
"epoch": 4.483925549915398,
|
|
"grad_norm": 0.5112090510366569,
|
|
"learning_rate": 1.3800402480686916e-05,
|
|
"loss": 0.1268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13445940613746643,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5005.8,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 4.49238578680203,
|
|
"grad_norm": 0.7560883975381615,
|
|
"learning_rate": 1.3720230996423774e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11718163639307022,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4527.0,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 4.500846023688664,
|
|
"grad_norm": 0.5182428828184167,
|
|
"learning_rate": 1.3640171300551868e-05,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11507587879896164,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4597.1,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 4.509306260575296,
|
|
"grad_norm": 0.45789309849543947,
|
|
"learning_rate": 1.3560224818242197e-05,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11421696841716766,
|
|
"step": 2665,
|
|
"valid_targets_mean": 5267.6,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 4.517766497461929,
|
|
"grad_norm": 0.5655752886882534,
|
|
"learning_rate": 1.3480392972650385e-05,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1220095083117485,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4647.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.526226734348562,
|
|
"grad_norm": 0.5049442901941974,
|
|
"learning_rate": 1.3400677184891393e-05,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12077365815639496,
|
|
"step": 2675,
|
|
"valid_targets_mean": 5520.7,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 4.534686971235194,
|
|
"grad_norm": 0.4873029617831326,
|
|
"learning_rate": 1.3321078874014162e-05,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12391913682222366,
|
|
"step": 2680,
|
|
"valid_targets_mean": 5987.2,
|
|
"valid_targets_min": 2639
|
|
},
|
|
{
|
|
"epoch": 4.543147208121828,
|
|
"grad_norm": 0.7424607652182657,
|
|
"learning_rate": 1.3241599456976416e-05,
|
|
"loss": 0.1157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10421408712863922,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4582.9,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.55160744500846,
|
|
"grad_norm": 0.49647114293499267,
|
|
"learning_rate": 1.3162240348619396e-05,
|
|
"loss": 0.1098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11489611864089966,
|
|
"step": 2690,
|
|
"valid_targets_mean": 5225.4,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 4.560067681895093,
|
|
"grad_norm": 0.5390304771526867,
|
|
"learning_rate": 1.3083002961642677e-05,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13163185119628906,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4885.9,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 4.568527918781726,
|
|
"grad_norm": 0.4833353407345534,
|
|
"learning_rate": 1.300388870657904e-05,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10971377789974213,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4948.1,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 4.576988155668358,
|
|
"grad_norm": 0.5387145440912565,
|
|
"learning_rate": 1.2924898991769357e-05,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14387647807598114,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4675.2,
|
|
"valid_targets_min": 2319
|
|
},
|
|
{
|
|
"epoch": 4.585448392554992,
|
|
"grad_norm": 0.4645547764513857,
|
|
"learning_rate": 1.284603522333749e-05,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10917040705680847,
|
|
"step": 2710,
|
|
"valid_targets_mean": 5962.4,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 4.593908629441624,
|
|
"grad_norm": 0.4849642888880769,
|
|
"learning_rate": 1.2767298805165318e-05,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12268083542585373,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5853.5,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 4.602368866328257,
|
|
"grad_norm": 0.448700586936395,
|
|
"learning_rate": 1.2688691138867682e-05,
|
|
"loss": 0.1157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10441425442695618,
|
|
"step": 2720,
|
|
"valid_targets_mean": 6451.6,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 4.61082910321489,
|
|
"grad_norm": 0.6910385778410035,
|
|
"learning_rate": 1.261021362376748e-05,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12111136317253113,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4004.8,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 4.619289340101523,
|
|
"grad_norm": 0.4939443167844028,
|
|
"learning_rate": 1.2531867656870753e-05,
|
|
"loss": 0.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1197144091129303,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5221.2,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 4.627749576988156,
|
|
"grad_norm": 0.5181639698340933,
|
|
"learning_rate": 1.245365463284178e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11533892154693604,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4906.8,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 4.636209813874789,
|
|
"grad_norm": 0.5070080597404826,
|
|
"learning_rate": 1.23755759439783e-05,
|
|
"loss": 0.1115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1115088164806366,
|
|
"step": 2740,
|
|
"valid_targets_mean": 5075.1,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 4.644670050761421,
|
|
"grad_norm": 0.4838071519179223,
|
|
"learning_rate": 1.229763298018669e-05,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11341849714517593,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4844.9,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 4.653130287648054,
|
|
"grad_norm": 0.484875124165614,
|
|
"learning_rate": 1.2219827128957251e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11604801565408707,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5229.9,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 4.661590524534687,
|
|
"grad_norm": 0.4908584226057113,
|
|
"learning_rate": 1.2142159775339484e-05,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11748868972063065,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4625.5,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 4.67005076142132,
|
|
"grad_norm": 0.4674259888323767,
|
|
"learning_rate": 1.2064632301917434e-05,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1165299266576767,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5808.5,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 4.678510998307953,
|
|
"grad_norm": 0.5013141078619668,
|
|
"learning_rate": 1.1987246088785111e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09349864721298218,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4603.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 4.686971235194585,
|
|
"grad_norm": 0.5097835897279829,
|
|
"learning_rate": 1.1910002513521901e-05,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11260973662137985,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4969.1,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.695431472081218,
|
|
"grad_norm": 0.4428567344975358,
|
|
"learning_rate": 1.1832902951168022e-05,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10317705571651459,
|
|
"step": 2775,
|
|
"valid_targets_mean": 6071.2,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 4.703891708967851,
|
|
"grad_norm": 0.5428427580281757,
|
|
"learning_rate": 1.1755948774200086e-05,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11682090163230896,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4179.5,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 4.712351945854484,
|
|
"grad_norm": 0.5622686081537065,
|
|
"learning_rate": 1.1679141352506634e-05,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12282834202051163,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4341.8,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 4.720812182741117,
|
|
"grad_norm": 0.4833379919447014,
|
|
"learning_rate": 1.1602482053363772e-05,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10564549267292023,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4815.8,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 4.729272419627749,
|
|
"grad_norm": 0.47093720788134164,
|
|
"learning_rate": 1.1525972241410827e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10980655997991562,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5245.2,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 4.737732656514383,
|
|
"grad_norm": 0.45991705893280405,
|
|
"learning_rate": 1.144961327862605e-05,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12417744845151901,
|
|
"step": 2800,
|
|
"valid_targets_mean": 6066.2,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 4.746192893401015,
|
|
"grad_norm": 0.5193081279476732,
|
|
"learning_rate": 1.1373406524302357e-05,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10718132555484772,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3843.8,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 4.7546531302876485,
|
|
"grad_norm": 0.48175984742663047,
|
|
"learning_rate": 1.1297353335023192e-05,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1006290465593338,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4465.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 4.763113367174281,
|
|
"grad_norm": 0.4733427864155358,
|
|
"learning_rate": 1.1221455064638263e-05,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10685653239488602,
|
|
"step": 2815,
|
|
"valid_targets_mean": 5164.7,
|
|
"valid_targets_min": 2610
|
|
},
|
|
{
|
|
"epoch": 4.771573604060913,
|
|
"grad_norm": 0.4664514240597311,
|
|
"learning_rate": 1.1145713064239585e-05,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10703268647193909,
|
|
"step": 2820,
|
|
"valid_targets_mean": 5041.2,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 4.780033840947547,
|
|
"grad_norm": 0.5308982609222499,
|
|
"learning_rate": 1.1070128682137295e-05,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11384016275405884,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4962.2,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 4.788494077834179,
|
|
"grad_norm": 0.5520977123956073,
|
|
"learning_rate": 1.0994703263835755e-05,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13287977874279022,
|
|
"step": 2830,
|
|
"valid_targets_mean": 4319.8,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 4.7969543147208125,
|
|
"grad_norm": 0.5150221984813803,
|
|
"learning_rate": 1.091943815200954e-05,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12661895155906677,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5046.8,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 4.805414551607445,
|
|
"grad_norm": 0.4815478074371246,
|
|
"learning_rate": 1.0844334686479531e-05,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11400146782398224,
|
|
"step": 2840,
|
|
"valid_targets_mean": 5537.9,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 4.813874788494077,
|
|
"grad_norm": 0.5696284213744985,
|
|
"learning_rate": 1.076939420418913e-05,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10404601693153381,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4466.8,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 4.822335025380711,
|
|
"grad_norm": 0.5256458484381987,
|
|
"learning_rate": 1.0694618039180385e-05,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10447492450475693,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4885.2,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 4.830795262267343,
|
|
"grad_norm": 0.5305287298641234,
|
|
"learning_rate": 1.0620007522570278e-05,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12097901105880737,
|
|
"step": 2855,
|
|
"valid_targets_mean": 5236.5,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.8392554991539765,
|
|
"grad_norm": 0.4944513203914488,
|
|
"learning_rate": 1.0545563982527031e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1069316640496254,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4519.3,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 4.847715736040609,
|
|
"grad_norm": 0.5288322846559587,
|
|
"learning_rate": 1.047128874424646e-05,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13514918088912964,
|
|
"step": 2865,
|
|
"valid_targets_mean": 5493.1,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 4.856175972927242,
|
|
"grad_norm": 0.5134348583516644,
|
|
"learning_rate": 1.0397183129928372e-05,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327642798423767,
|
|
"step": 2870,
|
|
"valid_targets_mean": 5503.5,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 4.864636209813875,
|
|
"grad_norm": 0.5297243912886433,
|
|
"learning_rate": 1.0323248458753045e-05,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16231319308280945,
|
|
"step": 2875,
|
|
"valid_targets_mean": 5482.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 4.873096446700508,
|
|
"grad_norm": 0.5177485664902126,
|
|
"learning_rate": 1.0249486046857735e-05,
|
|
"loss": 0.1077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10660773515701294,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4738.6,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 4.8815566835871405,
|
|
"grad_norm": 0.5058920042144293,
|
|
"learning_rate": 1.017589720731325e-05,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12517228722572327,
|
|
"step": 2885,
|
|
"valid_targets_mean": 5147.9,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 4.890016920473773,
|
|
"grad_norm": 0.4994669767865543,
|
|
"learning_rate": 1.0102483250100574e-05,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10191367566585541,
|
|
"step": 2890,
|
|
"valid_targets_mean": 5500.1,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 4.898477157360406,
|
|
"grad_norm": 0.4778169551672598,
|
|
"learning_rate": 1.0029245482087555e-05,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0906507670879364,
|
|
"step": 2895,
|
|
"valid_targets_mean": 5382.4,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 4.906937394247039,
|
|
"grad_norm": 0.4625099008151308,
|
|
"learning_rate": 9.956185207005622e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10199104249477386,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5540.0,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 4.915397631133672,
|
|
"grad_norm": 0.4580559331777595,
|
|
"learning_rate": 9.883303725426601e-06,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10280783474445343,
|
|
"step": 2905,
|
|
"valid_targets_mean": 5608.9,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 4.9238578680203045,
|
|
"grad_norm": 0.5229263063451585,
|
|
"learning_rate": 9.810602334739541e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11636469513177872,
|
|
"step": 2910,
|
|
"valid_targets_mean": 5495.2,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 4.932318104906937,
|
|
"grad_norm": 0.4778034485503313,
|
|
"learning_rate": 9.738082329127655e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10515119135379791,
|
|
"step": 2915,
|
|
"valid_targets_mean": 5484.6,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 4.94077834179357,
|
|
"grad_norm": 0.48700183806933506,
|
|
"learning_rate": 9.665744999545219e-06,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11018317937850952,
|
|
"step": 2920,
|
|
"valid_targets_mean": 5465.5,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 4.949238578680203,
|
|
"grad_norm": 0.49253946147249844,
|
|
"learning_rate": 9.59359163369465e-06,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10416784882545471,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4746.2,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 4.957698815566836,
|
|
"grad_norm": 0.5263470639476079,
|
|
"learning_rate": 9.521623516003559e-06,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12788143754005432,
|
|
"step": 2930,
|
|
"valid_targets_mean": 5312.8,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 4.9661590524534684,
|
|
"grad_norm": 0.480861545026602,
|
|
"learning_rate": 9.449841927601879e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12768593430519104,
|
|
"step": 2935,
|
|
"valid_targets_mean": 5970.1,
|
|
"valid_targets_min": 2864
|
|
},
|
|
{
|
|
"epoch": 4.974619289340102,
|
|
"grad_norm": 0.6212151146085724,
|
|
"learning_rate": 9.378248146299104e-06,
|
|
"loss": 0.1064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10577527433633804,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5097.1,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 4.983079526226734,
|
|
"grad_norm": 0.49493428335046113,
|
|
"learning_rate": 9.306843446561448e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11329629272222519,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5001.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.991539763113368,
|
|
"grad_norm": 0.47674879885948707,
|
|
"learning_rate": 9.235629099489281e-06,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10628634691238403,
|
|
"step": 2950,
|
|
"valid_targets_mean": 5002.0,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4465067435851117,
|
|
"learning_rate": 9.164606372794376e-06,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09636123478412628,
|
|
"step": 2955,
|
|
"valid_targets_mean": 5561.8,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 5.008460236886632,
|
|
"grad_norm": 0.46341380991553455,
|
|
"learning_rate": 9.093776530777459e-06,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10254893451929092,
|
|
"step": 2960,
|
|
"valid_targets_mean": 5686.9,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 5.016920473773266,
|
|
"grad_norm": 0.4964974475277022,
|
|
"learning_rate": 9.023140834305621e-06,
|
|
"loss": 0.1008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10365019738674164,
|
|
"step": 2965,
|
|
"valid_targets_mean": 5201.5,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 5.025380710659898,
|
|
"grad_norm": 0.5183209288112081,
|
|
"learning_rate": 8.952700540789884e-06,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09999212622642517,
|
|
"step": 2970,
|
|
"valid_targets_mean": 5346.2,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 5.0338409475465316,
|
|
"grad_norm": 0.5249037922912057,
|
|
"learning_rate": 8.882456904162868e-06,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11892069876194,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5159.9,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 5.042301184433164,
|
|
"grad_norm": 0.577160456193281,
|
|
"learning_rate": 8.812411174856412e-06,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10910410434007645,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3907.7,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 5.050761421319797,
|
|
"grad_norm": 0.5101276067628427,
|
|
"learning_rate": 8.742564599779342e-06,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10311952233314514,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5001.5,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 5.05922165820643,
|
|
"grad_norm": 0.5248987355275777,
|
|
"learning_rate": 8.672918422295264e-06,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09956686943769455,
|
|
"step": 2990,
|
|
"valid_targets_mean": 5081.2,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 5.067681895093062,
|
|
"grad_norm": 0.4571479701779368,
|
|
"learning_rate": 8.603473882200447e-06,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09373961389064789,
|
|
"step": 2995,
|
|
"valid_targets_mean": 5224.1,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 5.0761421319796955,
|
|
"grad_norm": 0.44466978691736725,
|
|
"learning_rate": 8.534232215701727e-06,
|
|
"loss": 0.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11685381829738617,
|
|
"step": 3000,
|
|
"valid_targets_mean": 5470.9,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 5.084602368866328,
|
|
"grad_norm": 0.5071077672620312,
|
|
"learning_rate": 8.465194655394531e-06,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08857870101928711,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5155.2,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 5.093062605752961,
|
|
"grad_norm": 0.4693078472060947,
|
|
"learning_rate": 8.396362430240903e-06,
|
|
"loss": 0.1049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.098457470536232,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4969.4,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 5.101522842639594,
|
|
"grad_norm": 0.4494837174063322,
|
|
"learning_rate": 8.327736765547658e-06,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0764254480600357,
|
|
"step": 3015,
|
|
"valid_targets_mean": 5779.6,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 5.109983079526227,
|
|
"grad_norm": 0.49876652553838324,
|
|
"learning_rate": 8.259318882944545e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11605612933635712,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5404.7,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 5.1184433164128595,
|
|
"grad_norm": 0.5327754847776021,
|
|
"learning_rate": 8.191110000362514e-06,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10155603289604187,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4968.0,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 5.126903553299492,
|
|
"grad_norm": 0.5254389660490575,
|
|
"learning_rate": 8.123111332012033e-06,
|
|
"loss": 0.1115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10445211827754974,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4586.4,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 5.135363790186125,
|
|
"grad_norm": 0.48219869338553634,
|
|
"learning_rate": 8.055324088361469e-06,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09873694181442261,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5345.2,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 5.143824027072758,
|
|
"grad_norm": 0.5015719137435583,
|
|
"learning_rate": 7.987749476115541e-06,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10546354949474335,
|
|
"step": 3040,
|
|
"valid_targets_mean": 5792.4,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 5.152284263959391,
|
|
"grad_norm": 0.5101127922529497,
|
|
"learning_rate": 7.920388698193861e-06,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07332199811935425,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4936.8,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 5.1607445008460235,
|
|
"grad_norm": 0.4705540868638593,
|
|
"learning_rate": 7.853242953709467e-06,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09903880953788757,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5571.5,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 5.169204737732657,
|
|
"grad_norm": 0.5015847987553127,
|
|
"learning_rate": 7.786313437947534e-06,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09668935835361481,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4727.9,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 5.177664974619289,
|
|
"grad_norm": 0.5490685511807961,
|
|
"learning_rate": 7.719601342344067e-06,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11464767158031464,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4473.9,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 5.186125211505922,
|
|
"grad_norm": 0.4841938815482277,
|
|
"learning_rate": 7.653107854464691e-06,
|
|
"loss": 0.1053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09820397198200226,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5677.9,
|
|
"valid_targets_min": 2240
|
|
},
|
|
{
|
|
"epoch": 5.194585448392555,
|
|
"grad_norm": 0.6771897142507459,
|
|
"learning_rate": 7.586834157983545e-06,
|
|
"loss": 0.1059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13634631037712097,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3841.1,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 5.2030456852791875,
|
|
"grad_norm": 0.5528044112015387,
|
|
"learning_rate": 7.520781432662135e-06,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11673308908939362,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4960.6,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 5.211505922165821,
|
|
"grad_norm": 0.5124024047355318,
|
|
"learning_rate": 7.4549508543284264e-06,
|
|
"loss": 0.0942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10015653073787689,
|
|
"step": 3080,
|
|
"valid_targets_mean": 5421.8,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.219966159052453,
|
|
"grad_norm": 0.5617115231260924,
|
|
"learning_rate": 7.389343594855849e-06,
|
|
"loss": 0.103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11335933953523636,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3948.6,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 5.228426395939087,
|
|
"grad_norm": 0.534322091604494,
|
|
"learning_rate": 7.323960822142451e-06,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08656813204288483,
|
|
"step": 3090,
|
|
"valid_targets_mean": 5112.8,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 5.236886632825719,
|
|
"grad_norm": 0.46230816123704255,
|
|
"learning_rate": 7.2588037000901245e-06,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07273416221141815,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4951.0,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 5.2453468697123515,
|
|
"grad_norm": 0.5250748274535079,
|
|
"learning_rate": 7.193873388583847e-06,
|
|
"loss": 0.093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09708460420370102,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5053.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 5.253807106598985,
|
|
"grad_norm": 0.5481569161002939,
|
|
"learning_rate": 7.1291710434711015e-06,
|
|
"loss": 0.1076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10908104479312897,
|
|
"step": 3105,
|
|
"valid_targets_mean": 5365.4,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 5.262267343485617,
|
|
"grad_norm": 0.5366792797480076,
|
|
"learning_rate": 7.06469781654123e-06,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09573692083358765,
|
|
"step": 3110,
|
|
"valid_targets_mean": 5436.2,
|
|
"valid_targets_min": 3068
|
|
},
|
|
{
|
|
"epoch": 5.270727580372251,
|
|
"grad_norm": 0.5393364042050155,
|
|
"learning_rate": 7.000454855504974e-06,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10076029598712921,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4773.4,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 5.279187817258883,
|
|
"grad_norm": 0.567116368550176,
|
|
"learning_rate": 6.9364433039740255e-06,
|
|
"loss": 0.0924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08370479941368103,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5204.2,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 5.287648054145516,
|
|
"grad_norm": 0.5432965783237929,
|
|
"learning_rate": 6.872664301440674e-06,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10108493268489838,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5178.6,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 5.296108291032149,
|
|
"grad_norm": 0.543972282709709,
|
|
"learning_rate": 6.809118983257523e-06,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.105931356549263,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4309.5,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 5.304568527918782,
|
|
"grad_norm": 0.4904153521518797,
|
|
"learning_rate": 6.74580848061728e-06,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08686600625514984,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4950.2,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 5.313028764805415,
|
|
"grad_norm": 0.568425437878163,
|
|
"learning_rate": 6.682733920532616e-06,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11276842653751373,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3828.9,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 5.321489001692047,
|
|
"grad_norm": 0.4661465378067185,
|
|
"learning_rate": 6.6198964258161035e-06,
|
|
"loss": 0.1016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0786193311214447,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4937.8,
|
|
"valid_targets_min": 2617
|
|
},
|
|
{
|
|
"epoch": 5.32994923857868,
|
|
"grad_norm": 0.6106895839496127,
|
|
"learning_rate": 6.557297115060233e-06,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11392094194889069,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3562.1,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 5.338409475465313,
|
|
"grad_norm": 0.5452488763371566,
|
|
"learning_rate": 6.494937102617496e-06,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20042118430137634,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5110.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 5.346869712351946,
|
|
"grad_norm": 0.4807768195907496,
|
|
"learning_rate": 6.432817498580552e-06,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07285821437835693,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4066.4,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 5.355329949238579,
|
|
"grad_norm": 0.5630751542246044,
|
|
"learning_rate": 6.370939408762466e-06,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09935158491134644,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4721.2,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 5.363790186125211,
|
|
"grad_norm": 0.5927777056003928,
|
|
"learning_rate": 6.309303934677014e-06,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11449384689331055,
|
|
"step": 3170,
|
|
"valid_targets_mean": 5692.8,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 5.372250423011844,
|
|
"grad_norm": 0.5240503841426364,
|
|
"learning_rate": 6.247912173519106e-06,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10193105041980743,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5117.6,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 5.380710659898477,
|
|
"grad_norm": 0.4761475988684068,
|
|
"learning_rate": 6.186765218145206e-06,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08963773399591446,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4896.5,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 5.38917089678511,
|
|
"grad_norm": 0.5722412476241824,
|
|
"learning_rate": 6.125864157053906e-06,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12817630171775818,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4908.6,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 5.397631133671743,
|
|
"grad_norm": 0.49278173421845856,
|
|
"learning_rate": 6.065210074366579e-06,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10473261028528214,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5137.9,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 5.406091370558376,
|
|
"grad_norm": 0.5075113549549578,
|
|
"learning_rate": 6.004804049807998e-06,
|
|
"loss": 0.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09309966117143631,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4751.4,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 5.414551607445008,
|
|
"grad_norm": 0.5485431325310579,
|
|
"learning_rate": 5.944647158687213e-06,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10693592578172684,
|
|
"step": 3200,
|
|
"valid_targets_mean": 6371.8,
|
|
"valid_targets_min": 2948
|
|
},
|
|
{
|
|
"epoch": 5.423011844331642,
|
|
"grad_norm": 0.49210005616512625,
|
|
"learning_rate": 5.88474047187832e-06,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.096981942653656,
|
|
"step": 3205,
|
|
"valid_targets_mean": 5573.1,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 5.431472081218274,
|
|
"grad_norm": 0.5200775023541058,
|
|
"learning_rate": 5.825085055801478e-06,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12143023312091827,
|
|
"step": 3210,
|
|
"valid_targets_mean": 5733.1,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 5.439932318104907,
|
|
"grad_norm": 0.47424102385586653,
|
|
"learning_rate": 5.7656819724038585e-06,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08302508294582367,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 5.44839255499154,
|
|
"grad_norm": 0.6237870171213488,
|
|
"learning_rate": 5.706532279140782e-06,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10895833373069763,
|
|
"step": 3220,
|
|
"valid_targets_mean": 5346.4,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 5.456852791878172,
|
|
"grad_norm": 0.547485267722891,
|
|
"learning_rate": 5.647637028956883e-06,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.099636010825634,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4956.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.465313028764806,
|
|
"grad_norm": 0.5108065406861291,
|
|
"learning_rate": 5.588997270267342e-06,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09709082543849945,
|
|
"step": 3230,
|
|
"valid_targets_mean": 4640.7,
|
|
"valid_targets_min": 1728
|
|
},
|
|
{
|
|
"epoch": 5.473773265651438,
|
|
"grad_norm": 0.5240722981573627,
|
|
"learning_rate": 5.530614046939286e-06,
|
|
"loss": 0.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282409429550171,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4488.6,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 5.482233502538071,
|
|
"grad_norm": 0.4882686571191812,
|
|
"learning_rate": 5.472488398273133e-06,
|
|
"loss": 0.0931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09069374203681946,
|
|
"step": 3240,
|
|
"valid_targets_mean": 5731.9,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 5.490693739424704,
|
|
"grad_norm": 0.46977594297784586,
|
|
"learning_rate": 5.4146213589841316e-06,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10214699804782867,
|
|
"step": 3245,
|
|
"valid_targets_mean": 5740.6,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 5.499153976311336,
|
|
"grad_norm": 0.4854179194155063,
|
|
"learning_rate": 5.357013959183938e-06,
|
|
"loss": 0.1004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0880771279335022,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5450.8,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 5.50761421319797,
|
|
"grad_norm": 0.5701100809626418,
|
|
"learning_rate": 5.29966722436227e-06,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12139388173818588,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4442.0,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 5.516074450084602,
|
|
"grad_norm": 0.46626102757844284,
|
|
"learning_rate": 5.242582175368656e-06,
|
|
"loss": 0.0936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08438153564929962,
|
|
"step": 3260,
|
|
"valid_targets_mean": 5807.1,
|
|
"valid_targets_min": 2397
|
|
},
|
|
{
|
|
"epoch": 5.5245346869712355,
|
|
"grad_norm": 0.5347741915044524,
|
|
"learning_rate": 5.185759828394262e-06,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11006850004196167,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5391.4,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 5.532994923857868,
|
|
"grad_norm": 0.588354840329431,
|
|
"learning_rate": 5.129201194953801e-06,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0988774299621582,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3892.1,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 5.541455160744501,
|
|
"grad_norm": 0.5190814386386787,
|
|
"learning_rate": 5.072907281867536e-06,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0867508053779602,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5166.2,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.549915397631134,
|
|
"grad_norm": 0.5405268888349825,
|
|
"learning_rate": 5.016879091243339e-06,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10581696033477783,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4462.9,
|
|
"valid_targets_min": 2207
|
|
},
|
|
{
|
|
"epoch": 5.558375634517766,
|
|
"grad_norm": 0.5289789456757176,
|
|
"learning_rate": 4.961117620458869e-06,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10458050668239594,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5182.1,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 5.5668358714043995,
|
|
"grad_norm": 0.6014811712477885,
|
|
"learning_rate": 4.9056238621438e-06,
|
|
"loss": 0.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1116582527756691,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4030.6,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 5.575296108291032,
|
|
"grad_norm": 0.4473615663155537,
|
|
"learning_rate": 4.85039880416219e-06,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09415919333696365,
|
|
"step": 3295,
|
|
"valid_targets_mean": 6032.3,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 5.583756345177665,
|
|
"grad_norm": 0.4999229405157226,
|
|
"learning_rate": 4.795443429594826e-06,
|
|
"loss": 0.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10442546755075455,
|
|
"step": 3300,
|
|
"valid_targets_mean": 6014.5,
|
|
"valid_targets_min": 3014
|
|
},
|
|
{
|
|
"epoch": 5.592216582064298,
|
|
"grad_norm": 0.5125074398542362,
|
|
"learning_rate": 4.740758716721803e-06,
|
|
"loss": 0.1059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12040921300649643,
|
|
"step": 3305,
|
|
"valid_targets_mean": 5209.5,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 5.60067681895093,
|
|
"grad_norm": 0.5803045524045409,
|
|
"learning_rate": 4.686345639005044e-06,
|
|
"loss": 0.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10885774344205856,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4536.6,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 5.6091370558375635,
|
|
"grad_norm": 0.5313773650268618,
|
|
"learning_rate": 4.6322051650710045e-06,
|
|
"loss": 0.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11994049698114395,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4834.9,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 5.617597292724196,
|
|
"grad_norm": 0.5923348784501238,
|
|
"learning_rate": 4.5783382586934484e-06,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09442084282636642,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4502.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 5.626057529610829,
|
|
"grad_norm": 0.5339873497191369,
|
|
"learning_rate": 4.5247458787762225e-06,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10028509795665741,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4382.4,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 5.634517766497462,
|
|
"grad_norm": 0.5668440179534936,
|
|
"learning_rate": 4.471428979336277e-06,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12189441919326782,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4854.8,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 5.642978003384095,
|
|
"grad_norm": 0.4853650123569915,
|
|
"learning_rate": 4.41838850948659e-06,
|
|
"loss": 0.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09861092269420624,
|
|
"step": 3335,
|
|
"valid_targets_mean": 5194.8,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 5.6514382402707275,
|
|
"grad_norm": 0.5277716383986365,
|
|
"learning_rate": 4.365625413419365e-06,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10952402651309967,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4806.1,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 5.659898477157361,
|
|
"grad_norm": 0.5581886135902275,
|
|
"learning_rate": 4.313140630389148e-06,
|
|
"loss": 0.1013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10420006513595581,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4938.6,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 5.668358714043993,
|
|
"grad_norm": 0.5017262872294549,
|
|
"learning_rate": 4.260935094696139e-06,
|
|
"loss": 0.101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08997639268636703,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4635.6,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 5.676818950930626,
|
|
"grad_norm": 0.581802213822545,
|
|
"learning_rate": 4.209009735669563e-06,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10598112642765045,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3492.5,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 5.685279187817259,
|
|
"grad_norm": 0.4536862776834267,
|
|
"learning_rate": 4.157365477651119e-06,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08999267220497131,
|
|
"step": 3360,
|
|
"valid_targets_mean": 6219.8,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 5.6937394247038915,
|
|
"grad_norm": 0.47411530549986003,
|
|
"learning_rate": 4.106003239978529e-06,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11165902018547058,
|
|
"step": 3365,
|
|
"valid_targets_mean": 5259.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 5.702199661590525,
|
|
"grad_norm": 0.5569379643465078,
|
|
"learning_rate": 4.054923936969166e-06,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375100314617157,
|
|
"step": 3370,
|
|
"valid_targets_mean": 5087.4,
|
|
"valid_targets_min": 2802
|
|
},
|
|
{
|
|
"epoch": 5.710659898477157,
|
|
"grad_norm": 0.5347597414749146,
|
|
"learning_rate": 4.00412847790379e-06,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10659273713827133,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5056.4,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 5.71912013536379,
|
|
"grad_norm": 0.5102623248767796,
|
|
"learning_rate": 3.953617767010349e-06,
|
|
"loss": 0.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10672193765640259,
|
|
"step": 3380,
|
|
"valid_targets_mean": 5267.9,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.727580372250423,
|
|
"grad_norm": 0.5497446271692702,
|
|
"learning_rate": 3.90339270344789e-06,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1138182133436203,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4773.8,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 5.7360406091370555,
|
|
"grad_norm": 0.5639157684025348,
|
|
"learning_rate": 3.85345418129055e-06,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10153436660766602,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5084.5,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 5.744500846023689,
|
|
"grad_norm": 0.5181642784252168,
|
|
"learning_rate": 3.8038030895116375e-06,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10071215778589249,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4853.3,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 5.752961082910321,
|
|
"grad_norm": 0.47693001765943094,
|
|
"learning_rate": 3.7544403119678285e-06,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0908127874135971,
|
|
"step": 3400,
|
|
"valid_targets_mean": 5394.6,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 5.761421319796955,
|
|
"grad_norm": 0.4830884983293547,
|
|
"learning_rate": 3.705366727383388e-06,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09165860712528229,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5469.1,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 5.769881556683587,
|
|
"grad_norm": 0.47608436361899914,
|
|
"learning_rate": 3.6565832093345745e-06,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08629769086837769,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5247.1,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 5.77834179357022,
|
|
"grad_norm": 0.7155549647405476,
|
|
"learning_rate": 3.608090626234062e-06,
|
|
"loss": 0.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12667709589004517,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3406.8,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 5.786802030456853,
|
|
"grad_norm": 0.5002286943349183,
|
|
"learning_rate": 3.559889841315485e-06,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11120729893445969,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5150.2,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 5.795262267343485,
|
|
"grad_norm": 0.6052758999513584,
|
|
"learning_rate": 3.5119817126180953e-06,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1012842059135437,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3992.0,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 5.803722504230119,
|
|
"grad_norm": 0.46344471347281785,
|
|
"learning_rate": 3.4643670929714323e-06,
|
|
"loss": 0.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09275519847869873,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5883.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 5.812182741116751,
|
|
"grad_norm": 0.486356172618942,
|
|
"learning_rate": 3.4170468299802196e-06,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08840268850326538,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5393.6,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 5.820642978003384,
|
|
"grad_norm": 0.5068345109705868,
|
|
"learning_rate": 3.3700217660091927e-06,
|
|
"loss": 0.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11103978753089905,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5672.1,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 5.829103214890017,
|
|
"grad_norm": 0.6045798121265903,
|
|
"learning_rate": 3.3232927381681667e-06,
|
|
"loss": 0.1011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11271055787801743,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4014.2,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 5.837563451776649,
|
|
"grad_norm": 0.5705745022645152,
|
|
"learning_rate": 3.276860578297123e-06,
|
|
"loss": 0.1022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09084933996200562,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5095.1,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 5.846023688663283,
|
|
"grad_norm": 0.5155478926916548,
|
|
"learning_rate": 3.2307261129513566e-06,
|
|
"loss": 0.1043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08574243634939194,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4663.9,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 5.854483925549915,
|
|
"grad_norm": 0.4818960248155182,
|
|
"learning_rate": 3.1848901633868355e-06,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09491369128227234,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5319.2,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 5.862944162436548,
|
|
"grad_norm": 0.5159165458585783,
|
|
"learning_rate": 3.1393535455455183e-06,
|
|
"loss": 0.1057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09796810150146484,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5420.6,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 5.871404399323181,
|
|
"grad_norm": 0.5153304169760907,
|
|
"learning_rate": 3.094117070040861e-06,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11242002993822098,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5150.9,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 5.879864636209814,
|
|
"grad_norm": 0.5418208757527092,
|
|
"learning_rate": 3.0491815421433825e-06,
|
|
"loss": 0.1043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0878225713968277,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4655.0,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 5.888324873096447,
|
|
"grad_norm": 0.4979820894213741,
|
|
"learning_rate": 3.00454776176631e-06,
|
|
"loss": 0.1093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09508287906646729,
|
|
"step": 3480,
|
|
"valid_targets_mean": 5418.4,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 5.89678510998308,
|
|
"grad_norm": 0.5045354722776119,
|
|
"learning_rate": 2.9602165234513868e-06,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10958752781152725,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5060.5,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 5.905245346869712,
|
|
"grad_norm": 0.52384572168439,
|
|
"learning_rate": 2.916188616354669e-06,
|
|
"loss": 0.1076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1274002194404602,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5585.6,
|
|
"valid_targets_min": 2333
|
|
},
|
|
{
|
|
"epoch": 5.913705583756345,
|
|
"grad_norm": 0.5813895288741002,
|
|
"learning_rate": 2.872464824232526e-06,
|
|
"loss": 0.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10212315618991852,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4466.1,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 5.922165820642978,
|
|
"grad_norm": 0.5297889494714989,
|
|
"learning_rate": 2.8290459254276538e-06,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10411001741886139,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4912.7,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 5.930626057529611,
|
|
"grad_norm": 0.5472262938337276,
|
|
"learning_rate": 2.7859326928552444e-06,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11614669859409332,
|
|
"step": 3505,
|
|
"valid_targets_mean": 5338.5,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 5.939086294416244,
|
|
"grad_norm": 0.5125194107666327,
|
|
"learning_rate": 2.7431258939892157e-06,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11367000639438629,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5368.2,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 5.947546531302876,
|
|
"grad_norm": 0.5031132393040323,
|
|
"learning_rate": 2.700626290848547e-06,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11168454587459564,
|
|
"step": 3515,
|
|
"valid_targets_mean": 6275.1,
|
|
"valid_targets_min": 3096
|
|
},
|
|
{
|
|
"epoch": 5.95600676818951,
|
|
"grad_norm": 0.46343136229617843,
|
|
"learning_rate": 2.658434639983718e-06,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09521348774433136,
|
|
"step": 3520,
|
|
"valid_targets_mean": 5478.4,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 5.964467005076142,
|
|
"grad_norm": 0.530010972470127,
|
|
"learning_rate": 2.616551692463243e-06,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11282682418823242,
|
|
"step": 3525,
|
|
"valid_targets_mean": 5244.1,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 5.972927241962775,
|
|
"grad_norm": 0.5521249468948588,
|
|
"learning_rate": 2.5749781938603067e-06,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12777575850486755,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5451.6,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 5.981387478849408,
|
|
"grad_norm": 0.5671109856254452,
|
|
"learning_rate": 2.5337148842394687e-06,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09757398813962936,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4380.4,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 5.98984771573604,
|
|
"grad_norm": 0.5058981136653168,
|
|
"learning_rate": 2.4927624981435105e-06,
|
|
"loss": 0.0997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08428417146205902,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4734.4,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 5.998307952622674,
|
|
"grad_norm": 0.5251669134966137,
|
|
"learning_rate": 2.4521217645803573e-06,
|
|
"loss": 0.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10468511283397675,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4463.6,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 6.006768189509306,
|
|
"grad_norm": 0.4334377995437898,
|
|
"learning_rate": 2.4117934070100922e-06,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08374033868312836,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5989.1,
|
|
"valid_targets_min": 2790
|
|
},
|
|
{
|
|
"epoch": 6.0152284263959395,
|
|
"grad_norm": 0.46603613064927013,
|
|
"learning_rate": 2.3717781433320974e-06,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08957934379577637,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5251.0,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 6.023688663282572,
|
|
"grad_norm": 0.521526990767039,
|
|
"learning_rate": 2.332076685872231e-06,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12568718194961548,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5509.4,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 6.032148900169204,
|
|
"grad_norm": 0.5130554763795364,
|
|
"learning_rate": 2.2926897413702043e-06,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10350829362869263,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5392.0,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 6.040609137055838,
|
|
"grad_norm": 0.5048897762601476,
|
|
"learning_rate": 2.2536180109669625e-06,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11529231071472168,
|
|
"step": 3570,
|
|
"valid_targets_mean": 5074.0,
|
|
"valid_targets_min": 3040
|
|
},
|
|
{
|
|
"epoch": 6.04906937394247,
|
|
"grad_norm": 0.624901379519202,
|
|
"learning_rate": 2.2148621901921973e-06,
|
|
"loss": 0.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11761482059955597,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4654.3,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 6.0575296108291035,
|
|
"grad_norm": 0.517794661303843,
|
|
"learning_rate": 2.17642296895201e-06,
|
|
"loss": 0.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10267569124698639,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4977.0,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 6.065989847715736,
|
|
"grad_norm": 0.5439136870612994,
|
|
"learning_rate": 2.138301031516574e-06,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09601834416389465,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4889.3,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 6.074450084602369,
|
|
"grad_norm": 0.47692453585045774,
|
|
"learning_rate": 2.100497056508004e-06,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08719412982463837,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5063.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 6.082910321489002,
|
|
"grad_norm": 0.4828021041915194,
|
|
"learning_rate": 2.063011716888237e-06,
|
|
"loss": 0.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08156873285770416,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5224.1,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 6.091370558375634,
|
|
"grad_norm": 0.5606535550595851,
|
|
"learning_rate": 2.025845679947076e-06,
|
|
"loss": 0.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11357354372739792,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4846.6,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 6.0998307952622675,
|
|
"grad_norm": 0.4943358785219895,
|
|
"learning_rate": 1.988999607290305e-06,
|
|
"loss": 0.0922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08067478239536285,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4455.6,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 6.1082910321489,
|
|
"grad_norm": 0.5466972728545532,
|
|
"learning_rate": 1.9524741548278968e-06,
|
|
"loss": 0.0984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09645147621631622,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4905.4,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 6.116751269035533,
|
|
"grad_norm": 0.44467694822988857,
|
|
"learning_rate": 1.9162699727623768e-06,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10161375999450684,
|
|
"step": 3615,
|
|
"valid_targets_mean": 6859.4,
|
|
"valid_targets_min": 1904
|
|
},
|
|
{
|
|
"epoch": 6.125211505922166,
|
|
"grad_norm": 0.5118357122198756,
|
|
"learning_rate": 1.880387705577198e-06,
|
|
"loss": 0.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07639001309871674,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4731.6,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 6.133671742808799,
|
|
"grad_norm": 0.5011732676405243,
|
|
"learning_rate": 1.8448279920253043e-06,
|
|
"loss": 0.0984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07988914847373962,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5076.1,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 6.1421319796954315,
|
|
"grad_norm": 0.4894462011457796,
|
|
"learning_rate": 1.809591465117746e-06,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10325116664171219,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5921.6,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.150592216582064,
|
|
"grad_norm": 0.44877923854299273,
|
|
"learning_rate": 1.774678752112413e-06,
|
|
"loss": 0.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08767088502645493,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5384.1,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 6.159052453468697,
|
|
"grad_norm": 0.5203231255359002,
|
|
"learning_rate": 1.740090474502869e-06,
|
|
"loss": 0.0997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10471676290035248,
|
|
"step": 3640,
|
|
"valid_targets_mean": 5601.7,
|
|
"valid_targets_min": 2602
|
|
},
|
|
{
|
|
"epoch": 6.16751269035533,
|
|
"grad_norm": 0.5166023532411659,
|
|
"learning_rate": 1.7058272480072879e-06,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09871937334537506,
|
|
"step": 3645,
|
|
"valid_targets_mean": 5276.1,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 6.175972927241963,
|
|
"grad_norm": 0.4760878232412381,
|
|
"learning_rate": 1.671889682557497e-06,
|
|
"loss": 0.0871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08054915070533752,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4883.8,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 6.1844331641285955,
|
|
"grad_norm": 0.4492776443811122,
|
|
"learning_rate": 1.6382783822881098e-06,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.087978295981884,
|
|
"step": 3655,
|
|
"valid_targets_mean": 5679.5,
|
|
"valid_targets_min": 2801
|
|
},
|
|
{
|
|
"epoch": 6.192893401015229,
|
|
"grad_norm": 0.5007812415652039,
|
|
"learning_rate": 1.6049939455257924e-06,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11002659052610397,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5007.1,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 6.201353637901861,
|
|
"grad_norm": 0.5382235949511616,
|
|
"learning_rate": 1.5720369647785738e-06,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10036440938711166,
|
|
"step": 3665,
|
|
"valid_targets_mean": 4048.7,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 6.209813874788494,
|
|
"grad_norm": 0.5796752893158671,
|
|
"learning_rate": 1.5394080267253396e-06,
|
|
"loss": 0.1022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13826364278793335,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4822.8,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 6.218274111675127,
|
|
"grad_norm": 0.5047042158872018,
|
|
"learning_rate": 1.5071077122053756e-06,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09457089751958847,
|
|
"step": 3675,
|
|
"valid_targets_mean": 5433.9,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 6.2267343485617594,
|
|
"grad_norm": 0.5252876431920921,
|
|
"learning_rate": 1.4751365962080045e-06,
|
|
"loss": 0.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11120679974555969,
|
|
"step": 3680,
|
|
"valid_targets_mean": 5362.2,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 6.235194585448393,
|
|
"grad_norm": 0.5924220082939049,
|
|
"learning_rate": 1.443495247862392e-06,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10887941718101501,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4560.0,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 6.243654822335025,
|
|
"grad_norm": 0.9231733544874481,
|
|
"learning_rate": 1.4121842304273692e-06,
|
|
"loss": 0.1098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1148952916264534,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4076.4,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 6.2521150592216586,
|
|
"grad_norm": 0.5398467498503117,
|
|
"learning_rate": 1.3812041012814504e-06,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0994405746459961,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4489.6,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 6.260575296108291,
|
|
"grad_norm": 0.4471793559998494,
|
|
"learning_rate": 1.3505554119128861e-06,
|
|
"loss": 0.0978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0758499950170517,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4993.8,
|
|
"valid_targets_min": 1830
|
|
},
|
|
{
|
|
"epoch": 6.269035532994923,
|
|
"grad_norm": 0.4832457224683788,
|
|
"learning_rate": 1.3202387079098356e-06,
|
|
"loss": 0.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08595097064971924,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4819.7,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 6.277495769881557,
|
|
"grad_norm": 0.5635192661033877,
|
|
"learning_rate": 1.2902545289506896e-06,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.109042689204216,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4759.6,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 6.285956006768189,
|
|
"grad_norm": 0.46706667095714566,
|
|
"learning_rate": 1.2606034087944251e-06,
|
|
"loss": 0.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09136313945055008,
|
|
"step": 3715,
|
|
"valid_targets_mean": 5316.7,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 6.2944162436548226,
|
|
"grad_norm": 0.5624330466190937,
|
|
"learning_rate": 1.2312858752711332e-06,
|
|
"loss": 0.101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10467932373285294,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4631.4,
|
|
"valid_targets_min": 2783
|
|
},
|
|
{
|
|
"epoch": 6.302876480541455,
|
|
"grad_norm": 0.4994056706139944,
|
|
"learning_rate": 1.20230245027261e-06,
|
|
"loss": 0.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08694010972976685,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5343.1,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 6.311336717428088,
|
|
"grad_norm": 0.515626527652221,
|
|
"learning_rate": 1.1736536497430584e-06,
|
|
"loss": 0.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09744478762149811,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4703.9,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 6.319796954314721,
|
|
"grad_norm": 0.6445857371168183,
|
|
"learning_rate": 1.1453399836699264e-06,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1183057576417923,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4174.9,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 6.328257191201354,
|
|
"grad_norm": 0.5091479203930267,
|
|
"learning_rate": 1.1173619560748006e-06,
|
|
"loss": 0.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10634033381938934,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5213.0,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 6.3367174280879865,
|
|
"grad_norm": 0.500431874575773,
|
|
"learning_rate": 1.0897200650044604e-06,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08392609655857086,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4898.2,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 6.345177664974619,
|
|
"grad_norm": 0.5114041737894246,
|
|
"learning_rate": 1.0624148025219916e-06,
|
|
"loss": 0.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08959788084030151,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5145.5,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 6.353637901861252,
|
|
"grad_norm": 0.4692049259439289,
|
|
"learning_rate": 1.035446654698038e-06,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08107668906450272,
|
|
"step": 3755,
|
|
"valid_targets_mean": 5242.1,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 6.362098138747885,
|
|
"grad_norm": 0.5644520824573487,
|
|
"learning_rate": 1.0088161016021503e-06,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276993751525879,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4944.6,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 6.370558375634518,
|
|
"grad_norm": 0.5612696035084762,
|
|
"learning_rate": 9.825236172942287e-07,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126927450299263,
|
|
"step": 3765,
|
|
"valid_targets_mean": 4653.9,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 6.3790186125211505,
|
|
"grad_norm": 0.5602341098783055,
|
|
"learning_rate": 9.565696698160964e-07,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08963138610124588,
|
|
"step": 3770,
|
|
"valid_targets_mean": 4078.1,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 6.387478849407783,
|
|
"grad_norm": 0.5282951533512035,
|
|
"learning_rate": 9.309547211831571e-07,
|
|
"loss": 0.0937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08957298845052719,
|
|
"step": 3775,
|
|
"valid_targets_mean": 4611.1,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 6.395939086294416,
|
|
"grad_norm": 0.486260593560147,
|
|
"learning_rate": 9.056792273761883e-07,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0829610526561737,
|
|
"step": 3780,
|
|
"valid_targets_mean": 4643.8,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 6.404399323181049,
|
|
"grad_norm": 0.5483278483603932,
|
|
"learning_rate": 8.807436383331925e-07,
|
|
"loss": 0.0984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10767057538032532,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4835.0,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 6.412859560067682,
|
|
"grad_norm": 0.6137889848447139,
|
|
"learning_rate": 8.561483979414275e-07,
|
|
"loss": 0.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13027939200401306,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4653.6,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 6.4213197969543145,
|
|
"grad_norm": 0.5140888393211731,
|
|
"learning_rate": 8.318939440294693e-07,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09655652940273285,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4913.4,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 6.429780033840948,
|
|
"grad_norm": 0.5492333236931263,
|
|
"learning_rate": 8.079807083594393e-07,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09877557307481766,
|
|
"step": 3800,
|
|
"valid_targets_mean": 4262.1,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 6.43824027072758,
|
|
"grad_norm": 0.45865445762050056,
|
|
"learning_rate": 7.844091166193158e-07,
|
|
"loss": 0.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09923732280731201,
|
|
"step": 3805,
|
|
"valid_targets_mean": 6032.1,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 6.446700507614214,
|
|
"grad_norm": 0.5851828493684269,
|
|
"learning_rate": 7.611795884153416e-07,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10672376304864883,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3939.9,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 6.455160744500846,
|
|
"grad_norm": 0.42394116938449,
|
|
"learning_rate": 7.3829253726458e-07,
|
|
"loss": 0.0892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08186431229114532,
|
|
"step": 3815,
|
|
"valid_targets_mean": 6307.1,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 6.4636209813874785,
|
|
"grad_norm": 0.4916658523326147,
|
|
"learning_rate": 7.157483705875234e-07,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08187875151634216,
|
|
"step": 3820,
|
|
"valid_targets_mean": 6121.4,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 6.472081218274112,
|
|
"grad_norm": 0.4639457928720263,
|
|
"learning_rate": 6.935474897008676e-07,
|
|
"loss": 0.0914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08266662061214447,
|
|
"step": 3825,
|
|
"valid_targets_mean": 5469.9,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 6.480541455160744,
|
|
"grad_norm": 0.4575360153265071,
|
|
"learning_rate": 6.716902898103583e-07,
|
|
"loss": 0.1001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0824141651391983,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5853.2,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 6.489001692047378,
|
|
"grad_norm": 0.5888154679083005,
|
|
"learning_rate": 6.501771600037354e-07,
|
|
"loss": 0.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09684567153453827,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4420.0,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 6.49746192893401,
|
|
"grad_norm": 0.5227917135226477,
|
|
"learning_rate": 6.290084832438515e-07,
|
|
"loss": 0.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08650623261928558,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4911.8,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 6.5059221658206425,
|
|
"grad_norm": 0.48685773560740486,
|
|
"learning_rate": 6.081846363618105e-07,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08294326812028885,
|
|
"step": 3845,
|
|
"valid_targets_mean": 5178.1,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 6.514382402707276,
|
|
"grad_norm": 0.4933042108984585,
|
|
"learning_rate": 5.877059900502847e-07,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08742164075374603,
|
|
"step": 3850,
|
|
"valid_targets_mean": 5444.9,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 6.522842639593908,
|
|
"grad_norm": 0.5123563748435379,
|
|
"learning_rate": 5.675729088569059e-07,
|
|
"loss": 0.1004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10979636013507843,
|
|
"step": 3855,
|
|
"valid_targets_mean": 5534.8,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 6.531302876480542,
|
|
"grad_norm": 0.5178060584086482,
|
|
"learning_rate": 5.477857511777806e-07,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08917900174856186,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5632.7,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 6.539763113367174,
|
|
"grad_norm": 0.5996139922986976,
|
|
"learning_rate": 5.283448692511073e-07,
|
|
"loss": 0.0976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10269352793693542,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4415.8,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 6.548223350253807,
|
|
"grad_norm": 0.4674047557637622,
|
|
"learning_rate": 5.092506091509064e-07,
|
|
"loss": 0.1003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08618251979351044,
|
|
"step": 3870,
|
|
"valid_targets_mean": 5891.9,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 6.55668358714044,
|
|
"grad_norm": 0.45254056149999244,
|
|
"learning_rate": 4.905033107808654e-07,
|
|
"loss": 0.0988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10120868682861328,
|
|
"step": 3875,
|
|
"valid_targets_mean": 5923.3,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 6.565143824027073,
|
|
"grad_norm": 0.4840552721299055,
|
|
"learning_rate": 4.7210330786827683e-07,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07866621762514114,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4633.6,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 6.573604060913706,
|
|
"grad_norm": 0.5359006760311739,
|
|
"learning_rate": 4.5405092795810734e-07,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09255710989236832,
|
|
"step": 3885,
|
|
"valid_targets_mean": 4744.4,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 6.582064297800338,
|
|
"grad_norm": 0.5720729297669024,
|
|
"learning_rate": 4.3634649240716254e-07,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12340878695249557,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4919.5,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 6.590524534686971,
|
|
"grad_norm": 0.5136412729903628,
|
|
"learning_rate": 4.189903163783693e-07,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08942011743783951,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4765.4,
|
|
"valid_targets_min": 2270
|
|
},
|
|
{
|
|
"epoch": 6.598984771573604,
|
|
"grad_norm": 0.4619729984591383,
|
|
"learning_rate": 4.01982708835158e-07,
|
|
"loss": 0.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09319942444562912,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5849.9,
|
|
"valid_targets_min": 2738
|
|
},
|
|
{
|
|
"epoch": 6.607445008460237,
|
|
"grad_norm": 0.9407310395586151,
|
|
"learning_rate": 3.8532397253597367e-07,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09881626069545746,
|
|
"step": 3905,
|
|
"valid_targets_mean": 5391.5,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 6.61590524534687,
|
|
"grad_norm": 0.4588148266565705,
|
|
"learning_rate": 3.690144040288823e-07,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0952102392911911,
|
|
"step": 3910,
|
|
"valid_targets_mean": 6032.8,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 6.624365482233502,
|
|
"grad_norm": 0.4731640713967488,
|
|
"learning_rate": 3.530542936462822e-07,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09247958660125732,
|
|
"step": 3915,
|
|
"valid_targets_mean": 5713.1,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 6.632825719120135,
|
|
"grad_norm": 0.5720412855983552,
|
|
"learning_rate": 3.374439254997497e-07,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09772346168756485,
|
|
"step": 3920,
|
|
"valid_targets_mean": 4572.2,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 6.641285956006768,
|
|
"grad_norm": 0.48361393152470394,
|
|
"learning_rate": 3.221835774749771e-07,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09274819493293762,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5462.0,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 6.649746192893401,
|
|
"grad_norm": 0.5630998813492135,
|
|
"learning_rate": 3.072735212268163e-07,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12273791432380676,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4709.9,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 6.658206429780034,
|
|
"grad_norm": 0.4897117173580522,
|
|
"learning_rate": 2.9271402217446286e-07,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08731566369533539,
|
|
"step": 3935,
|
|
"valid_targets_mean": 5608.7,
|
|
"valid_targets_min": 1857
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.5716851047163992,
|
|
"learning_rate": 2.7850533949671076e-07,
|
|
"loss": 0.0967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07912249118089676,
|
|
"step": 3940,
|
|
"valid_targets_mean": 4581.1,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 6.675126903553299,
|
|
"grad_norm": 0.5726455187906027,
|
|
"learning_rate": 2.646477261273539e-07,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903196573257446,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4466.9,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 6.683587140439933,
|
|
"grad_norm": 0.4563520388982015,
|
|
"learning_rate": 2.511414287506697e-07,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08261684328317642,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5908.8,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 6.692047377326565,
|
|
"grad_norm": 0.521114941511451,
|
|
"learning_rate": 2.379866877970427e-07,
|
|
"loss": 0.0967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10266834497451782,
|
|
"step": 3955,
|
|
"valid_targets_mean": 5412.4,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 6.700507614213198,
|
|
"grad_norm": 0.5231209352710996,
|
|
"learning_rate": 2.251837374386723e-07,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09191662073135376,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5143.2,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 6.708967851099831,
|
|
"grad_norm": 0.5388968470574731,
|
|
"learning_rate": 2.1273280558540944e-07,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09500572085380554,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4502.2,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 6.717428087986463,
|
|
"grad_norm": 0.4856389289700242,
|
|
"learning_rate": 2.006341138807022e-07,
|
|
"loss": 0.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09137289971113205,
|
|
"step": 3970,
|
|
"valid_targets_mean": 6019.6,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 6.725888324873097,
|
|
"grad_norm": 0.5152258498121826,
|
|
"learning_rate": 1.8888787769764328e-07,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09885644912719727,
|
|
"step": 3975,
|
|
"valid_targets_mean": 5402.8,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 6.734348561759729,
|
|
"grad_norm": 0.49178023625596573,
|
|
"learning_rate": 1.7749430613514195e-07,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09314092993736267,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4878.6,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 6.742808798646362,
|
|
"grad_norm": 0.48953719410649044,
|
|
"learning_rate": 1.6645360201420046e-07,
|
|
"loss": 0.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08180683851242065,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4933.0,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 6.751269035532995,
|
|
"grad_norm": 0.47588716432951444,
|
|
"learning_rate": 1.5576596187429905e-07,
|
|
"loss": 0.0947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08491626381874084,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5403.2,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 6.759729272419627,
|
|
"grad_norm": 0.5089507784227532,
|
|
"learning_rate": 1.4543157596990542e-07,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10244308412075043,
|
|
"step": 3995,
|
|
"valid_targets_mean": 5009.8,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 6.768189509306261,
|
|
"grad_norm": 0.7300939040154428,
|
|
"learning_rate": 1.3545062826707977e-07,
|
|
"loss": 0.1057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12415587902069092,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4203.2,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 6.776649746192893,
|
|
"grad_norm": 0.5507318317483387,
|
|
"learning_rate": 1.258232964402084e-07,
|
|
"loss": 0.0964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08693400025367737,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4217.6,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 6.7851099830795265,
|
|
"grad_norm": 0.5491296973556198,
|
|
"learning_rate": 1.16549751868833e-07,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09959080815315247,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4593.8,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 6.793570219966159,
|
|
"grad_norm": 0.5239770135624575,
|
|
"learning_rate": 1.0763015963460189e-07,
|
|
"loss": 0.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09921510517597198,
|
|
"step": 4015,
|
|
"valid_targets_mean": 5015.4,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 6.802030456852792,
|
|
"grad_norm": 0.5625256067486185,
|
|
"learning_rate": 9.906467851833468e-08,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11118162423372269,
|
|
"step": 4020,
|
|
"valid_targets_mean": 5221.1,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 6.810490693739425,
|
|
"grad_norm": 0.561476883139737,
|
|
"learning_rate": 9.085346099719339e-08,
|
|
"loss": 0.0986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11373043060302734,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5497.6,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 6.818950930626057,
|
|
"grad_norm": 0.4996827797068375,
|
|
"learning_rate": 8.299665324196904e-08,
|
|
"loss": 0.094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.101529061794281,
|
|
"step": 4030,
|
|
"valid_targets_mean": 5569.3,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 6.8274111675126905,
|
|
"grad_norm": 0.5044340320043282,
|
|
"learning_rate": 7.549439511447487e-08,
|
|
"loss": 0.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09147416055202484,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4796.2,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 6.835871404399323,
|
|
"grad_norm": 0.7779221547307812,
|
|
"learning_rate": 6.834682016506833e-08,
|
|
"loss": 0.103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08968411386013031,
|
|
"step": 4040,
|
|
"valid_targets_mean": 5062.6,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 6.844331641285956,
|
|
"grad_norm": 0.4922437559732313,
|
|
"learning_rate": 6.15540556302574e-08,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09555431455373764,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5396.3,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 6.852791878172589,
|
|
"grad_norm": 0.48916779370725,
|
|
"learning_rate": 5.511622243045356e-08,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08758147805929184,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5347.8,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 6.861252115059221,
|
|
"grad_norm": 0.5458615933209491,
|
|
"learning_rate": 4.903343516780013e-08,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10891421139240265,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5587.6,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 6.8697123519458545,
|
|
"grad_norm": 0.5471741708645214,
|
|
"learning_rate": 4.3305802124145034e-08,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09566224366426468,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4015.0,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 6.878172588832487,
|
|
"grad_norm": 0.559511627709425,
|
|
"learning_rate": 3.793342525911792e-08,
|
|
"loss": 0.1064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10558187961578369,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4932.1,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 6.88663282571912,
|
|
"grad_norm": 0.5429137409201592,
|
|
"learning_rate": 3.291640020829823e-08,
|
|
"loss": 0.0926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10273975133895874,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4832.1,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 6.895093062605753,
|
|
"grad_norm": 0.49143266205523867,
|
|
"learning_rate": 2.8254816281523267e-08,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09490954875946045,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5193.2,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 6.903553299492386,
|
|
"grad_norm": 0.46914188420025804,
|
|
"learning_rate": 2.3948756461302792e-08,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10113157331943512,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5593.8,
|
|
"valid_targets_min": 2421
|
|
},
|
|
{
|
|
"epoch": 6.9120135363790185,
|
|
"grad_norm": 0.5457155831248266,
|
|
"learning_rate": 1.999829740132908e-08,
|
|
"loss": 0.0909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09735345095396042,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4896.3,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 6.920473773265652,
|
|
"grad_norm": 0.4735596676576718,
|
|
"learning_rate": 1.6403509425122476e-08,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08993491530418396,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5227.8,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 6.928934010152284,
|
|
"grad_norm": 0.48893489580448474,
|
|
"learning_rate": 1.3164456524779045e-08,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10575798153877258,
|
|
"step": 4095,
|
|
"valid_targets_mean": 5555.6,
|
|
"valid_targets_min": 2773
|
|
},
|
|
{
|
|
"epoch": 6.937394247038917,
|
|
"grad_norm": 0.4924855594081143,
|
|
"learning_rate": 1.028119635982261e-08,
|
|
"loss": 0.1011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08962814509868622,
|
|
"step": 4100,
|
|
"valid_targets_mean": 5089.2,
|
|
"valid_targets_min": 2959
|
|
},
|
|
{
|
|
"epoch": 6.94585448392555,
|
|
"grad_norm": 0.561260753345444,
|
|
"learning_rate": 7.753780256190002e-09,
|
|
"loss": 0.1008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11104787141084671,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4552.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 6.9543147208121825,
|
|
"grad_norm": 0.540283611143795,
|
|
"learning_rate": 5.582253205309584e-09,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09759306907653809,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4975.6,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 6.962774957698816,
|
|
"grad_norm": 0.6402585562566375,
|
|
"learning_rate": 3.766653863304104e-09,
|
|
"loss": 0.1004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10013514757156372,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3181.8,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 6.971235194585448,
|
|
"grad_norm": 0.5179400745234719,
|
|
"learning_rate": 2.307014550300135e-09,
|
|
"loss": 0.0976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10430614650249481,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4977.9,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 6.979695431472082,
|
|
"grad_norm": 0.5485351423038184,
|
|
"learning_rate": 1.2033612498529857e-09,
|
|
"loss": 0.0978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09550436586141586,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5178.5,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 6.988155668358714,
|
|
"grad_norm": 0.548313896975385,
|
|
"learning_rate": 4.557136084870628e-10,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0772751122713089,
|
|
"step": 4130,
|
|
"valid_targets_mean": 4320.4,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 6.9966159052453465,
|
|
"grad_norm": 0.565345860588913,
|
|
"learning_rate": 6.408493534060256e-11,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08861735463142395,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4579.5,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0902627557516098,
|
|
"step": 4137,
|
|
"total_flos": 1470230373269504.0,
|
|
"train_loss": 0.15259632258951764,
|
|
"train_runtime": 23029.2175,
|
|
"train_samples_per_second": 2.87,
|
|
"train_steps_per_second": 0.18,
|
|
"valid_targets_mean": 5391.1,
|
|
"valid_targets_min": 1620
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4137,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1470230373269504.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|