9684 lines
268 KiB
JSON
9684 lines
268 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4382,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007987220447284345,
|
|
"grad_norm": 20.906039491002094,
|
|
"learning_rate": 3.644646924829157e-07,
|
|
"loss": 0.9573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.0240644216537476,
|
|
"step": 5,
|
|
"valid_targets_mean": 2140.7,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 0.01597444089456869,
|
|
"grad_norm": 19.496570126908942,
|
|
"learning_rate": 8.200455580865605e-07,
|
|
"loss": 0.9503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9385452270507812,
|
|
"step": 10,
|
|
"valid_targets_mean": 2679.1,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 0.023961661341853034,
|
|
"grad_norm": 15.91324445139062,
|
|
"learning_rate": 1.2756264236902052e-06,
|
|
"loss": 0.9579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8709354400634766,
|
|
"step": 15,
|
|
"valid_targets_mean": 2352.9,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 0.03194888178913738,
|
|
"grad_norm": 14.17787230187944,
|
|
"learning_rate": 1.7312072892938498e-06,
|
|
"loss": 0.9391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9081532955169678,
|
|
"step": 20,
|
|
"valid_targets_mean": 2614.5,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 0.039936102236421724,
|
|
"grad_norm": 10.842908155952221,
|
|
"learning_rate": 2.1867881548974945e-06,
|
|
"loss": 0.8488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9098473787307739,
|
|
"step": 25,
|
|
"valid_targets_mean": 2627.1,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 0.04792332268370607,
|
|
"grad_norm": 5.948841930090851,
|
|
"learning_rate": 2.642369020501139e-06,
|
|
"loss": 0.8327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8018196821212769,
|
|
"step": 30,
|
|
"valid_targets_mean": 2429.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 0.05591054313099041,
|
|
"grad_norm": 3.5495471513134635,
|
|
"learning_rate": 3.0979498861047843e-06,
|
|
"loss": 0.7494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7273592948913574,
|
|
"step": 35,
|
|
"valid_targets_mean": 2393.9,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 0.06389776357827476,
|
|
"grad_norm": 2.381014945875693,
|
|
"learning_rate": 3.5535307517084285e-06,
|
|
"loss": 0.6755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6890754699707031,
|
|
"step": 40,
|
|
"valid_targets_mean": 2399.9,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 0.07188498402555911,
|
|
"grad_norm": 1.9532273935329045,
|
|
"learning_rate": 4.009111617312073e-06,
|
|
"loss": 0.6717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6620908975601196,
|
|
"step": 45,
|
|
"valid_targets_mean": 2450.8,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 0.07987220447284345,
|
|
"grad_norm": 1.43260543848756,
|
|
"learning_rate": 4.464692482915718e-06,
|
|
"loss": 0.6358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5758607387542725,
|
|
"step": 50,
|
|
"valid_targets_mean": 2883.0,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 0.0878594249201278,
|
|
"grad_norm": 1.348239988632868,
|
|
"learning_rate": 4.920273348519363e-06,
|
|
"loss": 0.6492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6575493216514587,
|
|
"step": 55,
|
|
"valid_targets_mean": 2218.3,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.09584664536741214,
|
|
"grad_norm": 1.327791204219617,
|
|
"learning_rate": 5.375854214123008e-06,
|
|
"loss": 0.6543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6716210842132568,
|
|
"step": 60,
|
|
"valid_targets_mean": 1987.1,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 0.10383386581469649,
|
|
"grad_norm": 1.038096513013993,
|
|
"learning_rate": 5.831435079726651e-06,
|
|
"loss": 0.6233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6457006931304932,
|
|
"step": 65,
|
|
"valid_targets_mean": 2498.4,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 0.11182108626198083,
|
|
"grad_norm": 1.0356008018107776,
|
|
"learning_rate": 6.287015945330297e-06,
|
|
"loss": 0.5986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6072132587432861,
|
|
"step": 70,
|
|
"valid_targets_mean": 2097.6,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 0.11980830670926518,
|
|
"grad_norm": 0.9155911797036268,
|
|
"learning_rate": 6.742596810933942e-06,
|
|
"loss": 0.6088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5689595937728882,
|
|
"step": 75,
|
|
"valid_targets_mean": 2613.4,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 0.12779552715654952,
|
|
"grad_norm": 0.9133821985558432,
|
|
"learning_rate": 7.1981776765375854e-06,
|
|
"loss": 0.6187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5926398038864136,
|
|
"step": 80,
|
|
"valid_targets_mean": 2660.6,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 0.13578274760383385,
|
|
"grad_norm": 0.8707993045624333,
|
|
"learning_rate": 7.65375854214123e-06,
|
|
"loss": 0.5742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5566529035568237,
|
|
"step": 85,
|
|
"valid_targets_mean": 2858.4,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 0.14376996805111822,
|
|
"grad_norm": 0.8072901124490639,
|
|
"learning_rate": 8.109339407744875e-06,
|
|
"loss": 0.5691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5588597059249878,
|
|
"step": 90,
|
|
"valid_targets_mean": 2820.3,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 0.15175718849840256,
|
|
"grad_norm": 0.7533577230640448,
|
|
"learning_rate": 8.564920273348521e-06,
|
|
"loss": 0.6011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5308464765548706,
|
|
"step": 95,
|
|
"valid_targets_mean": 2745.4,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 0.1597444089456869,
|
|
"grad_norm": 0.7860662957749742,
|
|
"learning_rate": 9.020501138952164e-06,
|
|
"loss": 0.5616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.537956714630127,
|
|
"step": 100,
|
|
"valid_targets_mean": 2986.8,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 0.16773162939297126,
|
|
"grad_norm": 1.0023677491796024,
|
|
"learning_rate": 9.47608200455581e-06,
|
|
"loss": 0.5692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5105007290840149,
|
|
"step": 105,
|
|
"valid_targets_mean": 2991.4,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 0.1757188498402556,
|
|
"grad_norm": 1.6247134379251689,
|
|
"learning_rate": 9.931662870159453e-06,
|
|
"loss": 0.5402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5320438146591187,
|
|
"step": 110,
|
|
"valid_targets_mean": 2529.6,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 0.18370607028753994,
|
|
"grad_norm": 0.9155648921835337,
|
|
"learning_rate": 1.03872437357631e-05,
|
|
"loss": 0.5585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6459822654724121,
|
|
"step": 115,
|
|
"valid_targets_mean": 2616.8,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.19169329073482427,
|
|
"grad_norm": 0.8613071023126322,
|
|
"learning_rate": 1.0842824601366744e-05,
|
|
"loss": 0.5357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45346564054489136,
|
|
"step": 120,
|
|
"valid_targets_mean": 1864.3,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.19968051118210864,
|
|
"grad_norm": 0.8870641276330381,
|
|
"learning_rate": 1.1298405466970387e-05,
|
|
"loss": 0.5464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5671485662460327,
|
|
"step": 125,
|
|
"valid_targets_mean": 2214.9,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 0.20766773162939298,
|
|
"grad_norm": 0.829186545860219,
|
|
"learning_rate": 1.1753986332574032e-05,
|
|
"loss": 0.5259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5569150447845459,
|
|
"step": 130,
|
|
"valid_targets_mean": 2625.1,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 0.21565495207667731,
|
|
"grad_norm": 0.6977962196063097,
|
|
"learning_rate": 1.2209567198177677e-05,
|
|
"loss": 0.515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4756750762462616,
|
|
"step": 135,
|
|
"valid_targets_mean": 3178.6,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 0.22364217252396165,
|
|
"grad_norm": 0.8672379963227861,
|
|
"learning_rate": 1.2665148063781323e-05,
|
|
"loss": 0.5024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5537216663360596,
|
|
"step": 140,
|
|
"valid_targets_mean": 2197.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 0.23162939297124602,
|
|
"grad_norm": 0.6655132896172644,
|
|
"learning_rate": 1.3120728929384968e-05,
|
|
"loss": 0.4923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45678824186325073,
|
|
"step": 145,
|
|
"valid_targets_mean": 3287.2,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 0.23961661341853036,
|
|
"grad_norm": 0.9125958254156283,
|
|
"learning_rate": 1.357630979498861e-05,
|
|
"loss": 0.5313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5262229442596436,
|
|
"step": 150,
|
|
"valid_targets_mean": 2109.3,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 0.2476038338658147,
|
|
"grad_norm": 0.8279765367556495,
|
|
"learning_rate": 1.4031890660592255e-05,
|
|
"loss": 0.4997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47942978143692017,
|
|
"step": 155,
|
|
"valid_targets_mean": 2362.8,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 0.25559105431309903,
|
|
"grad_norm": 0.8121873927482371,
|
|
"learning_rate": 1.4487471526195902e-05,
|
|
"loss": 0.5182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.504438042640686,
|
|
"step": 160,
|
|
"valid_targets_mean": 2390.2,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.26357827476038337,
|
|
"grad_norm": 0.8483399606624832,
|
|
"learning_rate": 1.4943052391799546e-05,
|
|
"loss": 0.4784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4892246723175049,
|
|
"step": 165,
|
|
"valid_targets_mean": 2381.1,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.2715654952076677,
|
|
"grad_norm": 0.8718458946078799,
|
|
"learning_rate": 1.539863325740319e-05,
|
|
"loss": 0.4938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5256572961807251,
|
|
"step": 170,
|
|
"valid_targets_mean": 2371.9,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 0.2795527156549521,
|
|
"grad_norm": 0.8458579493414248,
|
|
"learning_rate": 1.5854214123006836e-05,
|
|
"loss": 0.5072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5094478130340576,
|
|
"step": 175,
|
|
"valid_targets_mean": 2189.0,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 0.28753993610223644,
|
|
"grad_norm": 0.890074661639448,
|
|
"learning_rate": 1.630979498861048e-05,
|
|
"loss": 0.5102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5577884912490845,
|
|
"step": 180,
|
|
"valid_targets_mean": 2159.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 0.2955271565495208,
|
|
"grad_norm": 0.8623712338439931,
|
|
"learning_rate": 1.6765375854214125e-05,
|
|
"loss": 0.5241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5096868872642517,
|
|
"step": 185,
|
|
"valid_targets_mean": 2316.1,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 0.3035143769968051,
|
|
"grad_norm": 0.7804686614649181,
|
|
"learning_rate": 1.722095671981777e-05,
|
|
"loss": 0.4937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48676884174346924,
|
|
"step": 190,
|
|
"valid_targets_mean": 2819.6,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 0.31150159744408945,
|
|
"grad_norm": 0.9035918303282658,
|
|
"learning_rate": 1.7676537585421415e-05,
|
|
"loss": 0.5089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5741513967514038,
|
|
"step": 195,
|
|
"valid_targets_mean": 2415.0,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 0.3194888178913738,
|
|
"grad_norm": 0.6366046510628361,
|
|
"learning_rate": 1.813211845102506e-05,
|
|
"loss": 0.502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45521289110183716,
|
|
"step": 200,
|
|
"valid_targets_mean": 3464.0,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 0.3274760383386581,
|
|
"grad_norm": 0.7836552950615538,
|
|
"learning_rate": 1.8587699316628704e-05,
|
|
"loss": 0.4823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5014628171920776,
|
|
"step": 205,
|
|
"valid_targets_mean": 2736.2,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 0.3354632587859425,
|
|
"grad_norm": 0.778460292860232,
|
|
"learning_rate": 1.904328018223235e-05,
|
|
"loss": 0.4947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4946124851703644,
|
|
"step": 210,
|
|
"valid_targets_mean": 2553.0,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 0.34345047923322686,
|
|
"grad_norm": 0.7135687432841079,
|
|
"learning_rate": 1.9498861047835993e-05,
|
|
"loss": 0.496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46126413345336914,
|
|
"step": 215,
|
|
"valid_targets_mean": 2891.1,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 0.3514376996805112,
|
|
"grad_norm": 0.8357192096489694,
|
|
"learning_rate": 1.9954441913439638e-05,
|
|
"loss": 0.4909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4671036899089813,
|
|
"step": 220,
|
|
"valid_targets_mean": 2429.2,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.35942492012779553,
|
|
"grad_norm": 0.8461816650679702,
|
|
"learning_rate": 2.0410022779043283e-05,
|
|
"loss": 0.4942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4583345651626587,
|
|
"step": 225,
|
|
"valid_targets_mean": 2345.6,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.36741214057507987,
|
|
"grad_norm": 0.6873339325911447,
|
|
"learning_rate": 2.0865603644646927e-05,
|
|
"loss": 0.4908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47522109746932983,
|
|
"step": 230,
|
|
"valid_targets_mean": 3305.7,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 0.3753993610223642,
|
|
"grad_norm": 0.9023165375027137,
|
|
"learning_rate": 2.1321184510250572e-05,
|
|
"loss": 0.4814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5197421312332153,
|
|
"step": 235,
|
|
"valid_targets_mean": 2181.6,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 0.38338658146964855,
|
|
"grad_norm": 0.8069769913259841,
|
|
"learning_rate": 2.1776765375854217e-05,
|
|
"loss": 0.489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45820581912994385,
|
|
"step": 240,
|
|
"valid_targets_mean": 2501.1,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 0.3913738019169329,
|
|
"grad_norm": 1.0532773549209784,
|
|
"learning_rate": 2.223234624145786e-05,
|
|
"loss": 0.4541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47036832571029663,
|
|
"step": 245,
|
|
"valid_targets_mean": 2673.6,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.3993610223642173,
|
|
"grad_norm": 0.7332740291486907,
|
|
"learning_rate": 2.2687927107061506e-05,
|
|
"loss": 0.483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43740615248680115,
|
|
"step": 250,
|
|
"valid_targets_mean": 2641.2,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 0.4073482428115016,
|
|
"grad_norm": 0.8428545019958994,
|
|
"learning_rate": 2.314350797266515e-05,
|
|
"loss": 0.5238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5407164096832275,
|
|
"step": 255,
|
|
"valid_targets_mean": 2284.6,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 0.41533546325878595,
|
|
"grad_norm": 0.7338003188479382,
|
|
"learning_rate": 2.3599088838268792e-05,
|
|
"loss": 0.4578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4148678779602051,
|
|
"step": 260,
|
|
"valid_targets_mean": 3033.6,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.4233226837060703,
|
|
"grad_norm": 0.7841819812133717,
|
|
"learning_rate": 2.4054669703872436e-05,
|
|
"loss": 0.4979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5546318292617798,
|
|
"step": 265,
|
|
"valid_targets_mean": 2941.3,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 0.43130990415335463,
|
|
"grad_norm": 0.721365289371031,
|
|
"learning_rate": 2.4510250569476085e-05,
|
|
"loss": 0.468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46093493700027466,
|
|
"step": 270,
|
|
"valid_targets_mean": 2911.8,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.43929712460063897,
|
|
"grad_norm": 0.6639172835659963,
|
|
"learning_rate": 2.496583143507973e-05,
|
|
"loss": 0.488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4423864483833313,
|
|
"step": 275,
|
|
"valid_targets_mean": 3359.3,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 0.4472843450479233,
|
|
"grad_norm": 0.8242150018030385,
|
|
"learning_rate": 2.5421412300683374e-05,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4896467924118042,
|
|
"step": 280,
|
|
"valid_targets_mean": 2422.3,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 0.45527156549520764,
|
|
"grad_norm": 0.770823399064823,
|
|
"learning_rate": 2.587699316628702e-05,
|
|
"loss": 0.4616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4594544768333435,
|
|
"step": 285,
|
|
"valid_targets_mean": 2628.8,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 0.46325878594249204,
|
|
"grad_norm": 0.7994346958843023,
|
|
"learning_rate": 2.6332574031890663e-05,
|
|
"loss": 0.4598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49476492404937744,
|
|
"step": 290,
|
|
"valid_targets_mean": 2330.9,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.4712460063897764,
|
|
"grad_norm": 0.7514771518140472,
|
|
"learning_rate": 2.6788154897494308e-05,
|
|
"loss": 0.4468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3994782567024231,
|
|
"step": 295,
|
|
"valid_targets_mean": 2733.8,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 0.4792332268370607,
|
|
"grad_norm": 2.475475526448787,
|
|
"learning_rate": 2.7243735763097953e-05,
|
|
"loss": 0.4692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47923150658607483,
|
|
"step": 300,
|
|
"valid_targets_mean": 2443.7,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 0.48722044728434505,
|
|
"grad_norm": 0.7086598707757921,
|
|
"learning_rate": 2.7699316628701597e-05,
|
|
"loss": 0.4951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40425026416778564,
|
|
"step": 305,
|
|
"valid_targets_mean": 3036.4,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.4952076677316294,
|
|
"grad_norm": 0.8391370698933868,
|
|
"learning_rate": 2.815489749430524e-05,
|
|
"loss": 0.4403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4703635573387146,
|
|
"step": 310,
|
|
"valid_targets_mean": 2451.1,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 0.5031948881789138,
|
|
"grad_norm": 0.8319164873193978,
|
|
"learning_rate": 2.8610478359908883e-05,
|
|
"loss": 0.4697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49283626675605774,
|
|
"step": 315,
|
|
"valid_targets_mean": 2414.0,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 0.5111821086261981,
|
|
"grad_norm": 0.7701963279980867,
|
|
"learning_rate": 2.906605922551253e-05,
|
|
"loss": 0.4568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3877711594104767,
|
|
"step": 320,
|
|
"valid_targets_mean": 2594.1,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 0.5191693290734825,
|
|
"grad_norm": 0.8176172777624666,
|
|
"learning_rate": 2.9521640091116176e-05,
|
|
"loss": 0.4836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48881566524505615,
|
|
"step": 325,
|
|
"valid_targets_mean": 2457.0,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 0.5271565495207667,
|
|
"grad_norm": 0.7154790776146704,
|
|
"learning_rate": 2.997722095671982e-05,
|
|
"loss": 0.4683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44245707988739014,
|
|
"step": 330,
|
|
"valid_targets_mean": 3123.4,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 0.5351437699680511,
|
|
"grad_norm": 0.8233583964629695,
|
|
"learning_rate": 3.0432801822323465e-05,
|
|
"loss": 0.4461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45075714588165283,
|
|
"step": 335,
|
|
"valid_targets_mean": 2231.7,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 0.5431309904153354,
|
|
"grad_norm": 0.9423679438332069,
|
|
"learning_rate": 3.088838268792711e-05,
|
|
"loss": 0.4794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4764547049999237,
|
|
"step": 340,
|
|
"valid_targets_mean": 1926.5,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 0.5511182108626198,
|
|
"grad_norm": 0.8621481261614021,
|
|
"learning_rate": 3.1343963553530755e-05,
|
|
"loss": 0.4834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4466078281402588,
|
|
"step": 345,
|
|
"valid_targets_mean": 2508.6,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 0.5591054313099042,
|
|
"grad_norm": 0.8122116525818203,
|
|
"learning_rate": 3.17995444191344e-05,
|
|
"loss": 0.4644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42387473583221436,
|
|
"step": 350,
|
|
"valid_targets_mean": 2201.7,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 0.5670926517571885,
|
|
"grad_norm": 0.7656622591142271,
|
|
"learning_rate": 3.2255125284738044e-05,
|
|
"loss": 0.4686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4961174428462982,
|
|
"step": 355,
|
|
"valid_targets_mean": 2999.6,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.5750798722044729,
|
|
"grad_norm": 0.9383452661084898,
|
|
"learning_rate": 3.271070615034169e-05,
|
|
"loss": 0.4886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49159035086631775,
|
|
"step": 360,
|
|
"valid_targets_mean": 2128.4,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 0.5830670926517572,
|
|
"grad_norm": 0.7798341077489722,
|
|
"learning_rate": 3.316628701594533e-05,
|
|
"loss": 0.4344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44215476512908936,
|
|
"step": 365,
|
|
"valid_targets_mean": 2958.8,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 0.5910543130990416,
|
|
"grad_norm": 0.7423189577311211,
|
|
"learning_rate": 3.362186788154898e-05,
|
|
"loss": 0.4796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4933112561702728,
|
|
"step": 370,
|
|
"valid_targets_mean": 3382.0,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 0.5990415335463258,
|
|
"grad_norm": 0.7580823645381265,
|
|
"learning_rate": 3.407744874715262e-05,
|
|
"loss": 0.4519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46064528822898865,
|
|
"step": 375,
|
|
"valid_targets_mean": 3018.9,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 0.6070287539936102,
|
|
"grad_norm": 0.8066664152782219,
|
|
"learning_rate": 3.453302961275627e-05,
|
|
"loss": 0.4763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4480937719345093,
|
|
"step": 380,
|
|
"valid_targets_mean": 2517.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 0.6150159744408946,
|
|
"grad_norm": 0.6973221390872589,
|
|
"learning_rate": 3.498861047835991e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39936941862106323,
|
|
"step": 385,
|
|
"valid_targets_mean": 2965.8,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 0.6230031948881789,
|
|
"grad_norm": 0.6465105562706295,
|
|
"learning_rate": 3.5444191343963557e-05,
|
|
"loss": 0.4476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37953072786331177,
|
|
"step": 390,
|
|
"valid_targets_mean": 3469.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.6309904153354633,
|
|
"grad_norm": 0.7632096748974264,
|
|
"learning_rate": 3.58997722095672e-05,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4599865674972534,
|
|
"step": 395,
|
|
"valid_targets_mean": 3006.7,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 0.6389776357827476,
|
|
"grad_norm": 0.7312096491232796,
|
|
"learning_rate": 3.6355353075170846e-05,
|
|
"loss": 0.4554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42953866720199585,
|
|
"step": 400,
|
|
"valid_targets_mean": 2476.7,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 0.646964856230032,
|
|
"grad_norm": 0.7479898151101109,
|
|
"learning_rate": 3.681093394077449e-05,
|
|
"loss": 0.4659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.474043607711792,
|
|
"step": 405,
|
|
"valid_targets_mean": 2910.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.6549520766773163,
|
|
"grad_norm": 0.8551237190516656,
|
|
"learning_rate": 3.7266514806378135e-05,
|
|
"loss": 0.4847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49820223450660706,
|
|
"step": 410,
|
|
"valid_targets_mean": 2128.3,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.6629392971246006,
|
|
"grad_norm": 0.7850032433883269,
|
|
"learning_rate": 3.772209567198178e-05,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4363866448402405,
|
|
"step": 415,
|
|
"valid_targets_mean": 2410.7,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 0.670926517571885,
|
|
"grad_norm": 0.6966951507635679,
|
|
"learning_rate": 3.8177676537585425e-05,
|
|
"loss": 0.4488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44000887870788574,
|
|
"step": 420,
|
|
"valid_targets_mean": 2960.1,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 0.6789137380191693,
|
|
"grad_norm": 0.8948019953336019,
|
|
"learning_rate": 3.863325740318907e-05,
|
|
"loss": 0.4506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4773053228855133,
|
|
"step": 425,
|
|
"valid_targets_mean": 2818.6,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 0.6869009584664537,
|
|
"grad_norm": 0.8258683139668815,
|
|
"learning_rate": 3.9088838268792714e-05,
|
|
"loss": 0.4584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45869919657707214,
|
|
"step": 430,
|
|
"valid_targets_mean": 2282.5,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 0.694888178913738,
|
|
"grad_norm": 0.8449753239870428,
|
|
"learning_rate": 3.954441913439636e-05,
|
|
"loss": 0.4599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4593479037284851,
|
|
"step": 435,
|
|
"valid_targets_mean": 2183.7,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 0.7028753993610224,
|
|
"grad_norm": 0.9376070421040541,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4468621015548706,
|
|
"step": 440,
|
|
"valid_targets_mean": 2086.6,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 0.7108626198083067,
|
|
"grad_norm": 0.7831529202283422,
|
|
"learning_rate": 3.999984129682125e-05,
|
|
"loss": 0.4372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43732643127441406,
|
|
"step": 445,
|
|
"valid_targets_mean": 2636.1,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 0.7188498402555911,
|
|
"grad_norm": 0.8974828192641618,
|
|
"learning_rate": 3.9999365189803684e-05,
|
|
"loss": 0.4425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48447495698928833,
|
|
"step": 450,
|
|
"valid_targets_mean": 1855.2,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 0.7268370607028753,
|
|
"grad_norm": 0.6189779358262517,
|
|
"learning_rate": 3.9998571686503264e-05,
|
|
"loss": 0.4546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3997269868850708,
|
|
"step": 455,
|
|
"valid_targets_mean": 3446.9,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 0.7348242811501597,
|
|
"grad_norm": 0.8951955313431909,
|
|
"learning_rate": 3.9997460799513134e-05,
|
|
"loss": 0.4812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44068706035614014,
|
|
"step": 460,
|
|
"valid_targets_mean": 2169.4,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 0.7428115015974441,
|
|
"grad_norm": 0.8228623238578393,
|
|
"learning_rate": 3.999603254646343e-05,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49897968769073486,
|
|
"step": 465,
|
|
"valid_targets_mean": 3037.7,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 0.7507987220447284,
|
|
"grad_norm": 0.759996173169153,
|
|
"learning_rate": 3.9994286950020986e-05,
|
|
"loss": 0.4514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41016900539398193,
|
|
"step": 470,
|
|
"valid_targets_mean": 2570.5,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 0.7587859424920128,
|
|
"grad_norm": 0.8977833692695875,
|
|
"learning_rate": 3.999222403788896e-05,
|
|
"loss": 0.4468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4586367607116699,
|
|
"step": 475,
|
|
"valid_targets_mean": 2140.6,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 0.7667731629392971,
|
|
"grad_norm": 0.9193044522300281,
|
|
"learning_rate": 3.9989843842806435e-05,
|
|
"loss": 0.438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.514497697353363,
|
|
"step": 480,
|
|
"valid_targets_mean": 2342.2,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 0.7747603833865815,
|
|
"grad_norm": 0.8206835086818982,
|
|
"learning_rate": 3.998714640254786e-05,
|
|
"loss": 0.427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4679526388645172,
|
|
"step": 485,
|
|
"valid_targets_mean": 2194.3,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.7827476038338658,
|
|
"grad_norm": 0.717741580788042,
|
|
"learning_rate": 3.998413175992247e-05,
|
|
"loss": 0.4657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42926549911499023,
|
|
"step": 490,
|
|
"valid_targets_mean": 2672.8,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 0.7907348242811502,
|
|
"grad_norm": 0.7049217179012289,
|
|
"learning_rate": 3.99807999627736e-05,
|
|
"loss": 0.4276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41634947061538696,
|
|
"step": 495,
|
|
"valid_targets_mean": 2769.8,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.7987220447284346,
|
|
"grad_norm": 0.922770306724613,
|
|
"learning_rate": 3.997715106397794e-05,
|
|
"loss": 0.4474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4767404794692993,
|
|
"step": 500,
|
|
"valid_targets_mean": 2618.2,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.8067092651757188,
|
|
"grad_norm": 0.7976543521843491,
|
|
"learning_rate": 3.997318512144465e-05,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41540616750717163,
|
|
"step": 505,
|
|
"valid_targets_mean": 2522.8,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 0.8146964856230032,
|
|
"grad_norm": 0.7692282994469742,
|
|
"learning_rate": 3.9968902198114516e-05,
|
|
"loss": 0.4765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4726718068122864,
|
|
"step": 510,
|
|
"valid_targets_mean": 2283.3,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.8226837060702875,
|
|
"grad_norm": 0.7246353062113854,
|
|
"learning_rate": 3.996430236195889e-05,
|
|
"loss": 0.4519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41303181648254395,
|
|
"step": 515,
|
|
"valid_targets_mean": 2569.4,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.8306709265175719,
|
|
"grad_norm": 0.7411053339578045,
|
|
"learning_rate": 3.995938568597864e-05,
|
|
"loss": 0.452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42254364490509033,
|
|
"step": 520,
|
|
"valid_targets_mean": 2748.4,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 0.8386581469648562,
|
|
"grad_norm": 0.7917690961087318,
|
|
"learning_rate": 3.995415224820297e-05,
|
|
"loss": 0.4496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4489549994468689,
|
|
"step": 525,
|
|
"valid_targets_mean": 2162.8,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 0.8466453674121406,
|
|
"grad_norm": 0.8422422243693936,
|
|
"learning_rate": 3.994860213168819e-05,
|
|
"loss": 0.4377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4269351661205292,
|
|
"step": 530,
|
|
"valid_targets_mean": 1882.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.854632587859425,
|
|
"grad_norm": 0.7426075706321315,
|
|
"learning_rate": 3.9942735424516435e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39095965027809143,
|
|
"step": 535,
|
|
"valid_targets_mean": 2487.1,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 0.8626198083067093,
|
|
"grad_norm": 0.6880731690979766,
|
|
"learning_rate": 3.9936552219794196e-05,
|
|
"loss": 0.4289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4472552537918091,
|
|
"step": 540,
|
|
"valid_targets_mean": 2719.0,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 0.8706070287539937,
|
|
"grad_norm": 0.6127638008579778,
|
|
"learning_rate": 3.993005261565091e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37561172246932983,
|
|
"step": 545,
|
|
"valid_targets_mean": 3264.6,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 0.8785942492012779,
|
|
"grad_norm": 0.734936387391193,
|
|
"learning_rate": 3.992323671523735e-05,
|
|
"loss": 0.4753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4480840563774109,
|
|
"step": 550,
|
|
"valid_targets_mean": 2956.7,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 0.8865814696485623,
|
|
"grad_norm": 0.7459059541350692,
|
|
"learning_rate": 3.991610462672403e-05,
|
|
"loss": 0.4503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4517761468887329,
|
|
"step": 555,
|
|
"valid_targets_mean": 2538.3,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 0.8945686900958466,
|
|
"grad_norm": 0.7142683684915366,
|
|
"learning_rate": 3.9908656463299456e-05,
|
|
"loss": 0.4293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48803699016571045,
|
|
"step": 560,
|
|
"valid_targets_mean": 3283.1,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.902555910543131,
|
|
"grad_norm": 0.7754594092244924,
|
|
"learning_rate": 3.990089234316835e-05,
|
|
"loss": 0.4645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4826216697692871,
|
|
"step": 565,
|
|
"valid_targets_mean": 2237.9,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 0.9105431309904153,
|
|
"grad_norm": 0.8822751028643037,
|
|
"learning_rate": 3.989281238954978e-05,
|
|
"loss": 0.435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44305723905563354,
|
|
"step": 570,
|
|
"valid_targets_mean": 1873.4,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 0.9185303514376997,
|
|
"grad_norm": 0.7678566719028712,
|
|
"learning_rate": 3.9884416730675155e-05,
|
|
"loss": 0.4541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48998817801475525,
|
|
"step": 575,
|
|
"valid_targets_mean": 2420.9,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 0.9265175718849841,
|
|
"grad_norm": 0.7514146372168061,
|
|
"learning_rate": 3.987570549978626e-05,
|
|
"loss": 0.4121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4123725891113281,
|
|
"step": 580,
|
|
"valid_targets_mean": 2298.2,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.9345047923322684,
|
|
"grad_norm": 1.0628517046355306,
|
|
"learning_rate": 3.986667883513311e-05,
|
|
"loss": 0.4833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4944913983345032,
|
|
"step": 585,
|
|
"valid_targets_mean": 2378.1,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 0.9424920127795527,
|
|
"grad_norm": 0.8049172330305002,
|
|
"learning_rate": 3.985733687997173e-05,
|
|
"loss": 0.4582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5038259625434875,
|
|
"step": 590,
|
|
"valid_targets_mean": 2356.6,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 0.950479233226837,
|
|
"grad_norm": 0.6940177525092096,
|
|
"learning_rate": 3.984767978256192e-05,
|
|
"loss": 0.4533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4390389919281006,
|
|
"step": 595,
|
|
"valid_targets_mean": 2926.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 0.9584664536741214,
|
|
"grad_norm": 0.7572548889401504,
|
|
"learning_rate": 3.983770769616488e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42548584938049316,
|
|
"step": 600,
|
|
"valid_targets_mean": 2427.2,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 0.9664536741214057,
|
|
"grad_norm": 0.8738189106260309,
|
|
"learning_rate": 3.9827420779040805e-05,
|
|
"loss": 0.4421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4466143250465393,
|
|
"step": 605,
|
|
"valid_targets_mean": 1977.6,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 0.9744408945686901,
|
|
"grad_norm": 0.8037273950316778,
|
|
"learning_rate": 3.981681919444633e-05,
|
|
"loss": 0.4508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5017565488815308,
|
|
"step": 610,
|
|
"valid_targets_mean": 2386.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.9824281150159745,
|
|
"grad_norm": 0.5982383899800223,
|
|
"learning_rate": 3.980590311063197e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41834089159965515,
|
|
"step": 615,
|
|
"valid_targets_mean": 3628.3,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.9904153354632588,
|
|
"grad_norm": 0.7696620219380677,
|
|
"learning_rate": 3.9794672700839455e-05,
|
|
"loss": 0.4488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4373490810394287,
|
|
"step": 620,
|
|
"valid_targets_mean": 2549.8,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 0.9984025559105432,
|
|
"grad_norm": 0.7461721197441102,
|
|
"learning_rate": 3.9783128143298945e-05,
|
|
"loss": 0.4487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49368953704833984,
|
|
"step": 625,
|
|
"valid_targets_mean": 2643.3,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 1.0063897763578276,
|
|
"grad_norm": 0.7272341369936172,
|
|
"learning_rate": 3.977126962122625e-05,
|
|
"loss": 0.4393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3843986988067627,
|
|
"step": 630,
|
|
"valid_targets_mean": 2303.7,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 1.0143769968051117,
|
|
"grad_norm": 0.8596305129934554,
|
|
"learning_rate": 3.975909732281988e-05,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5049939155578613,
|
|
"step": 635,
|
|
"valid_targets_mean": 2025.8,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 1.0223642172523961,
|
|
"grad_norm": 0.7258174772306256,
|
|
"learning_rate": 3.974661144125808e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3765987753868103,
|
|
"step": 640,
|
|
"valid_targets_mean": 2560.7,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.0303514376996805,
|
|
"grad_norm": 0.7550618173262585,
|
|
"learning_rate": 3.973381217469576e-05,
|
|
"loss": 0.4043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4596840739250183,
|
|
"step": 645,
|
|
"valid_targets_mean": 2573.4,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 1.038338658146965,
|
|
"grad_norm": 0.7265215893052677,
|
|
"learning_rate": 3.972069972626135e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45169511437416077,
|
|
"step": 650,
|
|
"valid_targets_mean": 2874.6,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 1.0463258785942493,
|
|
"grad_norm": 0.7839638745981033,
|
|
"learning_rate": 3.970727430405357e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42562973499298096,
|
|
"step": 655,
|
|
"valid_targets_mean": 2274.5,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 1.0543130990415335,
|
|
"grad_norm": 0.9242540978995814,
|
|
"learning_rate": 3.969353612113815e-05,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43743249773979187,
|
|
"step": 660,
|
|
"valid_targets_mean": 2414.0,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 1.0623003194888179,
|
|
"grad_norm": 0.6724287015934308,
|
|
"learning_rate": 3.96794853955444e-05,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.370551198720932,
|
|
"step": 665,
|
|
"valid_targets_mean": 3733.8,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 1.0702875399361023,
|
|
"grad_norm": 0.708472375332413,
|
|
"learning_rate": 3.966512235026182e-05,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4240679144859314,
|
|
"step": 670,
|
|
"valid_targets_mean": 3099.8,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 1.0782747603833867,
|
|
"grad_norm": 0.7203252790704515,
|
|
"learning_rate": 3.96504472132365e-05,
|
|
"loss": 0.4238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3867179751396179,
|
|
"step": 675,
|
|
"valid_targets_mean": 3290.5,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 1.0862619808306708,
|
|
"grad_norm": 0.8408502027804452,
|
|
"learning_rate": 3.9635460217367513e-05,
|
|
"loss": 0.3915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40862441062927246,
|
|
"step": 680,
|
|
"valid_targets_mean": 2296.8,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 1.0942492012779552,
|
|
"grad_norm": 0.7278953390961722,
|
|
"learning_rate": 3.962016160050327e-05,
|
|
"loss": 0.4139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3588273525238037,
|
|
"step": 685,
|
|
"valid_targets_mean": 2724.4,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 1.1022364217252396,
|
|
"grad_norm": 0.9306408007580942,
|
|
"learning_rate": 3.960455160543767e-05,
|
|
"loss": 0.3999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4472426176071167,
|
|
"step": 690,
|
|
"valid_targets_mean": 1974.0,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 1.110223642172524,
|
|
"grad_norm": 0.7477340891993111,
|
|
"learning_rate": 3.958863047990631e-05,
|
|
"loss": 0.404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38502562046051025,
|
|
"step": 695,
|
|
"valid_targets_mean": 2132.6,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 1.1182108626198084,
|
|
"grad_norm": 0.8349293457260016,
|
|
"learning_rate": 3.95723984765825e-05,
|
|
"loss": 0.3943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3825334906578064,
|
|
"step": 700,
|
|
"valid_targets_mean": 2618.2,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 1.1261980830670926,
|
|
"grad_norm": 0.8711065091218907,
|
|
"learning_rate": 3.955585585307329e-05,
|
|
"loss": 0.4284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44381728768348694,
|
|
"step": 705,
|
|
"valid_targets_mean": 2252.5,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 1.134185303514377,
|
|
"grad_norm": 0.9455723621923142,
|
|
"learning_rate": 3.9539002871915395e-05,
|
|
"loss": 0.4232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3965805768966675,
|
|
"step": 710,
|
|
"valid_targets_mean": 1871.1,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 1.1421725239616614,
|
|
"grad_norm": 0.8491218387953755,
|
|
"learning_rate": 3.952183980057096e-05,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44100692868232727,
|
|
"step": 715,
|
|
"valid_targets_mean": 1969.0,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 1.1501597444089458,
|
|
"grad_norm": 0.8801764335720406,
|
|
"learning_rate": 3.950436691142339e-05,
|
|
"loss": 0.4277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40614932775497437,
|
|
"step": 720,
|
|
"valid_targets_mean": 1733.1,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 1.15814696485623,
|
|
"grad_norm": 0.8492808533320004,
|
|
"learning_rate": 3.948658448177299e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4536987543106079,
|
|
"step": 725,
|
|
"valid_targets_mean": 2315.6,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 1.1661341853035143,
|
|
"grad_norm": 0.8552951801568066,
|
|
"learning_rate": 3.946849279383258e-05,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43429261445999146,
|
|
"step": 730,
|
|
"valid_targets_mean": 2856.9,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 1.1741214057507987,
|
|
"grad_norm": 0.7946345321115321,
|
|
"learning_rate": 3.9450092134722984e-05,
|
|
"loss": 0.4255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4214361011981964,
|
|
"step": 735,
|
|
"valid_targets_mean": 2343.9,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 1.182108626198083,
|
|
"grad_norm": 0.8378629083974003,
|
|
"learning_rate": 3.943138279646853e-05,
|
|
"loss": 0.407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39023956656455994,
|
|
"step": 740,
|
|
"valid_targets_mean": 3099.4,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 1.1900958466453675,
|
|
"grad_norm": 0.6738425776122363,
|
|
"learning_rate": 3.941236507599234e-05,
|
|
"loss": 0.4031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40918710827827454,
|
|
"step": 745,
|
|
"valid_targets_mean": 3145.7,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 1.1980830670926517,
|
|
"grad_norm": 0.7718472850757405,
|
|
"learning_rate": 3.93930392751117e-05,
|
|
"loss": 0.4137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39953070878982544,
|
|
"step": 750,
|
|
"valid_targets_mean": 2135.1,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 1.206070287539936,
|
|
"grad_norm": 0.6463423656234337,
|
|
"learning_rate": 3.9373405700533204e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4134911894798279,
|
|
"step": 755,
|
|
"valid_targets_mean": 3690.4,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.2140575079872205,
|
|
"grad_norm": 0.8112733949723302,
|
|
"learning_rate": 3.935346466384793e-05,
|
|
"loss": 0.439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.389288067817688,
|
|
"step": 760,
|
|
"valid_targets_mean": 2656.1,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 1.2220447284345048,
|
|
"grad_norm": 0.920745633707765,
|
|
"learning_rate": 3.933321648152646e-05,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4509386420249939,
|
|
"step": 765,
|
|
"valid_targets_mean": 1792.8,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 1.230031948881789,
|
|
"grad_norm": 0.865735872783338,
|
|
"learning_rate": 3.931266147491389e-05,
|
|
"loss": 0.4327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.482857882976532,
|
|
"step": 770,
|
|
"valid_targets_mean": 2327.7,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 1.2380191693290734,
|
|
"grad_norm": 0.6527459337711454,
|
|
"learning_rate": 3.929179997022471e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4181767702102661,
|
|
"step": 775,
|
|
"valid_targets_mean": 3132.7,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 1.2460063897763578,
|
|
"grad_norm": 0.8078843549494872,
|
|
"learning_rate": 3.927063229853763e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42715543508529663,
|
|
"step": 780,
|
|
"valid_targets_mean": 2410.9,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 1.2539936102236422,
|
|
"grad_norm": 0.6675037223741659,
|
|
"learning_rate": 3.9249158795790316e-05,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3899785876274109,
|
|
"step": 785,
|
|
"valid_targets_mean": 2846.6,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 1.2619808306709266,
|
|
"grad_norm": 0.8455035790032538,
|
|
"learning_rate": 3.9227379802774106e-05,
|
|
"loss": 0.4252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.451267808675766,
|
|
"step": 790,
|
|
"valid_targets_mean": 2269.3,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 1.269968051118211,
|
|
"grad_norm": 0.6475569610624897,
|
|
"learning_rate": 3.920529566512852e-05,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36655640602111816,
|
|
"step": 795,
|
|
"valid_targets_mean": 2917.2,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 1.2779552715654952,
|
|
"grad_norm": 0.7456629544372014,
|
|
"learning_rate": 3.918290673333585e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3755319118499756,
|
|
"step": 800,
|
|
"valid_targets_mean": 2427.9,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 1.2859424920127795,
|
|
"grad_norm": 1.0632823272717977,
|
|
"learning_rate": 3.916021336271556e-05,
|
|
"loss": 0.3876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3854368031024933,
|
|
"step": 805,
|
|
"valid_targets_mean": 2943.0,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 1.293929712460064,
|
|
"grad_norm": 0.8379939258431478,
|
|
"learning_rate": 3.913721591341867e-05,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4337990880012512,
|
|
"step": 810,
|
|
"valid_targets_mean": 1997.2,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 1.3019169329073481,
|
|
"grad_norm": 0.8273038471696934,
|
|
"learning_rate": 3.9113914750421985e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4168071448802948,
|
|
"step": 815,
|
|
"valid_targets_mean": 2032.8,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 1.3099041533546325,
|
|
"grad_norm": 0.7477651131994341,
|
|
"learning_rate": 3.9090310243522394e-05,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4636048674583435,
|
|
"step": 820,
|
|
"valid_targets_mean": 2495.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 1.317891373801917,
|
|
"grad_norm": 0.7670685145708385,
|
|
"learning_rate": 3.90664027673309e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42857807874679565,
|
|
"step": 825,
|
|
"valid_targets_mean": 2686.5,
|
|
"valid_targets_min": 210
|
|
},
|
|
{
|
|
"epoch": 1.3258785942492013,
|
|
"grad_norm": 0.6998788955375843,
|
|
"learning_rate": 3.904219270126677e-05,
|
|
"loss": 0.4251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39968425035476685,
|
|
"step": 830,
|
|
"valid_targets_mean": 2978.9,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 1.3338658146964857,
|
|
"grad_norm": 0.6641298700246149,
|
|
"learning_rate": 3.901768042955144e-05,
|
|
"loss": 0.3806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.422119677066803,
|
|
"step": 835,
|
|
"valid_targets_mean": 3346.3,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 1.34185303514377,
|
|
"grad_norm": 0.8066450416948963,
|
|
"learning_rate": 3.8992866341202446e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40724167227745056,
|
|
"step": 840,
|
|
"valid_targets_mean": 1984.0,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 1.3498402555910542,
|
|
"grad_norm": 0.8252041006700711,
|
|
"learning_rate": 3.8967750830027277e-05,
|
|
"loss": 0.4522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.529170036315918,
|
|
"step": 845,
|
|
"valid_targets_mean": 2445.2,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 1.3578274760383386,
|
|
"grad_norm": 0.7848403537318833,
|
|
"learning_rate": 3.894233429461706e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39025038480758667,
|
|
"step": 850,
|
|
"valid_targets_mean": 2084.1,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 1.365814696485623,
|
|
"grad_norm": 0.76373194401839,
|
|
"learning_rate": 3.89166171383403e-05,
|
|
"loss": 0.4057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41189396381378174,
|
|
"step": 855,
|
|
"valid_targets_mean": 2355.9,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 1.3738019169329074,
|
|
"grad_norm": 0.6355610424565099,
|
|
"learning_rate": 3.889059976933644e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38024890422821045,
|
|
"step": 860,
|
|
"valid_targets_mean": 3153.1,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 1.3817891373801916,
|
|
"grad_norm": 0.7002812049962568,
|
|
"learning_rate": 3.88642826005094e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3568761944770813,
|
|
"step": 865,
|
|
"valid_targets_mean": 2762.6,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 1.389776357827476,
|
|
"grad_norm": 0.6554300277197657,
|
|
"learning_rate": 3.883766604952102e-05,
|
|
"loss": 0.4024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36589592695236206,
|
|
"step": 870,
|
|
"valid_targets_mean": 3013.3,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 1.3977635782747604,
|
|
"grad_norm": 0.9277762548807718,
|
|
"learning_rate": 3.8810750538784404e-05,
|
|
"loss": 0.4383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.602756142616272,
|
|
"step": 875,
|
|
"valid_targets_mean": 2646.9,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.4057507987220448,
|
|
"grad_norm": 0.7056121273784406,
|
|
"learning_rate": 3.878353649545728e-05,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3720335364341736,
|
|
"step": 880,
|
|
"valid_targets_mean": 2626.4,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 1.4137380191693292,
|
|
"grad_norm": 0.8160821217697277,
|
|
"learning_rate": 3.875602435143517e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.398079514503479,
|
|
"step": 885,
|
|
"valid_targets_mean": 2784.6,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 1.4217252396166133,
|
|
"grad_norm": 0.7804539766300876,
|
|
"learning_rate": 3.872821454334453e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38995784521102905,
|
|
"step": 890,
|
|
"valid_targets_mean": 2039.9,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 1.4297124600638977,
|
|
"grad_norm": 0.6316196054027368,
|
|
"learning_rate": 3.870010751253587e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37002265453338623,
|
|
"step": 895,
|
|
"valid_targets_mean": 3040.9,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 1.4376996805111821,
|
|
"grad_norm": 0.6409572761355428,
|
|
"learning_rate": 3.867170370507668e-05,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39018845558166504,
|
|
"step": 900,
|
|
"valid_targets_mean": 3908.8,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 1.4456869009584665,
|
|
"grad_norm": 0.7493516133581535,
|
|
"learning_rate": 3.8643003571744445e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47630575299263,
|
|
"step": 905,
|
|
"valid_targets_mean": 3657.0,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 1.4536741214057507,
|
|
"grad_norm": 0.7587597902652502,
|
|
"learning_rate": 3.861400756801938e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39435723423957825,
|
|
"step": 910,
|
|
"valid_targets_mean": 2316.2,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 1.461661341853035,
|
|
"grad_norm": 0.7128627454581141,
|
|
"learning_rate": 3.8584716154077286e-05,
|
|
"loss": 0.4124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3896949589252472,
|
|
"step": 915,
|
|
"valid_targets_mean": 2650.8,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 1.4696485623003195,
|
|
"grad_norm": 0.8029514991007329,
|
|
"learning_rate": 3.855512979478222e-05,
|
|
"loss": 0.4108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38154637813568115,
|
|
"step": 920,
|
|
"valid_targets_mean": 2368.5,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 1.4776357827476039,
|
|
"grad_norm": 0.6334669104618376,
|
|
"learning_rate": 3.852524895967911e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3582165837287903,
|
|
"step": 925,
|
|
"valid_targets_mean": 3368.8,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 1.4856230031948883,
|
|
"grad_norm": 0.8917713068764148,
|
|
"learning_rate": 3.8495074122986296e-05,
|
|
"loss": 0.3938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4337556064128876,
|
|
"step": 930,
|
|
"valid_targets_mean": 2025.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 1.4936102236421724,
|
|
"grad_norm": 0.7588379655649194,
|
|
"learning_rate": 3.846460576358804e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3741896152496338,
|
|
"step": 935,
|
|
"valid_targets_mean": 2364.8,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 1.5015974440894568,
|
|
"grad_norm": 0.8540328127517817,
|
|
"learning_rate": 3.843384436502688e-05,
|
|
"loss": 0.4266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46740004420280457,
|
|
"step": 940,
|
|
"valid_targets_mean": 2058.5,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 1.5095846645367412,
|
|
"grad_norm": 0.9539029311411441,
|
|
"learning_rate": 3.8402790415496e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45888057351112366,
|
|
"step": 945,
|
|
"valid_targets_mean": 1627.0,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.5175718849840254,
|
|
"grad_norm": 0.751038979668103,
|
|
"learning_rate": 3.837144440783144e-05,
|
|
"loss": 0.4359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4049059748649597,
|
|
"step": 950,
|
|
"valid_targets_mean": 2620.1,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 1.5255591054313098,
|
|
"grad_norm": 0.7029060971603168,
|
|
"learning_rate": 3.833980683950431e-05,
|
|
"loss": 0.4158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41180890798568726,
|
|
"step": 955,
|
|
"valid_targets_mean": 2758.8,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 1.5335463258785942,
|
|
"grad_norm": 0.6552867239639415,
|
|
"learning_rate": 3.8307878212612886e-05,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38650572299957275,
|
|
"step": 960,
|
|
"valid_targets_mean": 2928.4,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 1.5415335463258786,
|
|
"grad_norm": 0.7070082527077935,
|
|
"learning_rate": 3.827565903387461e-05,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3676159977912903,
|
|
"step": 965,
|
|
"valid_targets_mean": 2832.1,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 1.549520766773163,
|
|
"grad_norm": 0.6536339089745404,
|
|
"learning_rate": 3.82431498146181e-05,
|
|
"loss": 0.408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4117887616157532,
|
|
"step": 970,
|
|
"valid_targets_mean": 3298.2,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 1.5575079872204474,
|
|
"grad_norm": 0.5889747827068286,
|
|
"learning_rate": 3.821035107077499e-05,
|
|
"loss": 0.4181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3776060938835144,
|
|
"step": 975,
|
|
"valid_targets_mean": 3738.7,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 1.5654952076677318,
|
|
"grad_norm": 0.7832242217896728,
|
|
"learning_rate": 3.817726332287179e-05,
|
|
"loss": 0.3962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42307931184768677,
|
|
"step": 980,
|
|
"valid_targets_mean": 2783.3,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 1.573482428115016,
|
|
"grad_norm": 0.7772069399905718,
|
|
"learning_rate": 3.814388709602155e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4183029532432556,
|
|
"step": 985,
|
|
"valid_targets_mean": 2164.5,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 1.5814696485623003,
|
|
"grad_norm": 0.6390369071717981,
|
|
"learning_rate": 3.8110222919915626e-05,
|
|
"loss": 0.4097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34238097071647644,
|
|
"step": 990,
|
|
"valid_targets_mean": 2893.9,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 1.5894568690095847,
|
|
"grad_norm": 0.8835521761409191,
|
|
"learning_rate": 3.8076271328815175e-05,
|
|
"loss": 0.4149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47050607204437256,
|
|
"step": 995,
|
|
"valid_targets_mean": 2025.2,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 1.5974440894568689,
|
|
"grad_norm": 0.6705461849639794,
|
|
"learning_rate": 3.804203286154275e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3883458375930786,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3000.8,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.6054313099041533,
|
|
"grad_norm": 0.8900798880935058,
|
|
"learning_rate": 3.800750806147371e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4859052896499634,
|
|
"step": 1005,
|
|
"valid_targets_mean": 2045.6,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 1.6134185303514377,
|
|
"grad_norm": 0.8128049456094962,
|
|
"learning_rate": 3.79726974765276e-05,
|
|
"loss": 0.3919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3719909191131592,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3161.9,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.621405750798722,
|
|
"grad_norm": 0.6759604430562426,
|
|
"learning_rate": 3.793760165915947e-05,
|
|
"loss": 0.4059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4264131486415863,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2808.2,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 1.6293929712460065,
|
|
"grad_norm": 0.7966719955909523,
|
|
"learning_rate": 3.7902221166351106e-05,
|
|
"loss": 0.4117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978045582771301,
|
|
"step": 1020,
|
|
"valid_targets_mean": 2988.4,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 1.6373801916932909,
|
|
"grad_norm": 0.7612395889325678,
|
|
"learning_rate": 3.786655655960216e-05,
|
|
"loss": 0.3928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3840201199054718,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2489.2,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 1.645367412140575,
|
|
"grad_norm": 0.8148859071450223,
|
|
"learning_rate": 3.7830608404921294e-05,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4254915118217468,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2051.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 1.6533546325878594,
|
|
"grad_norm": 0.8938070386502805,
|
|
"learning_rate": 3.7794377272817144e-05,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4843091666698456,
|
|
"step": 1035,
|
|
"valid_targets_mean": 1927.4,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 1.6613418530351438,
|
|
"grad_norm": 0.7147040908487072,
|
|
"learning_rate": 3.775786373828929e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37743261456489563,
|
|
"step": 1040,
|
|
"valid_targets_mean": 2456.0,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 1.669329073482428,
|
|
"grad_norm": 0.7333604847912019,
|
|
"learning_rate": 3.7721068380819135e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38293105363845825,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2655.9,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 1.6773162939297124,
|
|
"grad_norm": 0.6778471226057504,
|
|
"learning_rate": 3.768399178436069e-05,
|
|
"loss": 0.3848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35531455278396606,
|
|
"step": 1050,
|
|
"valid_targets_mean": 2518.7,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 1.6853035143769968,
|
|
"grad_norm": 0.6909843971414837,
|
|
"learning_rate": 3.764663453733135e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34978270530700684,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2506.6,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 1.6932907348242812,
|
|
"grad_norm": 0.7306102020763531,
|
|
"learning_rate": 3.7608997232602475e-05,
|
|
"loss": 0.4156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40535885095596313,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2392.9,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 1.7012779552715656,
|
|
"grad_norm": 0.729043257398099,
|
|
"learning_rate": 3.757108046749006e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3885780870914459,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2475.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 1.70926517571885,
|
|
"grad_norm": 0.7414463171330334,
|
|
"learning_rate": 3.753288484374524e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3787192702293396,
|
|
"step": 1070,
|
|
"valid_targets_mean": 2278.1,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 1.7172523961661343,
|
|
"grad_norm": 0.7924544878589329,
|
|
"learning_rate": 3.7494410967544674e-05,
|
|
"loss": 0.394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40646055340766907,
|
|
"step": 1075,
|
|
"valid_targets_mean": 2078.6,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 1.7252396166134185,
|
|
"grad_norm": 0.7336073570912109,
|
|
"learning_rate": 3.745565944948103e-05,
|
|
"loss": 0.4052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3972054123878479,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2379.5,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 1.733226837060703,
|
|
"grad_norm": 2.1859239090436144,
|
|
"learning_rate": 3.7416630904553205e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37333106994628906,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2322.2,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 1.741214057507987,
|
|
"grad_norm": 0.7553254359605456,
|
|
"learning_rate": 3.737732595215663e-05,
|
|
"loss": 0.4099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4096168279647827,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2484.2,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 1.7492012779552715,
|
|
"grad_norm": 0.8065059981247088,
|
|
"learning_rate": 3.733774521607338e-05,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4362156093120575,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2163.1,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 1.7571884984025559,
|
|
"grad_norm": 0.7915224462337633,
|
|
"learning_rate": 3.729788932446231e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3989744782447815,
|
|
"step": 1100,
|
|
"valid_targets_mean": 2284.8,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 1.7651757188498403,
|
|
"grad_norm": 0.7243472554453476,
|
|
"learning_rate": 3.7257758909849107e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3853446841239929,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2483.2,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 1.7731629392971247,
|
|
"grad_norm": 0.745207892399302,
|
|
"learning_rate": 3.72173546091162e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41758179664611816,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2911.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 1.781150159744409,
|
|
"grad_norm": 0.7437953252134866,
|
|
"learning_rate": 3.7176677063492685e-05,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41335779428482056,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2545.2,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 1.7891373801916934,
|
|
"grad_norm": 0.7564246393429221,
|
|
"learning_rate": 3.713572691854414e-05,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38959911465644836,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2253.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.7971246006389776,
|
|
"grad_norm": 0.7603529231367242,
|
|
"learning_rate": 3.709450482416239e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3875444531440735,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2173.9,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 1.805111821086262,
|
|
"grad_norm": 0.7678246105344447,
|
|
"learning_rate": 3.7053011434555165e-05,
|
|
"loss": 0.3868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.393062949180603,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2121.4,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 1.8130990415335462,
|
|
"grad_norm": 0.8911100205250319,
|
|
"learning_rate": 3.701124740823575e-05,
|
|
"loss": 0.4266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45256346464157104,
|
|
"step": 1135,
|
|
"valid_targets_mean": 1827.9,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 1.8210862619808306,
|
|
"grad_norm": 0.6817299296998189,
|
|
"learning_rate": 3.696921340801253e-05,
|
|
"loss": 0.3604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33451589941978455,
|
|
"step": 1140,
|
|
"valid_targets_mean": 2649.7,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 1.829073482428115,
|
|
"grad_norm": 0.7673942363594747,
|
|
"learning_rate": 3.6926910100978444e-05,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4738661050796509,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2683.5,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 1.8370607028753994,
|
|
"grad_norm": 0.6864317760360853,
|
|
"learning_rate": 3.688433815850041e-05,
|
|
"loss": 0.4008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37595707178115845,
|
|
"step": 1150,
|
|
"valid_targets_mean": 2731.1,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 1.8450479233226837,
|
|
"grad_norm": 0.7422499597467689,
|
|
"learning_rate": 3.68414982562087e-05,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38929957151412964,
|
|
"step": 1155,
|
|
"valid_targets_mean": 2177.1,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 1.8530351437699681,
|
|
"grad_norm": 0.6352100189476874,
|
|
"learning_rate": 3.679839107398618e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3333427906036377,
|
|
"step": 1160,
|
|
"valid_targets_mean": 2705.9,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 1.8610223642172525,
|
|
"grad_norm": 0.8636446600031277,
|
|
"learning_rate": 3.6755017295957536e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4158846437931061,
|
|
"step": 1165,
|
|
"valid_targets_mean": 1757.2,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 1.8690095846645367,
|
|
"grad_norm": 0.6791302016165894,
|
|
"learning_rate": 3.67113776104784e-05,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42780280113220215,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2972.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.876996805111821,
|
|
"grad_norm": 0.6481771904856991,
|
|
"learning_rate": 3.6667472710124475e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3816065192222595,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3218.8,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 1.8849840255591053,
|
|
"grad_norm": 0.7756202651256098,
|
|
"learning_rate": 3.6623303291680465e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4046546518802643,
|
|
"step": 1180,
|
|
"valid_targets_mean": 2061.2,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 1.8929712460063897,
|
|
"grad_norm": 0.7729091235256559,
|
|
"learning_rate": 3.6578870056129086e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34702447056770325,
|
|
"step": 1185,
|
|
"valid_targets_mean": 2634.7,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 1.900958466453674,
|
|
"grad_norm": 0.821768257949459,
|
|
"learning_rate": 3.653417370863992e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4143090546131134,
|
|
"step": 1190,
|
|
"valid_targets_mean": 1957.2,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 1.9089456869009584,
|
|
"grad_norm": 0.762186282496381,
|
|
"learning_rate": 3.6489214958558206e-05,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42136192321777344,
|
|
"step": 1195,
|
|
"valid_targets_mean": 2462.2,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 1.9169329073482428,
|
|
"grad_norm": 0.8127357075608987,
|
|
"learning_rate": 3.644399451939358e-05,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.423073947429657,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2107.4,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 1.9249201277955272,
|
|
"grad_norm": 0.7169334190124541,
|
|
"learning_rate": 3.639851310880881e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3486313819885254,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2765.5,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 1.9329073482428116,
|
|
"grad_norm": 0.8593680491891655,
|
|
"learning_rate": 3.635277144860834e-05,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40185636281967163,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2203.9,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 1.9408945686900958,
|
|
"grad_norm": 0.8059936863446433,
|
|
"learning_rate": 3.630677026472684e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41976696252822876,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2355.6,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 1.9488817891373802,
|
|
"grad_norm": 0.7109542599022108,
|
|
"learning_rate": 3.626051028721773e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39990681409835815,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2683.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 1.9568690095846646,
|
|
"grad_norm": 0.6700272044873445,
|
|
"learning_rate": 3.621399225024156e-05,
|
|
"loss": 0.4124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44246742129325867,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3880.9,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 1.9648562300319488,
|
|
"grad_norm": 0.6605806625778301,
|
|
"learning_rate": 3.616721689205436e-05,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3688991665840149,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2773.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.9728434504792332,
|
|
"grad_norm": 0.6518242390143356,
|
|
"learning_rate": 3.612018495499594e-05,
|
|
"loss": 0.3936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36180076003074646,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3130.9,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 1.9808306709265175,
|
|
"grad_norm": 0.6546070806427944,
|
|
"learning_rate": 3.6072897185478074e-05,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3583923280239105,
|
|
"step": 1240,
|
|
"valid_targets_mean": 2734.8,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 1.988817891373802,
|
|
"grad_norm": 0.7684714736429226,
|
|
"learning_rate": 3.6025354333972714e-05,
|
|
"loss": 0.3999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.381608247756958,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2130.2,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 1.9968051118210863,
|
|
"grad_norm": 0.5377820282759979,
|
|
"learning_rate": 3.597755715500002e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29479172825813293,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3720.0,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 2.0047923322683707,
|
|
"grad_norm": 0.7307015130777053,
|
|
"learning_rate": 3.592950640711642e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3592406213283539,
|
|
"step": 1255,
|
|
"valid_targets_mean": 2537.5,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 2.012779552715655,
|
|
"grad_norm": 0.9833773923411218,
|
|
"learning_rate": 3.5881202852902543e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41569072008132935,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2105.7,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 2.0207667731629395,
|
|
"grad_norm": 0.7661159365309352,
|
|
"learning_rate": 3.583264725895117e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29133570194244385,
|
|
"step": 1265,
|
|
"valid_targets_mean": 2250.2,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 2.0287539936102235,
|
|
"grad_norm": 0.7873905957249098,
|
|
"learning_rate": 3.5783840395854984e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35231316089630127,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2340.8,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 2.036741214057508,
|
|
"grad_norm": 0.7507234211415984,
|
|
"learning_rate": 3.5734783038194446e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3584486246109009,
|
|
"step": 1275,
|
|
"valid_targets_mean": 2842.9,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 2.0447284345047922,
|
|
"grad_norm": 0.7180847428915816,
|
|
"learning_rate": 3.5685475964525396e-05,
|
|
"loss": 0.3482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34811705350875854,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2867.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 2.0527156549520766,
|
|
"grad_norm": 0.7487200182475788,
|
|
"learning_rate": 3.5635919957366775e-05,
|
|
"loss": 0.3562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3606683611869812,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2671.2,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 2.060702875399361,
|
|
"grad_norm": 0.7284802684370832,
|
|
"learning_rate": 3.558611580318817e-05,
|
|
"loss": 0.3446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32244378328323364,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2492.1,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.0686900958466454,
|
|
"grad_norm": 0.7520590076117712,
|
|
"learning_rate": 3.5536064292397334e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30256298184394836,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2277.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 2.07667731629393,
|
|
"grad_norm": 0.7616872474117237,
|
|
"learning_rate": 3.548576621932766e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36649543046951294,
|
|
"step": 1300,
|
|
"valid_targets_mean": 2481.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 2.084664536741214,
|
|
"grad_norm": 0.7627510536302121,
|
|
"learning_rate": 3.543522238222555e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33701151609420776,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2518.0,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 2.0926517571884986,
|
|
"grad_norm": 0.9887540107162078,
|
|
"learning_rate": 3.538443358323777e-05,
|
|
"loss": 0.3571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40574467182159424,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2081.9,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 2.1006389776357826,
|
|
"grad_norm": 0.6575132072803426,
|
|
"learning_rate": 3.5333400628398696e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39077243208885193,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3480.4,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 2.108626198083067,
|
|
"grad_norm": 0.7825042864924548,
|
|
"learning_rate": 3.5282124327617556e-05,
|
|
"loss": 0.3628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085414171218872,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2038.1,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 2.1166134185303513,
|
|
"grad_norm": 0.7318663985322585,
|
|
"learning_rate": 3.5230605494665535e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3642362952232361,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2941.8,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 2.1246006389776357,
|
|
"grad_norm": 0.7697002652291693,
|
|
"learning_rate": 3.517884494716289e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3499295115470886,
|
|
"step": 1330,
|
|
"valid_targets_mean": 2524.7,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 2.13258785942492,
|
|
"grad_norm": 0.6973975248306689,
|
|
"learning_rate": 3.512684350656596e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35685187578201294,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2624.7,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 2.1405750798722045,
|
|
"grad_norm": 0.7245226638759887,
|
|
"learning_rate": 3.507460199815414e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3740293085575104,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2858.9,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 2.148562300319489,
|
|
"grad_norm": 0.698266309547678,
|
|
"learning_rate": 3.5022121251016786e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305152356624603,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2800.9,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 2.1565495207667733,
|
|
"grad_norm": 0.7478336258225923,
|
|
"learning_rate": 3.4969402098040015e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32141047716140747,
|
|
"step": 1350,
|
|
"valid_targets_mean": 2520.7,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 2.1645367412140577,
|
|
"grad_norm": 0.7157503099270842,
|
|
"learning_rate": 3.491644537589356e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32546648383140564,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2741.9,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 2.1725239616613417,
|
|
"grad_norm": 0.8209179966473588,
|
|
"learning_rate": 3.486325192501743e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4074542820453644,
|
|
"step": 1360,
|
|
"valid_targets_mean": 2469.2,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 2.180511182108626,
|
|
"grad_norm": 0.6781946810239498,
|
|
"learning_rate": 3.480982258960859e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34149593114852905,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3062.8,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 2.1884984025559104,
|
|
"grad_norm": 0.7737454073045287,
|
|
"learning_rate": 3.47561582176076e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34286773204803467,
|
|
"step": 1370,
|
|
"valid_targets_mean": 2286.4,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 2.196485623003195,
|
|
"grad_norm": 0.7866502855483422,
|
|
"learning_rate": 3.470225966068507e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3597346544265747,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2557.3,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 2.2044728434504792,
|
|
"grad_norm": 1.0471756768425142,
|
|
"learning_rate": 3.464812777422826e-05,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3923761546611786,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2281.4,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 2.2124600638977636,
|
|
"grad_norm": 0.8758230392451634,
|
|
"learning_rate": 3.459376341732741e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.370530366897583,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2239.5,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 2.220447284345048,
|
|
"grad_norm": 0.9412015925670302,
|
|
"learning_rate": 3.453916745276213e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37811851501464844,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2260.0,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.2284345047923324,
|
|
"grad_norm": 0.6831046766602709,
|
|
"learning_rate": 3.4484340746987745e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33743810653686523,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2979.9,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 2.236421725239617,
|
|
"grad_norm": 0.6596913601762512,
|
|
"learning_rate": 3.4429284170121494e-05,
|
|
"loss": 0.3499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.340465784072876,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3560.1,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 2.244408945686901,
|
|
"grad_norm": 0.7252387909203071,
|
|
"learning_rate": 3.4373998595928764e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3821272850036621,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2846.5,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.252396166134185,
|
|
"grad_norm": 0.7478281092526754,
|
|
"learning_rate": 3.431848490180919e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34503355622291565,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2307.9,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 2.2603833865814695,
|
|
"grad_norm": 0.8461466941581424,
|
|
"learning_rate": 3.4262743968782736e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35951662063598633,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2063.3,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 2.268370607028754,
|
|
"grad_norm": 0.842822605116236,
|
|
"learning_rate": 3.4206776681475737e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3602384328842163,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2027.8,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 2.2763578274760383,
|
|
"grad_norm": 0.8400571884512494,
|
|
"learning_rate": 3.415058392810682e-05,
|
|
"loss": 0.3749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36056169867515564,
|
|
"step": 1425,
|
|
"valid_targets_mean": 1960.8,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 2.2843450479233227,
|
|
"grad_norm": 0.783753661621574,
|
|
"learning_rate": 3.409416660047286e-05,
|
|
"loss": 0.3604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37486812472343445,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2434.1,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.292332268370607,
|
|
"grad_norm": 0.6156566050197566,
|
|
"learning_rate": 3.403752559393477e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.394123911857605,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4097.2,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 2.3003194888178915,
|
|
"grad_norm": 0.7913135712948658,
|
|
"learning_rate": 3.398066180740332e-05,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38276737928390503,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2424.2,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 2.308306709265176,
|
|
"grad_norm": 0.7017819000412645,
|
|
"learning_rate": 3.39235761433249e-05,
|
|
"loss": 0.3706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3491944968700409,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2928.1,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 2.31629392971246,
|
|
"grad_norm": 0.8297073210644338,
|
|
"learning_rate": 3.386626950766713e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37846285104751587,
|
|
"step": 1450,
|
|
"valid_targets_mean": 2144.4,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.3242811501597442,
|
|
"grad_norm": 0.7536706867066268,
|
|
"learning_rate": 3.380874280990454e-05,
|
|
"loss": 0.3379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3536767363548279,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2526.7,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 2.3322683706070286,
|
|
"grad_norm": 0.6960950612739384,
|
|
"learning_rate": 3.37509969630041e-05,
|
|
"loss": 0.3679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3577936887741089,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3126.8,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 2.340255591054313,
|
|
"grad_norm": 0.6570756373329435,
|
|
"learning_rate": 3.369303288341078e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31299734115600586,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2738.1,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.3482428115015974,
|
|
"grad_norm": 0.9813920348291255,
|
|
"learning_rate": 3.363485149103292e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4447973966598511,
|
|
"step": 1470,
|
|
"valid_targets_mean": 1766.2,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 2.356230031948882,
|
|
"grad_norm": 0.7851030915889392,
|
|
"learning_rate": 3.357645370922772e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3544445037841797,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2311.8,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 2.364217252396166,
|
|
"grad_norm": 0.7176714591732453,
|
|
"learning_rate": 3.351784046478655e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3755318820476532,
|
|
"step": 1480,
|
|
"valid_targets_mean": 2652.3,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 2.3722044728434506,
|
|
"grad_norm": 0.9094619028118416,
|
|
"learning_rate": 3.345901268792022e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3461253046989441,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2354.3,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 2.380191693290735,
|
|
"grad_norm": 0.6931482367761702,
|
|
"learning_rate": 3.3399971312244255e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3999279737472534,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3045.5,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 2.3881789137380194,
|
|
"grad_norm": 0.7935334508118652,
|
|
"learning_rate": 3.334071727476406e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3708428740501404,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2271.7,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 2.3961661341853033,
|
|
"grad_norm": 0.805059914134251,
|
|
"learning_rate": 3.3281251515860035e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3594484031200409,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2383.9,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 2.4041533546325877,
|
|
"grad_norm": 0.6828354514311282,
|
|
"learning_rate": 3.3221574979272676e-05,
|
|
"loss": 0.367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4220502972602844,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3804.4,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 2.412140575079872,
|
|
"grad_norm": 0.8611916600458017,
|
|
"learning_rate": 3.316168861208759e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39711904525756836,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2143.0,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 2.4201277955271565,
|
|
"grad_norm": 0.8536648766858653,
|
|
"learning_rate": 3.310159336472047e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4029867947101593,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2132.2,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 2.428115015974441,
|
|
"grad_norm": 0.7240127616079934,
|
|
"learning_rate": 3.304129019090198e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.323263943195343,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2592.6,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 2.4361022364217253,
|
|
"grad_norm": 0.8228567920219118,
|
|
"learning_rate": 3.298078004766267e-05,
|
|
"loss": 0.3624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3735974133014679,
|
|
"step": 1525,
|
|
"valid_targets_mean": 2103.5,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.4440894568690097,
|
|
"grad_norm": 0.7204387039018046,
|
|
"learning_rate": 3.292006389531774e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080117702484131,
|
|
"step": 1530,
|
|
"valid_targets_mean": 2572.3,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 2.452076677316294,
|
|
"grad_norm": 0.8312146457673889,
|
|
"learning_rate": 3.2859142697451835e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3744718134403229,
|
|
"step": 1535,
|
|
"valid_targets_mean": 2633.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 2.460063897763578,
|
|
"grad_norm": 0.8428542224908769,
|
|
"learning_rate": 3.279801742090372e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34937626123428345,
|
|
"step": 1540,
|
|
"valid_targets_mean": 1984.5,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 2.4680511182108624,
|
|
"grad_norm": 0.8078159061834784,
|
|
"learning_rate": 3.2736689035750975e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34731441736221313,
|
|
"step": 1545,
|
|
"valid_targets_mean": 2023.4,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 2.476038338658147,
|
|
"grad_norm": 0.7586310114524072,
|
|
"learning_rate": 3.2675158515294554e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3595733046531677,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2394.4,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 2.484025559105431,
|
|
"grad_norm": 0.8782118478650375,
|
|
"learning_rate": 3.2613426836043386e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3924805521965027,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2016.5,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 2.4920127795527156,
|
|
"grad_norm": 1.0201176108922991,
|
|
"learning_rate": 3.2551494977698844e-05,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4451709985733032,
|
|
"step": 1560,
|
|
"valid_targets_mean": 2443.2,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.7402484790586774,
|
|
"learning_rate": 3.248936392313921e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37195518612861633,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2549.1,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 2.5079872204472844,
|
|
"grad_norm": 0.7047917669552944,
|
|
"learning_rate": 3.2427034658404056e-05,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3795313239097595,
|
|
"step": 1570,
|
|
"valid_targets_mean": 2770.7,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 2.515974440894569,
|
|
"grad_norm": 0.788488696178411,
|
|
"learning_rate": 3.236450817267863e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3698144853115082,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2222.3,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 2.523961661341853,
|
|
"grad_norm": 0.7668364391080691,
|
|
"learning_rate": 3.230178545827814e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3587292432785034,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2149.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 2.5319488817891376,
|
|
"grad_norm": 0.733851995198835,
|
|
"learning_rate": 3.223886751063201e-05,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34284400939941406,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2885.8,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 2.539936102236422,
|
|
"grad_norm": 0.8960090934073129,
|
|
"learning_rate": 3.2175755328268054e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40211963653564453,
|
|
"step": 1590,
|
|
"valid_targets_mean": 1824.8,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 2.547923322683706,
|
|
"grad_norm": 0.7621489034866239,
|
|
"learning_rate": 3.2112449912796675e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3575727641582489,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2271.0,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 2.5559105431309903,
|
|
"grad_norm": 0.791584844700413,
|
|
"learning_rate": 3.204895226889494e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3226831555366516,
|
|
"step": 1600,
|
|
"valid_targets_mean": 2098.2,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 2.5638977635782747,
|
|
"grad_norm": 0.7369629528977614,
|
|
"learning_rate": 3.1985263404290634e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3434581756591797,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2384.9,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 2.571884984025559,
|
|
"grad_norm": 0.6074058272247946,
|
|
"learning_rate": 3.19213843297463e-05,
|
|
"loss": 0.3571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31266868114471436,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3440.7,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.5798722044728435,
|
|
"grad_norm": 0.8252170582340995,
|
|
"learning_rate": 3.1857316059043144e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3724806010723114,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2174.2,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 2.587859424920128,
|
|
"grad_norm": 0.9370489169826299,
|
|
"learning_rate": 3.1793059608964986e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39750707149505615,
|
|
"step": 1620,
|
|
"valid_targets_mean": 1770.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 2.5958466453674123,
|
|
"grad_norm": 0.6974214335694603,
|
|
"learning_rate": 3.172861599928212e-05,
|
|
"loss": 0.3629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33244627714157104,
|
|
"step": 1625,
|
|
"valid_targets_mean": 2832.3,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 2.6038338658146962,
|
|
"grad_norm": 0.8568711746903033,
|
|
"learning_rate": 3.166398625273512e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3959014117717743,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2114.1,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 2.6118210862619806,
|
|
"grad_norm": 0.7867057707385022,
|
|
"learning_rate": 3.15991713950186e-05,
|
|
"loss": 0.3497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4303067624568939,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2446.9,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 2.619808306709265,
|
|
"grad_norm": 0.7173857160529505,
|
|
"learning_rate": 3.153417245476495e-05,
|
|
"loss": 0.3546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39158427715301514,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2765.5,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 2.6277955271565494,
|
|
"grad_norm": 0.7985801175922084,
|
|
"learning_rate": 3.146899046352804e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38287901878356934,
|
|
"step": 1645,
|
|
"valid_targets_mean": 2288.4,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 2.635782747603834,
|
|
"grad_norm": 0.6866908484179204,
|
|
"learning_rate": 3.140362645576676e-05,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36237818002700806,
|
|
"step": 1650,
|
|
"valid_targets_mean": 2950.5,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 2.643769968051118,
|
|
"grad_norm": 0.8487513639736733,
|
|
"learning_rate": 3.133808146882871e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35930490493774414,
|
|
"step": 1655,
|
|
"valid_targets_mean": 1923.4,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 2.6517571884984026,
|
|
"grad_norm": 0.6730915932880323,
|
|
"learning_rate": 3.1272356542933654e-05,
|
|
"loss": 0.3572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32872721552848816,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3033.0,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 2.659744408945687,
|
|
"grad_norm": 0.7722547670491977,
|
|
"learning_rate": 3.120645272115707e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36604711413383484,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2520.8,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 2.6677316293929714,
|
|
"grad_norm": 6.972277213269221,
|
|
"learning_rate": 3.114037104941355e-05,
|
|
"loss": 0.3678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3724774122238159,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2323.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 2.6757188498402558,
|
|
"grad_norm": 0.7161640816767955,
|
|
"learning_rate": 3.1074112576440236e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3394412398338318,
|
|
"step": 1675,
|
|
"valid_targets_mean": 2666.2,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 2.68370607028754,
|
|
"grad_norm": 0.8110258702792548,
|
|
"learning_rate": 3.1007678353780154e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3701305389404297,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2010.2,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 2.6916932907348246,
|
|
"grad_norm": 0.7950751695584922,
|
|
"learning_rate": 3.094106943576553e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3550404906272888,
|
|
"step": 1685,
|
|
"valid_targets_mean": 2246.4,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 2.6996805111821085,
|
|
"grad_norm": 0.6789871175138987,
|
|
"learning_rate": 3.087428687950108e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3317544758319855,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2831.9,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 2.707667731629393,
|
|
"grad_norm": 0.6973333916768045,
|
|
"learning_rate": 3.080733174484719e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31072407960891724,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2434.2,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 2.7156549520766773,
|
|
"grad_norm": 0.9039087738454327,
|
|
"learning_rate": 3.074020509440313e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39978793263435364,
|
|
"step": 1700,
|
|
"valid_targets_mean": 1774.1,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 2.7236421725239617,
|
|
"grad_norm": 0.79100268708857,
|
|
"learning_rate": 3.0672907993490183e-05,
|
|
"loss": 0.3657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36202025413513184,
|
|
"step": 1705,
|
|
"valid_targets_mean": 2609.2,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 2.731629392971246,
|
|
"grad_norm": 0.6593350684690652,
|
|
"learning_rate": 3.060544151013473e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972046434879303,
|
|
"step": 1710,
|
|
"valid_targets_mean": 2633.6,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.7396166134185305,
|
|
"grad_norm": 0.8330266400517762,
|
|
"learning_rate": 3.053780671505132e-05,
|
|
"loss": 0.3699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40603917837142944,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2819.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 2.747603833865815,
|
|
"grad_norm": 0.8057606977626762,
|
|
"learning_rate": 3.047000468162563e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36105820536613464,
|
|
"step": 1720,
|
|
"valid_targets_mean": 2031.9,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 2.755591054313099,
|
|
"grad_norm": 0.7465355153765157,
|
|
"learning_rate": 3.0402036485897496e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3388982117176056,
|
|
"step": 1725,
|
|
"valid_targets_mean": 2370.1,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 2.763578274760383,
|
|
"grad_norm": 0.9240965796128426,
|
|
"learning_rate": 3.0333903206543786e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40329504013061523,
|
|
"step": 1730,
|
|
"valid_targets_mean": 1809.6,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 2.7715654952076676,
|
|
"grad_norm": 0.7528739266844046,
|
|
"learning_rate": 3.02656059248613e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3237878978252411,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2512.1,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 2.779552715654952,
|
|
"grad_norm": 0.6955579094727651,
|
|
"learning_rate": 3.0197145724749615e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3791919946670532,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2783.9,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 2.7875399361022364,
|
|
"grad_norm": 0.8727262143758859,
|
|
"learning_rate": 3.0128523692693865e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3822493553161621,
|
|
"step": 1745,
|
|
"valid_targets_mean": 1752.6,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 2.7955271565495208,
|
|
"grad_norm": 0.8172199316410461,
|
|
"learning_rate": 3.005974091774751e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36902374029159546,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2639.9,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 2.803514376996805,
|
|
"grad_norm": 0.7075124501614497,
|
|
"learning_rate": 2.999079849151505e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34357282519340515,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2475.2,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 2.8115015974440896,
|
|
"grad_norm": 0.6885498377319528,
|
|
"learning_rate": 2.992169750813471e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35140857100486755,
|
|
"step": 1760,
|
|
"valid_targets_mean": 2581.6,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 2.819488817891374,
|
|
"grad_norm": 0.851603812381958,
|
|
"learning_rate": 2.985243906426106e-05,
|
|
"loss": 0.3514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3701210618019104,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2282.6,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 2.8274760383386583,
|
|
"grad_norm": 0.8060460289072541,
|
|
"learning_rate": 2.9783024259047617e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35286760330200195,
|
|
"step": 1770,
|
|
"valid_targets_mean": 2315.8,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 2.8354632587859427,
|
|
"grad_norm": 0.7270888579129352,
|
|
"learning_rate": 2.971345419412941e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3470805585384369,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2484.6,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 2.8434504792332267,
|
|
"grad_norm": 0.7690173368238079,
|
|
"learning_rate": 2.964372997360548e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3708645701408386,
|
|
"step": 1780,
|
|
"valid_targets_mean": 2337.9,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 2.851437699680511,
|
|
"grad_norm": 0.8194727665697202,
|
|
"learning_rate": 2.957385270402137e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3650719225406647,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2043.7,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 2.8594249201277955,
|
|
"grad_norm": 0.7837982950322574,
|
|
"learning_rate": 2.9503823494351565e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3433952033519745,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2254.6,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 2.86741214057508,
|
|
"grad_norm": 0.7748612695904107,
|
|
"learning_rate": 2.9433643455981874e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34527644515037537,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2216.2,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 2.8753993610223643,
|
|
"grad_norm": 0.7723615666663751,
|
|
"learning_rate": 2.9363313702691827e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38402146100997925,
|
|
"step": 1800,
|
|
"valid_targets_mean": 2478.6,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 2.8833865814696487,
|
|
"grad_norm": 0.8129665345322364,
|
|
"learning_rate": 2.9292835350636957e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35540565848350525,
|
|
"step": 1805,
|
|
"valid_targets_mean": 2135.9,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 2.891373801916933,
|
|
"grad_norm": 0.7913512326898768,
|
|
"learning_rate": 2.922220951833111e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39493462443351746,
|
|
"step": 1810,
|
|
"valid_targets_mean": 2305.2,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 2.899361022364217,
|
|
"grad_norm": 0.7355407890123292,
|
|
"learning_rate": 2.9151437326628706e-05,
|
|
"loss": 0.3712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4053201675415039,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2545.2,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 2.9073482428115014,
|
|
"grad_norm": 0.7206526486089542,
|
|
"learning_rate": 2.908051989870692e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39946863055229187,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3111.5,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 2.915335463258786,
|
|
"grad_norm": 0.736423175106571,
|
|
"learning_rate": 2.9009458360047867e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36842191219329834,
|
|
"step": 1825,
|
|
"valid_targets_mean": 2563.7,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.92332268370607,
|
|
"grad_norm": 0.7030044667438097,
|
|
"learning_rate": 2.8938253838420762e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35021406412124634,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2671.2,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 2.9313099041533546,
|
|
"grad_norm": 0.6604087038201576,
|
|
"learning_rate": 2.8866907463864006e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3227359652519226,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2729.4,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 2.939297124600639,
|
|
"grad_norm": 0.7614793377075194,
|
|
"learning_rate": 2.8795420368667225e-05,
|
|
"loss": 0.3873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36556944251060486,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2534.0,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 2.9472843450479234,
|
|
"grad_norm": 0.7279078494238577,
|
|
"learning_rate": 2.8723793687353354e-05,
|
|
"loss": 0.3401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3116756081581116,
|
|
"step": 1845,
|
|
"valid_targets_mean": 2259.3,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 2.9552715654952078,
|
|
"grad_norm": 0.8370886155603513,
|
|
"learning_rate": 2.8652028556660593e-05,
|
|
"loss": 0.3602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38726478815078735,
|
|
"step": 1850,
|
|
"valid_targets_mean": 1996.5,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.963258785942492,
|
|
"grad_norm": 0.7140064843378341,
|
|
"learning_rate": 2.8580126115524373e-05,
|
|
"loss": 0.3509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35338228940963745,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2756.1,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 2.9712460063897765,
|
|
"grad_norm": 0.6535447779158907,
|
|
"learning_rate": 2.8508087505059302e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3366844952106476,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2747.4,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 2.979233226837061,
|
|
"grad_norm": 0.7200167531433231,
|
|
"learning_rate": 2.843591386854102e-05,
|
|
"loss": 0.3411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3818932771682739,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3221.3,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 2.987220447284345,
|
|
"grad_norm": 0.8292981852642277,
|
|
"learning_rate": 2.8363606351388068e-05,
|
|
"loss": 0.347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38863205909729004,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2119.6,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 2.9952076677316293,
|
|
"grad_norm": 0.5630719561731078,
|
|
"learning_rate": 2.829116610114375e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3259061574935913,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3664.6,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 3.0031948881789137,
|
|
"grad_norm": 0.6736423709172283,
|
|
"learning_rate": 2.821859426745786e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2941809296607971,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2834.2,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.011182108626198,
|
|
"grad_norm": 0.8160292872265067,
|
|
"learning_rate": 2.8145892002068454e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3640558123588562,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2331.7,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 3.0191693290734825,
|
|
"grad_norm": 0.8569890256828284,
|
|
"learning_rate": 2.8073060458783606e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2934345304965973,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2713.8,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 3.027156549520767,
|
|
"grad_norm": 0.915745593125638,
|
|
"learning_rate": 2.8000100793463056e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31757089495658875,
|
|
"step": 1895,
|
|
"valid_targets_mean": 1587.3,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 3.0351437699680512,
|
|
"grad_norm": 0.7737453011692252,
|
|
"learning_rate": 2.792701416399988e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3050945997238159,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2497.1,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 3.0431309904153356,
|
|
"grad_norm": 0.9226319494127296,
|
|
"learning_rate": 2.7853801730302134e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34390947222709656,
|
|
"step": 1905,
|
|
"valid_targets_mean": 2636.4,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 3.0511182108626196,
|
|
"grad_norm": 0.8631615969093904,
|
|
"learning_rate": 2.7780464654274396e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3235800564289093,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2160.9,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.059105431309904,
|
|
"grad_norm": 0.7107875450685099,
|
|
"learning_rate": 2.770700409979938e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26461490988731384,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2778.4,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 3.0670926517571884,
|
|
"grad_norm": 0.6222319695164672,
|
|
"learning_rate": 2.7633421232719442e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30275171995162964,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3926.6,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 3.0750798722044728,
|
|
"grad_norm": 0.8370271054010433,
|
|
"learning_rate": 2.7559717220818067e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30962634086608887,
|
|
"step": 1925,
|
|
"valid_targets_mean": 2299.7,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 3.083067092651757,
|
|
"grad_norm": 0.7271187865092554,
|
|
"learning_rate": 2.7485893233801366e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768843173980713,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2834.2,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 3.0910543130990416,
|
|
"grad_norm": 0.7753493326372273,
|
|
"learning_rate": 2.7411950443279458e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.300983726978302,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2373.8,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 3.099041533546326,
|
|
"grad_norm": 0.7509023210013628,
|
|
"learning_rate": 2.7337890022747945e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594980299472809,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2594.6,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 3.1070287539936103,
|
|
"grad_norm": 0.8596435339814381,
|
|
"learning_rate": 2.7263713147569243e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32720938324928284,
|
|
"step": 1945,
|
|
"valid_targets_mean": 2411.2,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 3.1150159744408947,
|
|
"grad_norm": 0.8234574467573099,
|
|
"learning_rate": 2.7189420994953945e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3490561246871948,
|
|
"step": 1950,
|
|
"valid_targets_mean": 2496.1,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.123003194888179,
|
|
"grad_norm": 0.8593021029831257,
|
|
"learning_rate": 2.7115014743942114e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30254870653152466,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2322.1,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 3.130990415335463,
|
|
"grad_norm": 0.7817996693599417,
|
|
"learning_rate": 2.704049557538461e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931901812553406,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2378.4,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 3.1389776357827475,
|
|
"grad_norm": 0.8139655820158753,
|
|
"learning_rate": 2.696586467192434e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3286445140838623,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2152.5,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 3.146964856230032,
|
|
"grad_norm": 0.6089563103819838,
|
|
"learning_rate": 2.6891123217977448e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24358676373958588,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3910.9,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 3.1549520766773163,
|
|
"grad_norm": 0.6640963606357626,
|
|
"learning_rate": 2.681627239971458e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24149221181869507,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3012.3,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 3.1629392971246006,
|
|
"grad_norm": 0.7985749563606189,
|
|
"learning_rate": 2.6741313405041997e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3168647587299347,
|
|
"step": 1980,
|
|
"valid_targets_mean": 2709.4,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 3.170926517571885,
|
|
"grad_norm": 0.7339331415134622,
|
|
"learning_rate": 2.666624742358279e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31010445952415466,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2837.5,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.1789137380191694,
|
|
"grad_norm": 0.8273209687930265,
|
|
"learning_rate": 2.659107564665794e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3452502489089966,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2361.6,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.186900958466454,
|
|
"grad_norm": 1.0096648929625331,
|
|
"learning_rate": 2.6515799267267445e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33967769145965576,
|
|
"step": 1995,
|
|
"valid_targets_mean": 1962.4,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 3.194888178913738,
|
|
"grad_norm": 0.6872144304041945,
|
|
"learning_rate": 2.6440419480071366e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33901578187942505,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3611.7,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 3.202875399361022,
|
|
"grad_norm": 0.7059598362473455,
|
|
"learning_rate": 2.6364937481370887e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29871073365211487,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3143.2,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 3.2108626198083066,
|
|
"grad_norm": 0.8308290182280206,
|
|
"learning_rate": 2.628935446908933e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3320299983024597,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2219.8,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 3.218849840255591,
|
|
"grad_norm": 0.6340851777887246,
|
|
"learning_rate": 2.6213671642753124e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32461097836494446,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4043.0,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 3.2268370607028753,
|
|
"grad_norm": 0.7913180987537052,
|
|
"learning_rate": 2.6137890203472786e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33547550439834595,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2600.8,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.2348242811501597,
|
|
"grad_norm": 0.7627361359178503,
|
|
"learning_rate": 2.606201135392383e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28489091992378235,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2540.9,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 3.242811501597444,
|
|
"grad_norm": 0.6672086712155495,
|
|
"learning_rate": 2.5986036298327725e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726505398750305,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3275.8,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 3.2507987220447285,
|
|
"grad_norm": 0.727340416987911,
|
|
"learning_rate": 2.590996624243276e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29613572359085083,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2909.1,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 3.258785942492013,
|
|
"grad_norm": 0.8920605774260191,
|
|
"learning_rate": 2.5833802393494897e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32466375827789307,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2008.9,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 3.2667731629392973,
|
|
"grad_norm": 0.7890467657670993,
|
|
"learning_rate": 2.5757545960258638e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885025143623352,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2052.9,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 3.2747603833865817,
|
|
"grad_norm": 0.7562549113435654,
|
|
"learning_rate": 2.5681198152937804e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28339189291000366,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2698.2,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 3.2827476038338657,
|
|
"grad_norm": 0.8466561781765465,
|
|
"learning_rate": 2.5604760183196377e-05,
|
|
"loss": 0.3408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3689371347427368,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2465.3,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 3.29073482428115,
|
|
"grad_norm": 0.7708458414565943,
|
|
"learning_rate": 2.5528233264129228e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26765453815460205,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2332.1,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 3.2987220447284344,
|
|
"grad_norm": 2.095127882148011,
|
|
"learning_rate": 2.54516186102429e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29320526123046875,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2436.4,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 3.306709265175719,
|
|
"grad_norm": 0.8686932944461578,
|
|
"learning_rate": 2.5374917437436294e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30253884196281433,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2653.9,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 3.3146964856230032,
|
|
"grad_norm": 0.9408305643433394,
|
|
"learning_rate": 2.5298130962981403e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32906097173690796,
|
|
"step": 2075,
|
|
"valid_targets_mean": 1727.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 3.3226837060702876,
|
|
"grad_norm": 0.7146355246150461,
|
|
"learning_rate": 2.5221260405503985e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307746559381485,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2656.9,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 3.330670926517572,
|
|
"grad_norm": 0.7824500674348659,
|
|
"learning_rate": 2.5144306984964225e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31713026762008667,
|
|
"step": 2085,
|
|
"valid_targets_mean": 2360.4,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 3.3386581469648564,
|
|
"grad_norm": 0.7578884488122225,
|
|
"learning_rate": 2.5067271922637377e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31683745980262756,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2861.6,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 3.3466453674121404,
|
|
"grad_norm": 0.7178783990968142,
|
|
"learning_rate": 2.499015644109435e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29979944229125977,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3085.9,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 3.3546325878594248,
|
|
"grad_norm": 0.8152975179786228,
|
|
"learning_rate": 2.491296176418236e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138399124145508,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2648.7,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 3.362619808306709,
|
|
"grad_norm": 0.9574589822715499,
|
|
"learning_rate": 2.4835689117005472e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33040913939476013,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2062.2,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 3.3706070287539935,
|
|
"grad_norm": 0.7237713261992792,
|
|
"learning_rate": 2.4758339725905152e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935367822647095,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2926.3,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 3.378594249201278,
|
|
"grad_norm": 0.7405392024159776,
|
|
"learning_rate": 2.4680914818440823e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4340866804122925,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3472.1,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.3865814696485623,
|
|
"grad_norm": 0.6989579655406557,
|
|
"learning_rate": 2.4603415623370387e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652323544025421,
|
|
"step": 2120,
|
|
"valid_targets_mean": 2833.5,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 3.3945686900958467,
|
|
"grad_norm": 0.9525216933901265,
|
|
"learning_rate": 2.4525843370630697e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3475990891456604,
|
|
"step": 2125,
|
|
"valid_targets_mean": 1967.2,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 3.402555910543131,
|
|
"grad_norm": 0.7645121394636909,
|
|
"learning_rate": 2.4448199291318058e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.323703408241272,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2634.7,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 3.4105431309904155,
|
|
"grad_norm": 0.8807835750995778,
|
|
"learning_rate": 2.4370484617668707e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32309281826019287,
|
|
"step": 2135,
|
|
"valid_targets_mean": 1907.6,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 3.4185303514377,
|
|
"grad_norm": 0.8575591692395351,
|
|
"learning_rate": 2.4292700583039194e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3469425439834595,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2226.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 3.426517571884984,
|
|
"grad_norm": 0.7825456784981535,
|
|
"learning_rate": 2.4214848421886893e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34094712138175964,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2894.2,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 3.4345047923322682,
|
|
"grad_norm": 0.888429464183779,
|
|
"learning_rate": 2.4136929369750344e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3566815257072449,
|
|
"step": 2150,
|
|
"valid_targets_mean": 1997.9,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 3.4424920127795526,
|
|
"grad_norm": 0.8936427922421591,
|
|
"learning_rate": 2.4058944663229672e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33036041259765625,
|
|
"step": 2155,
|
|
"valid_targets_mean": 1963.2,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 3.450479233226837,
|
|
"grad_norm": 0.6958318024502833,
|
|
"learning_rate": 2.398089553996696e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26821523904800415,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3053.4,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.4584664536741214,
|
|
"grad_norm": 0.852622261906709,
|
|
"learning_rate": 2.3902783238626604e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.320473849773407,
|
|
"step": 2165,
|
|
"valid_targets_mean": 2080.0,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.466453674121406,
|
|
"grad_norm": 0.9068056422869886,
|
|
"learning_rate": 2.3824608998875652e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33264023065567017,
|
|
"step": 2170,
|
|
"valid_targets_mean": 1985.4,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 3.47444089456869,
|
|
"grad_norm": 0.7843998507810446,
|
|
"learning_rate": 2.3746374061364138e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31739723682403564,
|
|
"step": 2175,
|
|
"valid_targets_mean": 2523.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.4824281150159746,
|
|
"grad_norm": 0.7891444954196775,
|
|
"learning_rate": 2.3668079667705404e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.322551429271698,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2542.8,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 3.4904153354632586,
|
|
"grad_norm": 0.7533305916771706,
|
|
"learning_rate": 2.3589727060456342e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303111732006073,
|
|
"step": 2185,
|
|
"valid_targets_mean": 2601.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 3.498402555910543,
|
|
"grad_norm": 0.7790243698098006,
|
|
"learning_rate": 2.3511317483097753e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27771705389022827,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2643.5,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 3.5063897763578273,
|
|
"grad_norm": 0.8329983052850798,
|
|
"learning_rate": 2.3432852180014548e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3886759281158447,
|
|
"step": 2195,
|
|
"valid_targets_mean": 2541.2,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 3.5143769968051117,
|
|
"grad_norm": 0.811573143622169,
|
|
"learning_rate": 2.3354332396476026e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914184331893921,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2319.4,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 3.522364217252396,
|
|
"grad_norm": 0.7212080277212944,
|
|
"learning_rate": 2.327575937861612e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3169957995414734,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3722.6,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 3.5303514376996805,
|
|
"grad_norm": 0.8076353106057738,
|
|
"learning_rate": 2.3197134373413595e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976832687854767,
|
|
"step": 2210,
|
|
"valid_targets_mean": 2124.6,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 3.538338658146965,
|
|
"grad_norm": 0.7322349165024253,
|
|
"learning_rate": 2.3118458628672272e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29221951961517334,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2507.4,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 3.5463258785942493,
|
|
"grad_norm": 0.8318643790813283,
|
|
"learning_rate": 2.3039733393001233e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3322209119796753,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2105.3,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 3.5543130990415337,
|
|
"grad_norm": 0.7662091444084941,
|
|
"learning_rate": 2.2960959915794998e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3198537230491638,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2542.9,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 3.562300319488818,
|
|
"grad_norm": 0.8030304959984618,
|
|
"learning_rate": 2.2882139447213673e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3031326234340668,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2197.2,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 3.5702875399361025,
|
|
"grad_norm": 0.8967947029859292,
|
|
"learning_rate": 2.280327323816317e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3496049642562866,
|
|
"step": 2235,
|
|
"valid_targets_mean": 2182.3,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 3.5782747603833864,
|
|
"grad_norm": 0.866809864490965,
|
|
"learning_rate": 2.2724362540275288e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30169904232025146,
|
|
"step": 2240,
|
|
"valid_targets_mean": 1966.2,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 3.586261980830671,
|
|
"grad_norm": 0.8305309488347965,
|
|
"learning_rate": 2.2645408605887882e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36114218831062317,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2602.7,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 3.594249201277955,
|
|
"grad_norm": 0.7691142256945528,
|
|
"learning_rate": 2.2566412688025003e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28841304779052734,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2580.2,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 3.6022364217252396,
|
|
"grad_norm": 0.8254980230617277,
|
|
"learning_rate": 2.2487376040376958e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33250492811203003,
|
|
"step": 2255,
|
|
"valid_targets_mean": 2349.2,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 3.610223642172524,
|
|
"grad_norm": 0.8242187278978753,
|
|
"learning_rate": 2.240829991728048e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3365738093852997,
|
|
"step": 2260,
|
|
"valid_targets_mean": 2648.1,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 3.6182108626198084,
|
|
"grad_norm": 0.9718132806965885,
|
|
"learning_rate": 2.2329185573698778e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3190160095691681,
|
|
"step": 2265,
|
|
"valid_targets_mean": 2424.6,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 3.626198083067093,
|
|
"grad_norm": 0.9094592499048701,
|
|
"learning_rate": 2.225003426520163e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3618037700653076,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2228.1,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 3.6341853035143767,
|
|
"grad_norm": 0.7750393748251048,
|
|
"learning_rate": 2.2170847247945466e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32818543910980225,
|
|
"step": 2275,
|
|
"valid_targets_mean": 2644.2,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.642172523961661,
|
|
"grad_norm": 0.7980089961691026,
|
|
"learning_rate": 2.209162577865342e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987974286079407,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2434.8,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 3.6501597444089455,
|
|
"grad_norm": 0.8306744555409238,
|
|
"learning_rate": 2.201237111459539e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245820105075836,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2327.9,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 3.65814696485623,
|
|
"grad_norm": 0.8127007139526443,
|
|
"learning_rate": 2.193308451356809e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34057268500328064,
|
|
"step": 2290,
|
|
"valid_targets_mean": 2464.0,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.6661341853035143,
|
|
"grad_norm": 0.8025686633034893,
|
|
"learning_rate": 2.1853767233875084e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3205169141292572,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2349.6,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 3.6741214057507987,
|
|
"grad_norm": 0.7537349714513184,
|
|
"learning_rate": 2.1774420534306812e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290463924407959,
|
|
"step": 2300,
|
|
"valid_targets_mean": 2403.1,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 3.682108626198083,
|
|
"grad_norm": 0.7340797298211004,
|
|
"learning_rate": 2.1695045674120615e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948509156703949,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2473.7,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 3.6900958466453675,
|
|
"grad_norm": 0.736584826882617,
|
|
"learning_rate": 2.1615643913020755e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027883768081665,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2667.7,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 3.698083067092652,
|
|
"grad_norm": 0.7933629982400912,
|
|
"learning_rate": 2.1536216511138423e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3057977557182312,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2506.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 3.7060702875399363,
|
|
"grad_norm": 0.7719310283887808,
|
|
"learning_rate": 2.145676472901174e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3123508095741272,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2501.6,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 3.7140575079872207,
|
|
"grad_norm": 0.7704570758385201,
|
|
"learning_rate": 2.1377289827565734e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28648841381073,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2630.2,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 3.722044728434505,
|
|
"grad_norm": 0.6836313608925653,
|
|
"learning_rate": 2.129779306809236e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28666383028030396,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3273.9,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.730031948881789,
|
|
"grad_norm": 0.7383029881954862,
|
|
"learning_rate": 2.121827571223046e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653520107269287,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2448.4,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 3.7380191693290734,
|
|
"grad_norm": 0.7096141808385372,
|
|
"learning_rate": 2.113873902194575e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982664704322815,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3082.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 3.746006389776358,
|
|
"grad_norm": 0.7488680698403448,
|
|
"learning_rate": 2.1059184259510786e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.311890184879303,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2644.5,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 3.753993610223642,
|
|
"grad_norm": 0.8189558717905336,
|
|
"learning_rate": 2.0979612687484935e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40575727820396423,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2691.4,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 3.7619808306709266,
|
|
"grad_norm": 0.8131535168385922,
|
|
"learning_rate": 2.0900025568694334e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325852632522583,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2515.6,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 3.769968051118211,
|
|
"grad_norm": 0.715650986434116,
|
|
"learning_rate": 2.082042416621187e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25730276107788086,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2620.3,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 3.777955271565495,
|
|
"grad_norm": 0.7429626471086513,
|
|
"learning_rate": 2.0740809743337095e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164355754852295,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3193.6,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 3.7859424920127793,
|
|
"grad_norm": 0.8643058749337154,
|
|
"learning_rate": 2.0661183563576202e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31777405738830566,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2046.8,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 3.7939297124600637,
|
|
"grad_norm": 0.7669314893300887,
|
|
"learning_rate": 2.0581546890621986e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30895188450813293,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2502.1,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 3.801916932907348,
|
|
"grad_norm": 0.7745779768631494,
|
|
"learning_rate": 2.0501900988333755e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31349217891693115,
|
|
"step": 2380,
|
|
"valid_targets_mean": 2802.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.8099041533546325,
|
|
"grad_norm": 0.8071826739547789,
|
|
"learning_rate": 2.0422247120717304e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32140111923217773,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2283.5,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 3.817891373801917,
|
|
"grad_norm": 0.734454232899783,
|
|
"learning_rate": 2.0342586551904824e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930513918399811,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2693.4,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 3.8258785942492013,
|
|
"grad_norm": 0.9032049233412248,
|
|
"learning_rate": 2.026292054613486e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29844075441360474,
|
|
"step": 2395,
|
|
"valid_targets_mean": 2085.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 3.8338658146964857,
|
|
"grad_norm": 0.7909727868208604,
|
|
"learning_rate": 2.0183250367732253e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3070366680622101,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2378.3,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 3.84185303514377,
|
|
"grad_norm": 0.771127898697249,
|
|
"learning_rate": 2.0103577281088065e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701008915901184,
|
|
"step": 2405,
|
|
"valid_targets_mean": 2440.0,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 3.8498402555910545,
|
|
"grad_norm": 0.709021285875872,
|
|
"learning_rate": 2.00239025506395e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31118449568748474,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2966.3,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 3.857827476038339,
|
|
"grad_norm": 0.7830686534531816,
|
|
"learning_rate": 1.9944227440849856e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3396669626235962,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2507.1,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 3.8658146964856233,
|
|
"grad_norm": 0.684522154512478,
|
|
"learning_rate": 1.9864553216188454e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741510570049286,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2855.4,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 3.873801916932907,
|
|
"grad_norm": 0.7496925314911651,
|
|
"learning_rate": 1.978488114111057e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875717282295227,
|
|
"step": 2425,
|
|
"valid_targets_mean": 2595.0,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 3.8817891373801916,
|
|
"grad_norm": 0.7396384668306665,
|
|
"learning_rate": 1.970521248003735e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3201049566268921,
|
|
"step": 2430,
|
|
"valid_targets_mean": 2870.1,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 3.889776357827476,
|
|
"grad_norm": 0.7759257785552034,
|
|
"learning_rate": 1.9625548497335783e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28428804874420166,
|
|
"step": 2435,
|
|
"valid_targets_mean": 2276.3,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.8977635782747604,
|
|
"grad_norm": 0.8743986374212271,
|
|
"learning_rate": 1.9545890457298592e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33888500928878784,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2057.8,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 3.905750798722045,
|
|
"grad_norm": 0.758679046027539,
|
|
"learning_rate": 1.9466239624124203e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32167601585388184,
|
|
"step": 2445,
|
|
"valid_targets_mean": 2617.2,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.913738019169329,
|
|
"grad_norm": 0.7695007701733012,
|
|
"learning_rate": 1.938659726189663e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3169788420200348,
|
|
"step": 2450,
|
|
"valid_targets_mean": 2591.6,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 3.9217252396166136,
|
|
"grad_norm": 0.9107498601489624,
|
|
"learning_rate": 1.9306964634565505e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3718308210372925,
|
|
"step": 2455,
|
|
"valid_targets_mean": 2254.7,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 3.9297124600638975,
|
|
"grad_norm": 0.7312434881547873,
|
|
"learning_rate": 1.9227343005925926e-05,
|
|
"loss": 0.3375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114500045776367,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2839.8,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 3.937699680511182,
|
|
"grad_norm": 0.8928380487951789,
|
|
"learning_rate": 1.914773363959845e-05,
|
|
"loss": 0.3339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30661630630493164,
|
|
"step": 2465,
|
|
"valid_targets_mean": 1985.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 3.9456869009584663,
|
|
"grad_norm": 0.8711769283453357,
|
|
"learning_rate": 1.906813779900903e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32086312770843506,
|
|
"step": 2470,
|
|
"valid_targets_mean": 1780.9,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 3.9536741214057507,
|
|
"grad_norm": 0.6913323064806477,
|
|
"learning_rate": 1.8988556747368953e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3113366663455963,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3348.4,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 3.961661341853035,
|
|
"grad_norm": 0.8343154257113743,
|
|
"learning_rate": 1.890899174765481e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34437280893325806,
|
|
"step": 2480,
|
|
"valid_targets_mean": 2292.1,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 3.9696485623003195,
|
|
"grad_norm": 0.727786876385511,
|
|
"learning_rate": 1.8829444062588433e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3268856406211853,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2971.8,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.977635782747604,
|
|
"grad_norm": 0.9241590810075659,
|
|
"learning_rate": 1.8749914954616882e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3660457134246826,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2102.8,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 3.9856230031948883,
|
|
"grad_norm": 0.7869322557337212,
|
|
"learning_rate": 1.867040568589236e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2874675393104553,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2395.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 3.9936102236421727,
|
|
"grad_norm": 0.7842157289771978,
|
|
"learning_rate": 1.859091751825225e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32232752442359924,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2512.0,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 4.001597444089457,
|
|
"grad_norm": 0.9324276234574944,
|
|
"learning_rate": 1.8511451713199038e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114590048789978,
|
|
"step": 2505,
|
|
"valid_targets_mean": 1768.8,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 4.0095846645367414,
|
|
"grad_norm": 0.8482456294797575,
|
|
"learning_rate": 1.843200953188031e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3049367666244507,
|
|
"step": 2510,
|
|
"valid_targets_mean": 2503.9,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 4.017571884984026,
|
|
"grad_norm": 0.8917193269509122,
|
|
"learning_rate": 1.8352592235068735e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27564072608947754,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2509.5,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 4.02555910543131,
|
|
"grad_norm": 0.959152073092878,
|
|
"learning_rate": 1.8273201083142056e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28576409816741943,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2604.8,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 4.033546325878595,
|
|
"grad_norm": 0.9118969425085272,
|
|
"learning_rate": 1.8193837336063096e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263765811920166,
|
|
"step": 2525,
|
|
"valid_targets_mean": 1985.9,
|
|
"valid_targets_min": 866
|
|
},
|
|
{
|
|
"epoch": 4.041533546325879,
|
|
"grad_norm": 0.778656785308403,
|
|
"learning_rate": 1.8114502253359745e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23282954096794128,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2504.2,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 4.0495207667731625,
|
|
"grad_norm": 0.8009917906107109,
|
|
"learning_rate": 1.8035197094104986e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25898435711860657,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2936.9,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 4.057507987220447,
|
|
"grad_norm": 0.8552413328571825,
|
|
"learning_rate": 1.7955923116896908e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30039119720458984,
|
|
"step": 2540,
|
|
"valid_targets_mean": 2328.8,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 4.065495207667731,
|
|
"grad_norm": 0.7943909473613405,
|
|
"learning_rate": 1.787668157983872e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266324520111084,
|
|
"step": 2545,
|
|
"valid_targets_mean": 2452.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 4.073482428115016,
|
|
"grad_norm": 0.8685020349842294,
|
|
"learning_rate": 1.779747374051881e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27934545278549194,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2311.2,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 4.0814696485623,
|
|
"grad_norm": 0.8468594025261184,
|
|
"learning_rate": 1.7718300855990767e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2668966054916382,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2417.5,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 4.0894568690095845,
|
|
"grad_norm": 0.8172594510875087,
|
|
"learning_rate": 1.7639164182753434e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659934163093567,
|
|
"step": 2560,
|
|
"valid_targets_mean": 2275.6,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 4.097444089456869,
|
|
"grad_norm": 0.8131622834124957,
|
|
"learning_rate": 1.7560064976730967e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599570155143738,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2486.7,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 4.105431309904153,
|
|
"grad_norm": 0.9125355179944755,
|
|
"learning_rate": 1.7481004493252915e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2980831265449524,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2395.5,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 4.113418530351438,
|
|
"grad_norm": 0.8069013787157769,
|
|
"learning_rate": 1.740198398703428e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263817697763443,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2332.4,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 4.121405750798722,
|
|
"grad_norm": 0.7727188177087745,
|
|
"learning_rate": 1.7323004712155613e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2636544108390808,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3023.6,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 4.1293929712460065,
|
|
"grad_norm": 0.7945255792557373,
|
|
"learning_rate": 1.724406792204312e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27583134174346924,
|
|
"step": 2585,
|
|
"valid_targets_mean": 2530.1,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 4.137380191693291,
|
|
"grad_norm": 0.9469151830470133,
|
|
"learning_rate": 1.7165174869448738e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30028119683265686,
|
|
"step": 2590,
|
|
"valid_targets_mean": 1900.9,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 4.145367412140575,
|
|
"grad_norm": 0.9226402089584824,
|
|
"learning_rate": 1.7086326806430298e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772974967956543,
|
|
"step": 2595,
|
|
"valid_targets_mean": 2169.4,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 4.15335463258786,
|
|
"grad_norm": 0.9441693068852114,
|
|
"learning_rate": 1.7007524984331623e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33314695954322815,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2023.9,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 4.161341853035144,
|
|
"grad_norm": 0.951595034327935,
|
|
"learning_rate": 1.692877065376268e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2942526042461395,
|
|
"step": 2605,
|
|
"valid_targets_mean": 2214.8,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 4.169329073482428,
|
|
"grad_norm": 0.9094355274546153,
|
|
"learning_rate": 1.6850065064579724e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3052511215209961,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2451.9,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 4.177316293929713,
|
|
"grad_norm": 0.8519705269230555,
|
|
"learning_rate": 1.677140946586548e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24492701888084412,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2506.8,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.185303514376997,
|
|
"grad_norm": 0.7699818528323561,
|
|
"learning_rate": 1.66928051059093e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2310146987438202,
|
|
"step": 2620,
|
|
"valid_targets_mean": 2790.1,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.193290734824281,
|
|
"grad_norm": 1.7518942165932019,
|
|
"learning_rate": 1.6614253232187363e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34809407591819763,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2023.8,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 4.201277955271565,
|
|
"grad_norm": 0.8729917233027708,
|
|
"learning_rate": 1.6535755091342876e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28835827112197876,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2176.9,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 4.2092651757188495,
|
|
"grad_norm": 1.0368867474369576,
|
|
"learning_rate": 1.6457311929166286e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866308093070984,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2736.7,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 4.217252396166134,
|
|
"grad_norm": 0.8673824989859126,
|
|
"learning_rate": 1.637892499057551e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945547103881836,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2429.2,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 4.225239616613418,
|
|
"grad_norm": 0.9214055268281512,
|
|
"learning_rate": 1.6300595519596177e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3281001150608063,
|
|
"step": 2645,
|
|
"valid_targets_mean": 2287.4,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.233226837060703,
|
|
"grad_norm": 0.8565343526242711,
|
|
"learning_rate": 1.6222324759341897e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3528786301612854,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2851.1,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.241214057507987,
|
|
"grad_norm": 0.811326421043272,
|
|
"learning_rate": 1.6144113951994515e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26833832263946533,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2509.8,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 4.2492012779552715,
|
|
"grad_norm": 0.9088855655070118,
|
|
"learning_rate": 1.60659643387844e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2907269597053528,
|
|
"step": 2660,
|
|
"valid_targets_mean": 2083.1,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.257188498402556,
|
|
"grad_norm": 0.8069521028903948,
|
|
"learning_rate": 1.5987877159970765e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852930426597595,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3032.5,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 4.26517571884984,
|
|
"grad_norm": 0.9775804317546377,
|
|
"learning_rate": 1.5909853654821947e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2991538643836975,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2250.2,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 4.273162939297125,
|
|
"grad_norm": 0.9148314349614897,
|
|
"learning_rate": 1.583189506159579e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651051878929138,
|
|
"step": 2675,
|
|
"valid_targets_mean": 2048.8,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 4.281150159744409,
|
|
"grad_norm": 0.9542219124606526,
|
|
"learning_rate": 1.575400261751993e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291668564081192,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2205.0,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 4.289137380191693,
|
|
"grad_norm": 0.7135045308311192,
|
|
"learning_rate": 1.567617755877223e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24501368403434753,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3269.7,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 4.297124600638978,
|
|
"grad_norm": 1.0330007581693756,
|
|
"learning_rate": 1.5598421120461106e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28483593463897705,
|
|
"step": 2690,
|
|
"valid_targets_mean": 1641.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 4.305111821086262,
|
|
"grad_norm": 0.9453831332130792,
|
|
"learning_rate": 1.552073453660595e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860102653503418,
|
|
"step": 2695,
|
|
"valid_targets_mean": 1911.6,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 4.313099041533547,
|
|
"grad_norm": 0.7834234239247848,
|
|
"learning_rate": 1.5443119040117544e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27587395906448364,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3283.4,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 4.321086261980831,
|
|
"grad_norm": 1.013746841627443,
|
|
"learning_rate": 1.536557586277849e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945508062839508,
|
|
"step": 2705,
|
|
"valid_targets_mean": 1812.6,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 4.329073482428115,
|
|
"grad_norm": 0.8487678794108631,
|
|
"learning_rate": 1.5288106235223655e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605893909931183,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2569.6,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.3370607028754,
|
|
"grad_norm": 0.8238943487548601,
|
|
"learning_rate": 1.521071138692066e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690702974796295,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2517.1,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 4.345047923322683,
|
|
"grad_norm": 0.9146553301338075,
|
|
"learning_rate": 1.5133392546150347e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950320243835449,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2151.6,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 4.353035143769968,
|
|
"grad_norm": 0.8097570105911555,
|
|
"learning_rate": 1.5056150939987303e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615821659564972,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2766.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 4.361022364217252,
|
|
"grad_norm": 0.8757180863392412,
|
|
"learning_rate": 1.4978987794280363e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785113751888275,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2375.4,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 4.3690095846645365,
|
|
"grad_norm": 0.8513887126776151,
|
|
"learning_rate": 1.4901904333633179e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24240964651107788,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2275.0,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.376996805111821,
|
|
"grad_norm": 0.8023929191854686,
|
|
"learning_rate": 1.4824901781384777e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856906056404114,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2627.1,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 4.384984025559105,
|
|
"grad_norm": 0.797408441800025,
|
|
"learning_rate": 1.4747981359590138e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258148729801178,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2414.8,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 4.39297124600639,
|
|
"grad_norm": 0.788516509441534,
|
|
"learning_rate": 1.4671144289000806e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919425070285797,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2907.4,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.400958466453674,
|
|
"grad_norm": 1.0064653366668732,
|
|
"learning_rate": 1.4594391789045513e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2962392568588257,
|
|
"step": 2755,
|
|
"valid_targets_mean": 1863.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 4.4089456869009584,
|
|
"grad_norm": 0.9506340102490201,
|
|
"learning_rate": 1.4517725077810835e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889561653137207,
|
|
"step": 2760,
|
|
"valid_targets_mean": 2017.3,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 4.416932907348243,
|
|
"grad_norm": 0.8659101348732412,
|
|
"learning_rate": 1.4441145372021847e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27652785181999207,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2312.2,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 4.424920127795527,
|
|
"grad_norm": 0.7681269858183138,
|
|
"learning_rate": 1.4364653887022831e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835875451564789,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3319.6,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 4.432907348242812,
|
|
"grad_norm": 0.7072785506581248,
|
|
"learning_rate": 1.4288251836757956e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23713326454162598,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3951.1,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 4.440894568690096,
|
|
"grad_norm": 0.8894303529052172,
|
|
"learning_rate": 1.4211940433752052e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671348750591278,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2111.8,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 4.44888178913738,
|
|
"grad_norm": 0.8510051441566174,
|
|
"learning_rate": 1.4135720889091342e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261152446269989,
|
|
"step": 2785,
|
|
"valid_targets_mean": 2317.9,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 4.456869009584665,
|
|
"grad_norm": 0.9409250508200573,
|
|
"learning_rate": 1.4059594412404227e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30381250381469727,
|
|
"step": 2790,
|
|
"valid_targets_mean": 2299.6,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 4.464856230031949,
|
|
"grad_norm": 0.9433766682387098,
|
|
"learning_rate": 1.3983562211842094e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29213231801986694,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2195.6,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 4.472843450479234,
|
|
"grad_norm": 0.7233021819826743,
|
|
"learning_rate": 1.390762549406013e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659491300582886,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3180.1,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.480830670926517,
|
|
"grad_norm": 0.7712006415597349,
|
|
"learning_rate": 1.3831785464198188e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2787235677242279,
|
|
"step": 2805,
|
|
"valid_targets_mean": 2937.8,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 4.488817891373802,
|
|
"grad_norm": 0.9778771697318889,
|
|
"learning_rate": 1.3756043325861648e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30784228444099426,
|
|
"step": 2810,
|
|
"valid_targets_mean": 1888.0,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.496805111821086,
|
|
"grad_norm": 0.9264920955176256,
|
|
"learning_rate": 1.3680400281102326e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28944408893585205,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2205.7,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 4.50479233226837,
|
|
"grad_norm": 0.8533904231485898,
|
|
"learning_rate": 1.3604857530399378e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2850252687931061,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2500.1,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 4.512779552715655,
|
|
"grad_norm": 0.8704638800488371,
|
|
"learning_rate": 1.3529416272640278e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28615254163742065,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2373.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 4.520766773162939,
|
|
"grad_norm": 0.7799109745588295,
|
|
"learning_rate": 1.3454077705101766e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30617034435272217,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3339.3,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 4.5287539936102235,
|
|
"grad_norm": 0.8510359130042457,
|
|
"learning_rate": 1.3378843023430856e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776964604854584,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2357.5,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 4.536741214057508,
|
|
"grad_norm": 0.676687552262958,
|
|
"learning_rate": 1.3303713421625864e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24182885885238647,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3198.8,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 4.544728434504792,
|
|
"grad_norm": 0.8213237973461215,
|
|
"learning_rate": 1.3228690092017452e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25150129199028015,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2422.8,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 4.552715654952077,
|
|
"grad_norm": 0.8195245815731629,
|
|
"learning_rate": 1.3153774225249705e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849723696708679,
|
|
"step": 2850,
|
|
"valid_targets_mean": 2497.5,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 4.560702875399361,
|
|
"grad_norm": 0.8998654690399283,
|
|
"learning_rate": 1.3078967010261243e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27163732051849365,
|
|
"step": 2855,
|
|
"valid_targets_mean": 2386.8,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 4.568690095846645,
|
|
"grad_norm": 0.8317941560088088,
|
|
"learning_rate": 1.3004269634266357e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26044827699661255,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2203.2,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 4.57667731629393,
|
|
"grad_norm": 0.7242747846082378,
|
|
"learning_rate": 1.2929683282736135e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865527868270874,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3188.6,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 4.584664536741214,
|
|
"grad_norm": 0.8057451747840729,
|
|
"learning_rate": 1.285520913937969e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28866854310035706,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2670.4,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 4.592651757188499,
|
|
"grad_norm": 0.8559419844409483,
|
|
"learning_rate": 1.2780848386125354e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26415783166885376,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2264.3,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 4.600638977635783,
|
|
"grad_norm": 0.9013863009440287,
|
|
"learning_rate": 1.2706602203101913e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849896252155304,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2103.7,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.608626198083067,
|
|
"grad_norm": 0.7710059088507434,
|
|
"learning_rate": 1.2632471768619894e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29880955815315247,
|
|
"step": 2885,
|
|
"valid_targets_mean": 2723.6,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 4.616613418530352,
|
|
"grad_norm": 0.8372599351438447,
|
|
"learning_rate": 1.255845825915286e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3064316213130951,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2505.3,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 4.624600638977636,
|
|
"grad_norm": 0.856569504786585,
|
|
"learning_rate": 1.248456284931873e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26590830087661743,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2149.9,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 4.63258785942492,
|
|
"grad_norm": 0.8468594671630943,
|
|
"learning_rate": 1.241078671186115e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27486222982406616,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2579.3,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 4.640575079872205,
|
|
"grad_norm": 0.768205507221503,
|
|
"learning_rate": 1.2337131017630873e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27169275283813477,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2937.6,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 4.6485623003194885,
|
|
"grad_norm": 0.7796994568631648,
|
|
"learning_rate": 1.2263596935567175e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680303156375885,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2888.6,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 4.656549520766773,
|
|
"grad_norm": 0.8251557696981305,
|
|
"learning_rate": 1.2190185632679316e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3371589481830597,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2832.8,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 4.664536741214057,
|
|
"grad_norm": 0.92184042059257,
|
|
"learning_rate": 1.2116898274028008e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772311866283417,
|
|
"step": 2920,
|
|
"valid_targets_mean": 1953.4,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 4.672523961661342,
|
|
"grad_norm": 0.8619546776366847,
|
|
"learning_rate": 1.2043736022706934e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967589497566223,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2540.2,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 4.680511182108626,
|
|
"grad_norm": 0.9191641245437345,
|
|
"learning_rate": 1.1970700039824271e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808821201324463,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2096.7,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 4.68849840255591,
|
|
"grad_norm": 0.7578627144140523,
|
|
"learning_rate": 1.1897791484484284e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25608596205711365,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3004.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 4.696485623003195,
|
|
"grad_norm": 0.9179461644936528,
|
|
"learning_rate": 1.182501151376893e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26754847168922424,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2621.8,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.704472843450479,
|
|
"grad_norm": 0.8635029390510522,
|
|
"learning_rate": 1.1752361282719472e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26998892426490784,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2276.6,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 4.712460063897764,
|
|
"grad_norm": 0.8759663833019777,
|
|
"learning_rate": 1.1679841944318171e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780788540840149,
|
|
"step": 2950,
|
|
"valid_targets_mean": 2253.1,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 4.720447284345048,
|
|
"grad_norm": 0.8498826304914544,
|
|
"learning_rate": 1.1607454649469991e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257028192281723,
|
|
"step": 2955,
|
|
"valid_targets_mean": 2424.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 4.728434504792332,
|
|
"grad_norm": 0.8606288246212083,
|
|
"learning_rate": 1.1535200546984291e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3048713207244873,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2382.4,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.736421725239617,
|
|
"grad_norm": 0.6090588197110254,
|
|
"learning_rate": 1.1463080783556663e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27087897062301636,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4201.1,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 4.744408945686901,
|
|
"grad_norm": 0.8021203501043761,
|
|
"learning_rate": 1.139109650375066e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849085330963135,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2901.2,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 4.752396166134186,
|
|
"grad_norm": 0.7282838758000219,
|
|
"learning_rate": 1.1319248849979705e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27936381101608276,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3305.5,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 4.76038338658147,
|
|
"grad_norm": 0.8934829121606472,
|
|
"learning_rate": 1.1247538962488882e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26877570152282715,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2315.6,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 4.768370607028754,
|
|
"grad_norm": 0.7727018137758453,
|
|
"learning_rate": 1.1175967979336913e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24391278624534607,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2623.9,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 4.776357827476039,
|
|
"grad_norm": 0.7473559824700889,
|
|
"learning_rate": 1.1104537036378054e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511521279811859,
|
|
"step": 2990,
|
|
"valid_targets_mean": 2769.9,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 4.784345047923322,
|
|
"grad_norm": 1.0968726241653155,
|
|
"learning_rate": 1.1033247267244063e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27009397745132446,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2335.2,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 4.792332268370607,
|
|
"grad_norm": 0.771581833337534,
|
|
"learning_rate": 1.0962099803326257e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26282936334609985,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3189.4,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 4.800319488817891,
|
|
"grad_norm": 0.7841941809157226,
|
|
"learning_rate": 1.089109577375748e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25529754161834717,
|
|
"step": 3005,
|
|
"valid_targets_mean": 2596.4,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.8083067092651754,
|
|
"grad_norm": 0.9477206812732758,
|
|
"learning_rate": 1.082023630539427e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30091235041618347,
|
|
"step": 3010,
|
|
"valid_targets_mean": 1865.8,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.81629392971246,
|
|
"grad_norm": 0.8521362863260039,
|
|
"learning_rate": 1.07495225227989e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27251100540161133,
|
|
"step": 3015,
|
|
"valid_targets_mean": 2302.6,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 4.824281150159744,
|
|
"grad_norm": 0.8644403776334407,
|
|
"learning_rate": 1.0678955548221595e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37893515825271606,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2601.3,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 4.832268370607029,
|
|
"grad_norm": 0.8501540174202594,
|
|
"learning_rate": 1.0608536501582654e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843570113182068,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2499.0,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 4.840255591054313,
|
|
"grad_norm": 0.7968617175192734,
|
|
"learning_rate": 1.0538266500454739e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686050534248352,
|
|
"step": 3030,
|
|
"valid_targets_mean": 2738.2,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 4.848242811501597,
|
|
"grad_norm": 0.8550616392428807,
|
|
"learning_rate": 1.0468146660045118e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3118065297603607,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3041.2,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 4.856230031948882,
|
|
"grad_norm": 0.8840487595992875,
|
|
"learning_rate": 1.0398178093177928e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2524608373641968,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2148.9,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.864217252396166,
|
|
"grad_norm": 0.7726316919382694,
|
|
"learning_rate": 1.0328361910276592e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2374812364578247,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2670.1,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 4.872204472843451,
|
|
"grad_norm": 0.7221733174067081,
|
|
"learning_rate": 1.0258699219346091e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25315260887145996,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3247.1,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.880191693290735,
|
|
"grad_norm": 0.8621238277729003,
|
|
"learning_rate": 1.0189191125955503e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26103848218917847,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2417.1,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 4.888178913738019,
|
|
"grad_norm": 0.8193569680773535,
|
|
"learning_rate": 1.0119838733220342e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29293161630630493,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2769.2,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 4.896166134185304,
|
|
"grad_norm": 0.7915852195352976,
|
|
"learning_rate": 1.0050643141785148e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30115994811058044,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2920.8,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 4.904153354632588,
|
|
"grad_norm": 0.9670489315872343,
|
|
"learning_rate": 9.981605449805933e-06,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2954235076904297,
|
|
"step": 3070,
|
|
"valid_targets_mean": 1940.2,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 4.912140575079873,
|
|
"grad_norm": 0.7899179728796167,
|
|
"learning_rate": 9.912726752932823e-06,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26001036167144775,
|
|
"step": 3075,
|
|
"valid_targets_mean": 2557.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 4.920127795527156,
|
|
"grad_norm": 0.9230056664497797,
|
|
"learning_rate": 9.844008144292643e-06,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32020115852355957,
|
|
"step": 3080,
|
|
"valid_targets_mean": 2220.2,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 4.928115015974441,
|
|
"grad_norm": 0.9803620610079173,
|
|
"learning_rate": 9.775450714471537e-06,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837762236595154,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2024.2,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 4.936102236421725,
|
|
"grad_norm": 0.8675063346612143,
|
|
"learning_rate": 9.707055551497734e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26399123668670654,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2561.4,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 4.944089456869009,
|
|
"grad_norm": 0.8107971067612242,
|
|
"learning_rate": 9.638823740824177e-06,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534600496292114,
|
|
"step": 3095,
|
|
"valid_targets_mean": 2779.8,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 4.952076677316294,
|
|
"grad_norm": 0.9692179111808553,
|
|
"learning_rate": 9.570756365311418e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31103652715682983,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2178.6,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 4.960063897763578,
|
|
"grad_norm": 0.84447986941419,
|
|
"learning_rate": 9.502854505210326e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23896771669387817,
|
|
"step": 3105,
|
|
"valid_targets_mean": 2097.9,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 4.968051118210862,
|
|
"grad_norm": 0.9708909316517994,
|
|
"learning_rate": 9.435119238145018e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830391526222229,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2120.6,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 4.976038338658147,
|
|
"grad_norm": 0.894039867094923,
|
|
"learning_rate": 9.367551639095704e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783415913581848,
|
|
"step": 3115,
|
|
"valid_targets_mean": 2279.3,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.984025559105431,
|
|
"grad_norm": 0.7936665061278042,
|
|
"learning_rate": 9.300152780381664e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718719244003296,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2787.4,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 4.992012779552716,
|
|
"grad_norm": 0.9880275379180513,
|
|
"learning_rate": 9.232923731644216e-06,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3306204676628113,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2038.9,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.8026348160429615,
|
|
"learning_rate": 9.16586555982972e-06,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25479575991630554,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2697.4,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 5.007987220447284,
|
|
"grad_norm": 0.9984409654727774,
|
|
"learning_rate": 9.098979329172702e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267434298992157,
|
|
"step": 3135,
|
|
"valid_targets_mean": 1864.6,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 5.015974440894569,
|
|
"grad_norm": 0.7210924204330762,
|
|
"learning_rate": 9.032266101178872e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.242093026638031,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3319.1,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 5.023961661341853,
|
|
"grad_norm": 0.8676641236723069,
|
|
"learning_rate": 8.965726934608392e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29204314947128296,
|
|
"step": 3145,
|
|
"valid_targets_mean": 2749.1,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 5.031948881789138,
|
|
"grad_norm": 0.9889980568404373,
|
|
"learning_rate": 8.899362885458964e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25162041187286377,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2334.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 5.039936102236422,
|
|
"grad_norm": 1.001247652788042,
|
|
"learning_rate": 8.833175006949165e-06,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26357948780059814,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2165.5,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 5.047923322683706,
|
|
"grad_norm": 0.7888636802207886,
|
|
"learning_rate": 8.767164349501648e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23624509572982788,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2926.4,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 5.055910543130991,
|
|
"grad_norm": 0.9254680450509477,
|
|
"learning_rate": 8.70133196072654e-06,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23824426531791687,
|
|
"step": 3165,
|
|
"valid_targets_mean": 2029.6,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 5.063897763578275,
|
|
"grad_norm": 1.0532257511369474,
|
|
"learning_rate": 8.63567888540478e-06,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25464290380477905,
|
|
"step": 3170,
|
|
"valid_targets_mean": 2144.9,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 5.0718849840255595,
|
|
"grad_norm": 0.868277883051457,
|
|
"learning_rate": 8.570206165471535e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2385791540145874,
|
|
"step": 3175,
|
|
"valid_targets_mean": 2468.9,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 5.079872204472843,
|
|
"grad_norm": 0.8052362420449864,
|
|
"learning_rate": 8.504914839999691e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796471118927002,
|
|
"step": 3180,
|
|
"valid_targets_mean": 2850.3,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 5.087859424920127,
|
|
"grad_norm": 0.9783300987290167,
|
|
"learning_rate": 8.439805945183333e-06,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23765051364898682,
|
|
"step": 3185,
|
|
"valid_targets_mean": 2368.3,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 5.095846645367412,
|
|
"grad_norm": 0.945762533566774,
|
|
"learning_rate": 8.374880514321318e-06,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26258566975593567,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2205.4,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 5.103833865814696,
|
|
"grad_norm": 0.8865476941466789,
|
|
"learning_rate": 8.310139577800864e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26435157656669617,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2617.2,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 5.111821086261981,
|
|
"grad_norm": 0.7619436172090799,
|
|
"learning_rate": 8.245584163081228e-06,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2356569766998291,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3013.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 5.119808306709265,
|
|
"grad_norm": 0.9462210151843946,
|
|
"learning_rate": 8.18121529467735e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2373546063899994,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2540.6,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 5.127795527156549,
|
|
"grad_norm": 0.9582110004966133,
|
|
"learning_rate": 8.117033994143635e-06,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26488399505615234,
|
|
"step": 3210,
|
|
"valid_targets_mean": 2027.8,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 5.135782747603834,
|
|
"grad_norm": 0.9708451925282411,
|
|
"learning_rate": 8.053041280057733e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965133786201477,
|
|
"step": 3215,
|
|
"valid_targets_mean": 1985.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 5.143769968051118,
|
|
"grad_norm": 0.8840411203479469,
|
|
"learning_rate": 7.989238168004347e-06,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23968303203582764,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2409.7,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 5.151757188498403,
|
|
"grad_norm": 0.8642600592726584,
|
|
"learning_rate": 7.925625670559158e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24195238947868347,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2520.4,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 5.159744408945687,
|
|
"grad_norm": 0.9320240378244489,
|
|
"learning_rate": 7.862204797272716e-06,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273721307516098,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2702.1,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 5.167731629392971,
|
|
"grad_norm": 0.9603601717403545,
|
|
"learning_rate": 7.798976554654438e-06,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23371315002441406,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2410.7,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 5.175718849840256,
|
|
"grad_norm": 1.0152844390683704,
|
|
"learning_rate": 7.73594194615663e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761167585849762,
|
|
"step": 3240,
|
|
"valid_targets_mean": 1806.8,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 5.18370607028754,
|
|
"grad_norm": 1.0307727900122543,
|
|
"learning_rate": 7.673101972158572e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694966793060303,
|
|
"step": 3245,
|
|
"valid_targets_mean": 1871.4,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 5.1916932907348246,
|
|
"grad_norm": 0.8811479320033861,
|
|
"learning_rate": 7.610457629950621e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27138739824295044,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2482.4,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 5.199680511182109,
|
|
"grad_norm": 0.8416754020093322,
|
|
"learning_rate": 7.548009913718402e-06,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667473256587982,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3078.2,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 5.207667731629393,
|
|
"grad_norm": 0.9609521028258114,
|
|
"learning_rate": 7.485759814527034e-06,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27841615676879883,
|
|
"step": 3260,
|
|
"valid_targets_mean": 2239.9,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 5.215654952076678,
|
|
"grad_norm": 0.8944955613609264,
|
|
"learning_rate": 7.423708320305361e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25388243794441223,
|
|
"step": 3265,
|
|
"valid_targets_mean": 2603.8,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 5.223642172523961,
|
|
"grad_norm": 0.911388064773831,
|
|
"learning_rate": 7.361856415830335e-06,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451879382133484,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2109.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 5.231629392971246,
|
|
"grad_norm": 0.8402089780841747,
|
|
"learning_rate": 7.300205082711336e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24045932292938232,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2570.3,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 5.23961661341853,
|
|
"grad_norm": 0.8767255370131243,
|
|
"learning_rate": 7.2387552993746204e-06,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23550084233283997,
|
|
"step": 3280,
|
|
"valid_targets_mean": 2718.1,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 5.247603833865814,
|
|
"grad_norm": 0.8917951136912307,
|
|
"learning_rate": 7.177508041047769e-06,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22237886488437653,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2052.4,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 5.255591054313099,
|
|
"grad_norm": 0.8483735817233142,
|
|
"learning_rate": 7.116464279744262e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26985248923301697,
|
|
"step": 3290,
|
|
"valid_targets_mean": 2770.6,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 5.263578274760383,
|
|
"grad_norm": 0.8673402690069193,
|
|
"learning_rate": 7.055624984247977e-06,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23807567358016968,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2807.6,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 5.271565495207668,
|
|
"grad_norm": 0.7420933455820358,
|
|
"learning_rate": 6.9949911200978805e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24933184683322906,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3589.8,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 5.279552715654952,
|
|
"grad_norm": 0.7901863655194518,
|
|
"learning_rate": 6.934563649572678e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23379799723625183,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2505.2,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 5.287539936102236,
|
|
"grad_norm": 0.8704433675136676,
|
|
"learning_rate": 6.874343531675521e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565191388130188,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2484.4,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 5.295527156549521,
|
|
"grad_norm": 0.8464951643416478,
|
|
"learning_rate": 6.814331722118837e-06,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558852434158325,
|
|
"step": 3315,
|
|
"valid_targets_mean": 2540.0,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 5.303514376996805,
|
|
"grad_norm": 0.8975534341695368,
|
|
"learning_rate": 6.754529173309112e-06,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24197977781295776,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2569.1,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.31150159744409,
|
|
"grad_norm": 0.874994492011764,
|
|
"learning_rate": 6.694936834331809e-06,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2235998511314392,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2819.4,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 5.319488817891374,
|
|
"grad_norm": 0.9768249301316632,
|
|
"learning_rate": 6.635555650936278e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26266855001449585,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2303.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 5.327476038338658,
|
|
"grad_norm": 0.7500050810545402,
|
|
"learning_rate": 6.576386565520794e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19468601047992706,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3173.2,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.335463258785943,
|
|
"grad_norm": 0.9621756501486439,
|
|
"learning_rate": 6.5174305171175336e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29240113496780396,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2822.6,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 5.343450479233227,
|
|
"grad_norm": 0.9131294301147355,
|
|
"learning_rate": 6.458688441377734e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677621841430664,
|
|
"step": 3345,
|
|
"valid_targets_mean": 2254.9,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.3514376996805115,
|
|
"grad_norm": 0.8004711746369907,
|
|
"learning_rate": 6.400161270556817e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21414849162101746,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2538.3,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 5.359424920127796,
|
|
"grad_norm": 0.9730208737063787,
|
|
"learning_rate": 6.341849933499573e-06,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28051456809043884,
|
|
"step": 3355,
|
|
"valid_targets_mean": 2099.6,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 5.36741214057508,
|
|
"grad_norm": 0.9045679231575108,
|
|
"learning_rate": 6.283755355625472e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24675720930099487,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2298.3,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 5.375399361022364,
|
|
"grad_norm": 0.8429473050482944,
|
|
"learning_rate": 6.225878458913917e-06,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2369653731584549,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2837.5,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 5.383386581469648,
|
|
"grad_norm": 0.9672143613062925,
|
|
"learning_rate": 6.168220161889678e-06,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3191109299659729,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2316.6,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 5.391373801916933,
|
|
"grad_norm": 0.9985363872437394,
|
|
"learning_rate": 6.110781379608226e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253881573677063,
|
|
"step": 3375,
|
|
"valid_targets_mean": 1944.8,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 5.399361022364217,
|
|
"grad_norm": 0.9025652061249814,
|
|
"learning_rate": 6.053563023641318e-06,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31371989846229553,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2459.3,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 5.407348242811501,
|
|
"grad_norm": 0.7866477902461534,
|
|
"learning_rate": 5.996566002062439e-06,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24636295437812805,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2724.2,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 5.415335463258786,
|
|
"grad_norm": 1.0219342086135843,
|
|
"learning_rate": 5.939791219432442e-06,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26121723651885986,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2359.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.42332268370607,
|
|
"grad_norm": 0.9364109908971551,
|
|
"learning_rate": 5.8832395767851846e-06,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26539772748947144,
|
|
"step": 3395,
|
|
"valid_targets_mean": 2413.3,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 5.431309904153355,
|
|
"grad_norm": 0.8017689062021867,
|
|
"learning_rate": 5.826911971613198e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27917709946632385,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3180.4,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 5.439297124600639,
|
|
"grad_norm": 0.9372073854408419,
|
|
"learning_rate": 5.770809297853495e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25993525981903076,
|
|
"step": 3405,
|
|
"valid_targets_mean": 2330.6,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 5.447284345047923,
|
|
"grad_norm": 0.9241650949642555,
|
|
"learning_rate": 5.714932445873325e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2268129289150238,
|
|
"step": 3410,
|
|
"valid_targets_mean": 2412.5,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 5.455271565495208,
|
|
"grad_norm": 0.8443088219051473,
|
|
"learning_rate": 5.659282302456115e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21237555146217346,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2640.7,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 5.463258785942492,
|
|
"grad_norm": 0.8340979152010563,
|
|
"learning_rate": 5.603859750787302e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23809310793876648,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2788.4,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 5.4712460063897765,
|
|
"grad_norm": 0.8593192856256594,
|
|
"learning_rate": 5.548665670440418e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451857626438141,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2449.3,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 5.479233226837061,
|
|
"grad_norm": 0.8211780479711487,
|
|
"learning_rate": 5.4937009373630535e-06,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2296980768442154,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2637.6,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.487220447284345,
|
|
"grad_norm": 0.8610691181994383,
|
|
"learning_rate": 5.438966423862997e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23767513036727905,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2469.8,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 5.49520766773163,
|
|
"grad_norm": 0.7789296060036187,
|
|
"learning_rate": 5.384462998594384e-06,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21838533878326416,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2944.3,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 5.503194888178914,
|
|
"grad_norm": 0.8597962521057116,
|
|
"learning_rate": 5.330191526543884e-06,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21452750265598297,
|
|
"step": 3445,
|
|
"valid_targets_mean": 2531.1,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 5.511182108626198,
|
|
"grad_norm": 0.9936385081090304,
|
|
"learning_rate": 5.276152869017028e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28689122200012207,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2130.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 5.519169329073483,
|
|
"grad_norm": 0.8160975964320262,
|
|
"learning_rate": 5.22234788362447e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22707505524158478,
|
|
"step": 3455,
|
|
"valid_targets_mean": 2881.6,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 5.527156549520766,
|
|
"grad_norm": 0.8552379235308805,
|
|
"learning_rate": 5.168777424268454e-06,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22769717872142792,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2547.2,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 5.535143769968051,
|
|
"grad_norm": 0.8535863223696086,
|
|
"learning_rate": 5.115442341129171e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24776683747768402,
|
|
"step": 3465,
|
|
"valid_targets_mean": 2497.9,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 5.543130990415335,
|
|
"grad_norm": 0.9077931266856709,
|
|
"learning_rate": 5.062343480651364e-06,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259856641292572,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2449.8,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 5.55111821086262,
|
|
"grad_norm": 0.9025531560732482,
|
|
"learning_rate": 5.009481685530817e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25770407915115356,
|
|
"step": 3475,
|
|
"valid_targets_mean": 2366.1,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 5.559105431309904,
|
|
"grad_norm": 0.8392013544164802,
|
|
"learning_rate": 4.956857794701026e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25676658749580383,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3043.8,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 5.567092651757188,
|
|
"grad_norm": 0.841403387843343,
|
|
"learning_rate": 4.904472643319873e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24472683668136597,
|
|
"step": 3485,
|
|
"valid_targets_mean": 2702.1,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 5.575079872204473,
|
|
"grad_norm": 0.8629123016856849,
|
|
"learning_rate": 4.852327062756352e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627047300338745,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2467.8,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 5.583067092651757,
|
|
"grad_norm": 0.9115208149777416,
|
|
"learning_rate": 4.800421880577411e-06,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582412362098694,
|
|
"step": 3495,
|
|
"valid_targets_mean": 2497.1,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 5.5910543130990416,
|
|
"grad_norm": 0.9634662919244513,
|
|
"learning_rate": 4.748757920534779e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633087635040283,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2750.6,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 5.599041533546326,
|
|
"grad_norm": 0.7460111307739355,
|
|
"learning_rate": 4.697336002551947e-06,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31317317485809326,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4107.6,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 5.60702875399361,
|
|
"grad_norm": 0.8932905418509555,
|
|
"learning_rate": 4.6461569427110684e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.220320463180542,
|
|
"step": 3510,
|
|
"valid_targets_mean": 2182.1,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 5.615015974440895,
|
|
"grad_norm": 0.9496690598262525,
|
|
"learning_rate": 4.5952215532401146e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529815137386322,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2132.8,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 5.623003194888179,
|
|
"grad_norm": 0.8807720392260413,
|
|
"learning_rate": 4.544530642499894e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2316727340221405,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2462.5,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 5.6309904153354635,
|
|
"grad_norm": 0.9496164171850976,
|
|
"learning_rate": 4.4940850149712765e-06,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23457220196723938,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2293.8,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.638977635782748,
|
|
"grad_norm": 0.9526135595398277,
|
|
"learning_rate": 4.443885471242418e-06,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2593719959259033,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2115.4,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 5.646964856230032,
|
|
"grad_norm": 0.8892261829855727,
|
|
"learning_rate": 4.393932807996017e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23918218910694122,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2403.1,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 5.654952076677317,
|
|
"grad_norm": 0.8073984737541614,
|
|
"learning_rate": 4.344227817996735e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2424803078174591,
|
|
"step": 3540,
|
|
"valid_targets_mean": 2618.7,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 5.6629392971246,
|
|
"grad_norm": 1.0158488299099124,
|
|
"learning_rate": 4.294771290078548e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657063603401184,
|
|
"step": 3545,
|
|
"valid_targets_mean": 2196.2,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 5.6709265175718855,
|
|
"grad_norm": 0.8600500290905609,
|
|
"learning_rate": 4.245564009132293e-06,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24257491528987885,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2285.6,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 5.678913738019169,
|
|
"grad_norm": 0.8848385223863128,
|
|
"learning_rate": 4.196606756093138e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783249020576477,
|
|
"step": 3555,
|
|
"valid_targets_mean": 2516.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 5.686900958466453,
|
|
"grad_norm": 0.9213196754065445,
|
|
"learning_rate": 4.147900307928268e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3013501763343811,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2159.2,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 5.694888178913738,
|
|
"grad_norm": 1.0742240529129714,
|
|
"learning_rate": 4.099445437624487e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2924937903881073,
|
|
"step": 3565,
|
|
"valid_targets_mean": 1965.3,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.702875399361022,
|
|
"grad_norm": 1.9154737668998056,
|
|
"learning_rate": 4.051242914175995e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3011783957481384,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2072.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 5.710862619808307,
|
|
"grad_norm": 0.919879281648549,
|
|
"learning_rate": 4.003293502572163e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267816424369812,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2076.1,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 5.718849840255591,
|
|
"grad_norm": 0.9200224478665081,
|
|
"learning_rate": 3.955597963785391e-06,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3260323107242584,
|
|
"step": 3580,
|
|
"valid_targets_mean": 2660.7,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 5.726837060702875,
|
|
"grad_norm": 0.8315574817494497,
|
|
"learning_rate": 3.908157054759048e-06,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22743427753448486,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2462.6,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 5.73482428115016,
|
|
"grad_norm": 0.9116573730915999,
|
|
"learning_rate": 3.860971528395427e-06,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24943113327026367,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2870.2,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 5.742811501597444,
|
|
"grad_norm": 0.9750378220643428,
|
|
"learning_rate": 3.814042133543847e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26206859946250916,
|
|
"step": 3595,
|
|
"valid_targets_mean": 2222.9,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 5.7507987220447285,
|
|
"grad_norm": 0.663578557829514,
|
|
"learning_rate": 3.7673696149887117e-06,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27504149079322815,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3870.8,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 5.758785942492013,
|
|
"grad_norm": 0.9268241030937615,
|
|
"learning_rate": 3.72095471343773e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2523951232433319,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2134.8,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 5.766773162939297,
|
|
"grad_norm": 0.8597090736572556,
|
|
"learning_rate": 3.674798165510136e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22982367873191833,
|
|
"step": 3610,
|
|
"valid_targets_mean": 2599.7,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 5.774760383386582,
|
|
"grad_norm": 1.0030268575438048,
|
|
"learning_rate": 3.6289007037250244e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791036367416382,
|
|
"step": 3615,
|
|
"valid_targets_mean": 2539.2,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 5.782747603833866,
|
|
"grad_norm": 0.9462442511311907,
|
|
"learning_rate": 3.5832630564897073e-06,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25895488262176514,
|
|
"step": 3620,
|
|
"valid_targets_mean": 2168.2,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 5.7907348242811505,
|
|
"grad_norm": 0.8397859195736948,
|
|
"learning_rate": 3.5378859480881443e-06,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370823621749878,
|
|
"step": 3625,
|
|
"valid_targets_mean": 2972.4,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 5.798722044728435,
|
|
"grad_norm": 0.8192801914336669,
|
|
"learning_rate": 3.492770098669478e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27959752082824707,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2899.1,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 5.806709265175719,
|
|
"grad_norm": 0.8789166051789172,
|
|
"learning_rate": 3.4479162242365717e-06,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23672571778297424,
|
|
"step": 3635,
|
|
"valid_targets_mean": 2428.1,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 5.814696485623003,
|
|
"grad_norm": 0.8655456379012354,
|
|
"learning_rate": 3.403325036634679e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23506876826286316,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2477.9,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 5.822683706070287,
|
|
"grad_norm": 0.8398952533889421,
|
|
"learning_rate": 3.3589972435401184e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2275758981704712,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3426.1,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 5.830670926517572,
|
|
"grad_norm": 0.7830677346461522,
|
|
"learning_rate": 3.3149335484490553e-06,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2350785732269287,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2992.8,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 5.838658146964856,
|
|
"grad_norm": 0.8911091036200373,
|
|
"learning_rate": 3.2711346506663346e-06,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26126664876937866,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2346.9,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 5.84664536741214,
|
|
"grad_norm": 0.8306420840154823,
|
|
"learning_rate": 3.2276012452943893e-06,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24493958055973053,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2616.6,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 5.854632587859425,
|
|
"grad_norm": 0.9257290588636936,
|
|
"learning_rate": 3.1843340232222064e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718861699104309,
|
|
"step": 3665,
|
|
"valid_targets_mean": 2377.6,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 5.862619808306709,
|
|
"grad_norm": 0.8821921069993883,
|
|
"learning_rate": 3.1413336711143437e-06,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24770256876945496,
|
|
"step": 3670,
|
|
"valid_targets_mean": 2323.5,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 5.8706070287539935,
|
|
"grad_norm": 1.0668842413540396,
|
|
"learning_rate": 3.0986008714000703e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24007384479045868,
|
|
"step": 3675,
|
|
"valid_targets_mean": 1841.8,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.878594249201278,
|
|
"grad_norm": 0.9681982405795014,
|
|
"learning_rate": 3.056136302262489e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783893942832947,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2121.6,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 5.886581469648562,
|
|
"grad_norm": 1.0717402333248873,
|
|
"learning_rate": 3.0139406376278212e-06,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27632012963294983,
|
|
"step": 3685,
|
|
"valid_targets_mean": 1826.3,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 5.894568690095847,
|
|
"grad_norm": 0.8069760078124262,
|
|
"learning_rate": 2.972014547154671e-06,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502431571483612,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3109.2,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 5.902555910543131,
|
|
"grad_norm": 0.8740738657297792,
|
|
"learning_rate": 2.930358696223423e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2138509303331375,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2511.0,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 5.9105431309904155,
|
|
"grad_norm": 0.7295110292762615,
|
|
"learning_rate": 2.8889737459256695e-06,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911253571510315,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3859.9,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 5.9185303514377,
|
|
"grad_norm": 0.8973287092421024,
|
|
"learning_rate": 2.8478603530537285e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26486295461654663,
|
|
"step": 3705,
|
|
"valid_targets_mean": 2324.5,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.926517571884984,
|
|
"grad_norm": 0.9249606806730015,
|
|
"learning_rate": 2.8070191700902194e-06,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25167542695999146,
|
|
"step": 3710,
|
|
"valid_targets_mean": 2308.0,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 5.934504792332269,
|
|
"grad_norm": 0.856096926340349,
|
|
"learning_rate": 2.7664508451976903e-06,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2853350043296814,
|
|
"step": 3715,
|
|
"valid_targets_mean": 2613.7,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 5.942492012779553,
|
|
"grad_norm": 0.9195210859037573,
|
|
"learning_rate": 2.726156022208362e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24616576731204987,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2415.1,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.950479233226837,
|
|
"grad_norm": 0.9446518590073264,
|
|
"learning_rate": 2.6861353406138713e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26321879029273987,
|
|
"step": 3725,
|
|
"valid_targets_mean": 2201.5,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 5.958466453674122,
|
|
"grad_norm": 1.0451604164179265,
|
|
"learning_rate": 2.646389435555172e-06,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2698940634727478,
|
|
"step": 3730,
|
|
"valid_targets_mean": 2079.9,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 5.966453674121405,
|
|
"grad_norm": 0.9579716279193797,
|
|
"learning_rate": 2.6069189378124015e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2402516007423401,
|
|
"step": 3735,
|
|
"valid_targets_mean": 2024.8,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 5.97444089456869,
|
|
"grad_norm": 0.7833057288708513,
|
|
"learning_rate": 2.567724473794908e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21557332575321198,
|
|
"step": 3740,
|
|
"valid_targets_mean": 2962.2,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.982428115015974,
|
|
"grad_norm": 0.965963064846103,
|
|
"learning_rate": 2.5288066655312914e-06,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886372208595276,
|
|
"step": 3745,
|
|
"valid_targets_mean": 2510.3,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 5.9904153354632586,
|
|
"grad_norm": 0.9391804377883105,
|
|
"learning_rate": 2.4901661306595414e-06,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2614560127258301,
|
|
"step": 3750,
|
|
"valid_targets_mean": 2197.0,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 5.998402555910543,
|
|
"grad_norm": 1.0080653934758697,
|
|
"learning_rate": 2.451803482417234e-06,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837236523628235,
|
|
"step": 3755,
|
|
"valid_targets_mean": 1986.5,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 6.006389776357827,
|
|
"grad_norm": 0.9632247172126351,
|
|
"learning_rate": 2.413719329631785e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26739567518234253,
|
|
"step": 3760,
|
|
"valid_targets_mean": 2014.3,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.014376996805112,
|
|
"grad_norm": 0.8890191388856584,
|
|
"learning_rate": 2.375914276710811e-06,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23384922742843628,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2339.6,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 6.022364217252396,
|
|
"grad_norm": 0.9926339513234526,
|
|
"learning_rate": 2.338388923632513e-06,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26004475355148315,
|
|
"step": 3770,
|
|
"valid_targets_mean": 1988.1,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 6.0303514376996805,
|
|
"grad_norm": 0.9005345674455754,
|
|
"learning_rate": 2.3011438659361794e-06,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27311384677886963,
|
|
"step": 3775,
|
|
"valid_targets_mean": 2367.9,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 6.038338658146965,
|
|
"grad_norm": 0.9201644014529717,
|
|
"learning_rate": 2.2641796947127114e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2286607027053833,
|
|
"step": 3780,
|
|
"valid_targets_mean": 2012.0,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 6.046325878594249,
|
|
"grad_norm": 0.7815918012844706,
|
|
"learning_rate": 2.2274969965952553e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20188505947589874,
|
|
"step": 3785,
|
|
"valid_targets_mean": 2824.0,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 6.054313099041534,
|
|
"grad_norm": 0.9490045790425659,
|
|
"learning_rate": 2.1910963537498887e-06,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261133074760437,
|
|
"step": 3790,
|
|
"valid_targets_mean": 2339.1,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 6.062300319488818,
|
|
"grad_norm": 0.9284852717548935,
|
|
"learning_rate": 2.1549783438663872e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526922821998596,
|
|
"step": 3795,
|
|
"valid_targets_mean": 2397.6,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 6.0702875399361025,
|
|
"grad_norm": 0.8160712295928677,
|
|
"learning_rate": 2.1191435401490534e-06,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22126713395118713,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2996.4,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 6.078274760383387,
|
|
"grad_norm": 0.83179201769334,
|
|
"learning_rate": 2.0835925113076062e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20945189893245697,
|
|
"step": 3805,
|
|
"valid_targets_mean": 2912.8,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 6.086261980830671,
|
|
"grad_norm": 0.9003596687495238,
|
|
"learning_rate": 2.0483258215481784e-06,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2199634313583374,
|
|
"step": 3810,
|
|
"valid_targets_mean": 2487.0,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 6.094249201277956,
|
|
"grad_norm": 0.9018603242566148,
|
|
"learning_rate": 2.0133440305643413e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2727838158607483,
|
|
"step": 3815,
|
|
"valid_targets_mean": 2617.8,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 6.102236421725239,
|
|
"grad_norm": 0.7983246728498299,
|
|
"learning_rate": 1.9786476935282463e-06,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20425739884376526,
|
|
"step": 3820,
|
|
"valid_targets_mean": 2734.9,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 6.110223642172524,
|
|
"grad_norm": 0.9312526436968155,
|
|
"learning_rate": 1.944237361081782e-06,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22892646491527557,
|
|
"step": 3825,
|
|
"valid_targets_mean": 2336.8,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 6.118210862619808,
|
|
"grad_norm": 0.9360575450858202,
|
|
"learning_rate": 1.9101135793278746e-06,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24663832783699036,
|
|
"step": 3830,
|
|
"valid_targets_mean": 2228.1,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 6.126198083067092,
|
|
"grad_norm": 0.8843059074013209,
|
|
"learning_rate": 1.8762768898217732e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21915936470031738,
|
|
"step": 3835,
|
|
"valid_targets_mean": 2568.1,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 6.134185303514377,
|
|
"grad_norm": 0.8676199800487951,
|
|
"learning_rate": 1.8427278295625006e-06,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23707064986228943,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2802.5,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.142172523961661,
|
|
"grad_norm": 0.8799686139341122,
|
|
"learning_rate": 1.8094669309843161e-06,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266718327999115,
|
|
"step": 3845,
|
|
"valid_targets_mean": 2862.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 6.1501597444089455,
|
|
"grad_norm": 0.8928460656861622,
|
|
"learning_rate": 1.776494721948241e-06,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24365708231925964,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2502.1,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 6.15814696485623,
|
|
"grad_norm": 0.8883551694275704,
|
|
"learning_rate": 1.7438117257337239e-06,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23181907832622528,
|
|
"step": 3855,
|
|
"valid_targets_mean": 2744.6,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 6.166134185303514,
|
|
"grad_norm": 0.9965470234578769,
|
|
"learning_rate": 1.7114184610302964e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29200488328933716,
|
|
"step": 3860,
|
|
"valid_targets_mean": 2315.3,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 6.174121405750799,
|
|
"grad_norm": 0.9133400414913558,
|
|
"learning_rate": 1.67931544192937e-06,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2366451472043991,
|
|
"step": 3865,
|
|
"valid_targets_mean": 2383.8,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 6.182108626198083,
|
|
"grad_norm": 0.7743359832824281,
|
|
"learning_rate": 1.6475031779160611e-06,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23672086000442505,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3126.1,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 6.1900958466453675,
|
|
"grad_norm": 0.8518052706670712,
|
|
"learning_rate": 1.6159821738611192e-06,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.242468923330307,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2862.4,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 6.198083067092652,
|
|
"grad_norm": 1.0026666720024844,
|
|
"learning_rate": 1.5847529300128827e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22624211013317108,
|
|
"step": 3880,
|
|
"valid_targets_mean": 1964.4,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 6.206070287539936,
|
|
"grad_norm": 1.1939436285875955,
|
|
"learning_rate": 1.5538159419893895e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23717725276947021,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2144.8,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.214057507987221,
|
|
"grad_norm": 0.962852802311202,
|
|
"learning_rate": 1.5231717007704738e-06,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2280762940645218,
|
|
"step": 3890,
|
|
"valid_targets_mean": 2061.6,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.222044728434505,
|
|
"grad_norm": 0.8387987730662515,
|
|
"learning_rate": 1.4928206926899801e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25052428245544434,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3109.9,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 6.2300319488817895,
|
|
"grad_norm": 0.9493125968496499,
|
|
"learning_rate": 1.4627633994280599e-06,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25812265276908875,
|
|
"step": 3900,
|
|
"valid_targets_mean": 2274.2,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.238019169329074,
|
|
"grad_norm": 0.917800543033863,
|
|
"learning_rate": 1.433000298003504e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22898101806640625,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2279.3,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 6.246006389776358,
|
|
"grad_norm": 0.9681735070427119,
|
|
"learning_rate": 1.4035318607662029e-06,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21738824248313904,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2210.4,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 6.253993610223642,
|
|
"grad_norm": 0.9401530434341564,
|
|
"learning_rate": 1.3743585553896144e-06,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129175364971161,
|
|
"step": 3915,
|
|
"valid_targets_mean": 2104.7,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 6.261980830670926,
|
|
"grad_norm": 0.9557313306798625,
|
|
"learning_rate": 1.345480844863376e-06,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.233766570687294,
|
|
"step": 3920,
|
|
"valid_targets_mean": 2427.4,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 6.2699680511182105,
|
|
"grad_norm": 0.8330004742826701,
|
|
"learning_rate": 1.316899187485925e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2225666642189026,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2616.0,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 6.277955271565495,
|
|
"grad_norm": 0.8745710874858222,
|
|
"learning_rate": 1.288614036857252e-06,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22704851627349854,
|
|
"step": 3930,
|
|
"valid_targets_mean": 2428.9,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 6.285942492012779,
|
|
"grad_norm": 1.016378447629092,
|
|
"learning_rate": 1.260625841871692e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22519497573375702,
|
|
"step": 3935,
|
|
"valid_targets_mean": 1774.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 6.293929712460064,
|
|
"grad_norm": 0.8911204447463723,
|
|
"learning_rate": 1.2329350467107925e-06,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23978692293167114,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2488.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 6.301916932907348,
|
|
"grad_norm": 0.9384637059303671,
|
|
"learning_rate": 1.2055420908362781e-06,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26262155175209045,
|
|
"step": 3945,
|
|
"valid_targets_mean": 2743.2,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 6.3099041533546325,
|
|
"grad_norm": 0.8404271139111975,
|
|
"learning_rate": 1.1784474089830612e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2195809781551361,
|
|
"step": 3950,
|
|
"valid_targets_mean": 2586.0,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 6.317891373801917,
|
|
"grad_norm": 1.056313995412622,
|
|
"learning_rate": 1.1516514311523607e-06,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23776283860206604,
|
|
"step": 3955,
|
|
"valid_targets_mean": 1689.2,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 6.325878594249201,
|
|
"grad_norm": 1.002342222526865,
|
|
"learning_rate": 1.1251545826048593e-06,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2403024286031723,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2123.3,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 6.333865814696486,
|
|
"grad_norm": 0.830448399146461,
|
|
"learning_rate": 1.098957283853972e-06,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3029744029045105,
|
|
"step": 3965,
|
|
"valid_targets_mean": 2784.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.34185303514377,
|
|
"grad_norm": 0.8864188221559118,
|
|
"learning_rate": 1.0730599506591476e-06,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2148996740579605,
|
|
"step": 3970,
|
|
"valid_targets_mean": 2750.1,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 6.3498402555910545,
|
|
"grad_norm": 0.9174262718965588,
|
|
"learning_rate": 1.0474629940192994e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21212556958198547,
|
|
"step": 3975,
|
|
"valid_targets_mean": 2411.8,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 6.357827476038339,
|
|
"grad_norm": 0.8127995558518768,
|
|
"learning_rate": 1.0221668201662726e-06,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2090303599834442,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3072.2,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 6.365814696485623,
|
|
"grad_norm": 0.905770044521917,
|
|
"learning_rate": 9.971718305583767e-07,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23624351620674133,
|
|
"step": 3985,
|
|
"valid_targets_mean": 2599.6,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 6.373801916932908,
|
|
"grad_norm": 0.9718186732526326,
|
|
"learning_rate": 9.724784218740524e-07,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26286351680755615,
|
|
"step": 3990,
|
|
"valid_targets_mean": 2735.8,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 6.381789137380192,
|
|
"grad_norm": 1.077474634014029,
|
|
"learning_rate": 9.480869860055364e-07,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652905285358429,
|
|
"step": 3995,
|
|
"valid_targets_mean": 1776.4,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.389776357827476,
|
|
"grad_norm": 0.9296621473958933,
|
|
"learning_rate": 9.239979100526763e-07,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24250204861164093,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2510.6,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 6.397763578274761,
|
|
"grad_norm": 1.1250884742008305,
|
|
"learning_rate": 9.002115763167585e-07,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22527346014976501,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2233.4,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 6.405750798722044,
|
|
"grad_norm": 0.9196302868842036,
|
|
"learning_rate": 8.7672836229447e-07,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24475860595703125,
|
|
"step": 4010,
|
|
"valid_targets_mean": 2932.3,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 6.413738019169329,
|
|
"grad_norm": 0.9908528061372956,
|
|
"learning_rate": 8.535486406718684e-07,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23507331311702728,
|
|
"step": 4015,
|
|
"valid_targets_mean": 1860.1,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 6.421725239616613,
|
|
"grad_norm": 0.9940213018240565,
|
|
"learning_rate": 8.306727793185132e-07,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25933533906936646,
|
|
"step": 4020,
|
|
"valid_targets_mean": 2310.2,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 6.4297124600638975,
|
|
"grad_norm": 0.9987001345739963,
|
|
"learning_rate": 8.081011412815965e-07,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533406913280487,
|
|
"step": 4025,
|
|
"valid_targets_mean": 2122.0,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 6.437699680511182,
|
|
"grad_norm": 1.0062751887947134,
|
|
"learning_rate": 7.858340847801815e-07,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24631676077842712,
|
|
"step": 4030,
|
|
"valid_targets_mean": 2114.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 6.445686900958466,
|
|
"grad_norm": 1.0531961363418418,
|
|
"learning_rate": 7.638719631995406e-07,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25359535217285156,
|
|
"step": 4035,
|
|
"valid_targets_mean": 2071.1,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 6.453674121405751,
|
|
"grad_norm": 1.029605421438356,
|
|
"learning_rate": 7.422151250855214e-07,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815782427787781,
|
|
"step": 4040,
|
|
"valid_targets_mean": 1942.4,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 6.461661341853035,
|
|
"grad_norm": 0.9218806469344648,
|
|
"learning_rate": 7.208639141390295e-07,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2516721189022064,
|
|
"step": 4045,
|
|
"valid_targets_mean": 2460.7,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 6.4696485623003195,
|
|
"grad_norm": 0.9369111075654014,
|
|
"learning_rate": 6.998186692105657e-07,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2131759226322174,
|
|
"step": 4050,
|
|
"valid_targets_mean": 2042.4,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 6.477635782747604,
|
|
"grad_norm": 0.9086360521365513,
|
|
"learning_rate": 6.790797242948644e-07,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21078777313232422,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2243.2,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.485623003194888,
|
|
"grad_norm": 0.6727980396495065,
|
|
"learning_rate": 6.5864740852557e-07,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20258353650569916,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3326.2,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 6.493610223642173,
|
|
"grad_norm": 0.7690713340433537,
|
|
"learning_rate": 6.385220461700248e-07,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117547631263733,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3799.6,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 6.501597444089457,
|
|
"grad_norm": 1.0155980823674935,
|
|
"learning_rate": 6.187039566241337e-07,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24349287152290344,
|
|
"step": 4070,
|
|
"valid_targets_mean": 2244.8,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 6.5095846645367414,
|
|
"grad_norm": 0.8286002848204801,
|
|
"learning_rate": 5.99193454407272e-07,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2468494474887848,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3158.1,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 6.517571884984026,
|
|
"grad_norm": 0.7698708148895567,
|
|
"learning_rate": 5.799908491573148e-07,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18109333515167236,
|
|
"step": 4080,
|
|
"valid_targets_mean": 2786.1,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 6.52555910543131,
|
|
"grad_norm": 0.8878008898046107,
|
|
"learning_rate": 5.610964456257107e-07,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22565975785255432,
|
|
"step": 4085,
|
|
"valid_targets_mean": 2502.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 6.533546325878595,
|
|
"grad_norm": 0.9444790330875649,
|
|
"learning_rate": 5.425105436726496e-07,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697492837905884,
|
|
"step": 4090,
|
|
"valid_targets_mean": 2552.1,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 6.541533546325878,
|
|
"grad_norm": 0.8528217500431875,
|
|
"learning_rate": 5.242334382623004e-07,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23368576169013977,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3131.2,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 6.549520766773163,
|
|
"grad_norm": 1.0917160577779512,
|
|
"learning_rate": 5.062654194581429e-07,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26486602425575256,
|
|
"step": 4100,
|
|
"valid_targets_mean": 2556.1,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 6.557507987220447,
|
|
"grad_norm": 0.9513941499835525,
|
|
"learning_rate": 4.88606772418343e-07,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25878143310546875,
|
|
"step": 4105,
|
|
"valid_targets_mean": 2213.4,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 6.565495207667731,
|
|
"grad_norm": 0.9712833621705644,
|
|
"learning_rate": 4.7125777739123857e-07,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624114155769348,
|
|
"step": 4110,
|
|
"valid_targets_mean": 2850.8,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 6.573482428115016,
|
|
"grad_norm": 0.8359966186178622,
|
|
"learning_rate": 4.54218709710903e-07,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22370928525924683,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3036.8,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 6.5814696485623,
|
|
"grad_norm": 1.116751506576769,
|
|
"learning_rate": 4.374898397927507e-07,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632787227630615,
|
|
"step": 4120,
|
|
"valid_targets_mean": 2270.2,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 6.5894568690095845,
|
|
"grad_norm": 0.7839522175334181,
|
|
"learning_rate": 4.210714331292698e-07,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22444558143615723,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2873.8,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 6.597444089456869,
|
|
"grad_norm": 0.8192004007957313,
|
|
"learning_rate": 4.049637502857895e-07,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2257654070854187,
|
|
"step": 4130,
|
|
"valid_targets_mean": 2757.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 6.605431309904153,
|
|
"grad_norm": 1.0055147301867686,
|
|
"learning_rate": 3.8916704689635707e-07,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25974178314208984,
|
|
"step": 4135,
|
|
"valid_targets_mean": 2147.9,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 6.613418530351438,
|
|
"grad_norm": 0.9908768182262211,
|
|
"learning_rate": 3.736815736596766e-07,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26805776357650757,
|
|
"step": 4140,
|
|
"valid_targets_mean": 2234.4,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 6.621405750798722,
|
|
"grad_norm": 1.0629337180937135,
|
|
"learning_rate": 3.5850757633513424e-07,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562737464904785,
|
|
"step": 4145,
|
|
"valid_targets_mean": 1843.2,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 6.6293929712460065,
|
|
"grad_norm": 1.01013095398439,
|
|
"learning_rate": 3.4364529573888803e-07,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3184455633163452,
|
|
"step": 4150,
|
|
"valid_targets_mean": 2527.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.637380191693291,
|
|
"grad_norm": 0.9054349670643587,
|
|
"learning_rate": 3.2909496774005344e-07,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22111965715885162,
|
|
"step": 4155,
|
|
"valid_targets_mean": 2575.2,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 6.645367412140575,
|
|
"grad_norm": 0.8058994716798603,
|
|
"learning_rate": 3.14856823256966e-07,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20591410994529724,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3162.1,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 6.65335463258786,
|
|
"grad_norm": 0.9354742656687647,
|
|
"learning_rate": 3.009310882534999e-07,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23457807302474976,
|
|
"step": 4165,
|
|
"valid_targets_mean": 2312.8,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 6.661341853035144,
|
|
"grad_norm": 1.101679129259534,
|
|
"learning_rate": 2.8731798373550004e-07,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2572144865989685,
|
|
"step": 4170,
|
|
"valid_targets_mean": 1970.4,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 6.669329073482428,
|
|
"grad_norm": 0.7642209408361453,
|
|
"learning_rate": 2.740177257472576e-07,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23325081169605255,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3710.4,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 6.677316293929713,
|
|
"grad_norm": 0.972235265028549,
|
|
"learning_rate": 2.6103052536810226e-07,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22849571704864502,
|
|
"step": 4180,
|
|
"valid_targets_mean": 2155.2,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 6.685303514376997,
|
|
"grad_norm": 0.9521708064905643,
|
|
"learning_rate": 2.4835658870902226e-07,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26424315571784973,
|
|
"step": 4185,
|
|
"valid_targets_mean": 2548.5,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 6.693290734824281,
|
|
"grad_norm": 1.0437622468013392,
|
|
"learning_rate": 2.3599611690943158e-07,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24869664013385773,
|
|
"step": 4190,
|
|
"valid_targets_mean": 1844.8,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 6.701277955271565,
|
|
"grad_norm": 0.7512504474570952,
|
|
"learning_rate": 2.2394930613393927e-07,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31568941473960876,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3776.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 6.7092651757188495,
|
|
"grad_norm": 0.8547191061259991,
|
|
"learning_rate": 2.122163475692629e-07,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2133883535861969,
|
|
"step": 4200,
|
|
"valid_targets_mean": 2902.8,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 6.717252396166134,
|
|
"grad_norm": 0.9456003226855599,
|
|
"learning_rate": 2.0079742742118878e-07,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26655298471450806,
|
|
"step": 4205,
|
|
"valid_targets_mean": 2182.9,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 6.725239616613418,
|
|
"grad_norm": 0.9518496277348535,
|
|
"learning_rate": 1.8969272691160334e-07,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23761367797851562,
|
|
"step": 4210,
|
|
"valid_targets_mean": 2798.6,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 6.733226837060703,
|
|
"grad_norm": 0.8516839946435172,
|
|
"learning_rate": 1.789024222756397e-07,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28636062145233154,
|
|
"step": 4215,
|
|
"valid_targets_mean": 2905.1,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 6.741214057507987,
|
|
"grad_norm": 1.0787658150641894,
|
|
"learning_rate": 1.6842668475885782e-07,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24646592140197754,
|
|
"step": 4220,
|
|
"valid_targets_mean": 2078.8,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 6.7492012779552715,
|
|
"grad_norm": 0.8958647617588655,
|
|
"learning_rate": 1.582656806145444e-07,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21935656666755676,
|
|
"step": 4225,
|
|
"valid_targets_mean": 2540.3,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 6.757188498402556,
|
|
"grad_norm": 0.8402067750956428,
|
|
"learning_rate": 1.4841957110106388e-07,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22793927788734436,
|
|
"step": 4230,
|
|
"valid_targets_mean": 2859.2,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 6.76517571884984,
|
|
"grad_norm": 0.9869942615239213,
|
|
"learning_rate": 1.388885124793049e-07,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24310404062271118,
|
|
"step": 4235,
|
|
"valid_targets_mean": 1987.7,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 6.773162939297125,
|
|
"grad_norm": 0.8982433467759985,
|
|
"learning_rate": 1.2967265601019573e-07,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2283564805984497,
|
|
"step": 4240,
|
|
"valid_targets_mean": 2448.6,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 6.781150159744409,
|
|
"grad_norm": 0.8450536618853598,
|
|
"learning_rate": 1.207721479523105e-07,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24024513363838196,
|
|
"step": 4245,
|
|
"valid_targets_mean": 2844.1,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 6.789137380191693,
|
|
"grad_norm": 0.8490359521419056,
|
|
"learning_rate": 1.1218712955954003e-07,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21305924654006958,
|
|
"step": 4250,
|
|
"valid_targets_mean": 2648.9,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 6.797124600638978,
|
|
"grad_norm": 0.8635004858863672,
|
|
"learning_rate": 1.0391773707885578e-07,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2413269430398941,
|
|
"step": 4255,
|
|
"valid_targets_mean": 2598.8,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 6.805111821086262,
|
|
"grad_norm": 0.9268920413170548,
|
|
"learning_rate": 9.596410174814497e-08,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22050106525421143,
|
|
"step": 4260,
|
|
"valid_targets_mean": 2504.9,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 6.813099041533547,
|
|
"grad_norm": 0.9832748856163084,
|
|
"learning_rate": 8.83263497941278e-08,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3260643482208252,
|
|
"step": 4265,
|
|
"valid_targets_mean": 2826.7,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 6.821086261980831,
|
|
"grad_norm": 1.1253592187772148,
|
|
"learning_rate": 8.10046024303568e-08,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21810108423233032,
|
|
"step": 4270,
|
|
"valid_targets_mean": 2258.8,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 6.8290734824281145,
|
|
"grad_norm": 0.9020398554422216,
|
|
"learning_rate": 7.399897585528726e-08,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27820467948913574,
|
|
"step": 4275,
|
|
"valid_targets_mean": 2641.4,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 6.8370607028754,
|
|
"grad_norm": 0.8583883313093937,
|
|
"learning_rate": 6.730958125044319e-08,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22428470849990845,
|
|
"step": 4280,
|
|
"valid_targets_mean": 2600.7,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 6.845047923322683,
|
|
"grad_norm": 0.8515809098147336,
|
|
"learning_rate": 6.093652477864309e-08,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1976698637008667,
|
|
"step": 4285,
|
|
"valid_targets_mean": 2823.4,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 6.853035143769968,
|
|
"grad_norm": 0.8429545854521847,
|
|
"learning_rate": 5.4879907582316986e-08,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20738956332206726,
|
|
"step": 4290,
|
|
"valid_targets_mean": 2692.9,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 6.861022364217252,
|
|
"grad_norm": 1.0641054185638537,
|
|
"learning_rate": 4.913982578190535e-08,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451527863740921,
|
|
"step": 4295,
|
|
"valid_targets_mean": 2033.6,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 6.8690095846645365,
|
|
"grad_norm": 1.0156579876709448,
|
|
"learning_rate": 4.3716370474331527e-08,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738833427429199,
|
|
"step": 4300,
|
|
"valid_targets_mean": 2561.5,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 6.876996805111821,
|
|
"grad_norm": 0.8846318276011715,
|
|
"learning_rate": 3.8609627731558405e-08,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24272026121616364,
|
|
"step": 4305,
|
|
"valid_targets_mean": 2837.1,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 6.884984025559105,
|
|
"grad_norm": 0.8680846546228683,
|
|
"learning_rate": 3.381967859920954e-08,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23333126306533813,
|
|
"step": 4310,
|
|
"valid_targets_mean": 2384.0,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 6.89297124600639,
|
|
"grad_norm": 0.779573429390176,
|
|
"learning_rate": 2.9346599095305685e-08,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24015694856643677,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3775.1,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 6.900958466453674,
|
|
"grad_norm": 0.7992336848359107,
|
|
"learning_rate": 2.5190460209039146e-08,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22445455193519592,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3423.3,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 6.9089456869009584,
|
|
"grad_norm": 0.8285403197225836,
|
|
"learning_rate": 2.1351327899656883e-08,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25639259815216064,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3105.7,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 6.916932907348243,
|
|
"grad_norm": 0.9202827073357112,
|
|
"learning_rate": 1.782926309540578e-08,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23128975927829742,
|
|
"step": 4330,
|
|
"valid_targets_mean": 2471.7,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 6.924920127795527,
|
|
"grad_norm": 1.020365959093813,
|
|
"learning_rate": 1.462432169257344e-08,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23014310002326965,
|
|
"step": 4335,
|
|
"valid_targets_mean": 1801.7,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.932907348242812,
|
|
"grad_norm": 0.9388111951504066,
|
|
"learning_rate": 1.1736554554604429e-08,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2392108291387558,
|
|
"step": 4340,
|
|
"valid_targets_mean": 2223.4,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 6.940894568690096,
|
|
"grad_norm": 0.9739799523521154,
|
|
"learning_rate": 9.166007511274278e-09,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2289280891418457,
|
|
"step": 4345,
|
|
"valid_targets_mean": 1950.4,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 6.94888178913738,
|
|
"grad_norm": 0.9177155624301259,
|
|
"learning_rate": 6.912721357985597e-09,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23037296533584595,
|
|
"step": 4350,
|
|
"valid_targets_mean": 2337.9,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 6.956869009584665,
|
|
"grad_norm": 0.7737633194778379,
|
|
"learning_rate": 4.976731855104166e-09,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19476398825645447,
|
|
"step": 4355,
|
|
"valid_targets_mean": 2597.2,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 6.964856230031949,
|
|
"grad_norm": 0.9323175639157613,
|
|
"learning_rate": 3.3580697274016028e-09,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257503479719162,
|
|
"step": 4360,
|
|
"valid_targets_mean": 2127.4,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 6.972843450479234,
|
|
"grad_norm": 0.9641404455649921,
|
|
"learning_rate": 2.056760663555757e-09,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2452305257320404,
|
|
"step": 4365,
|
|
"valid_targets_mean": 2238.1,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 6.980830670926517,
|
|
"grad_norm": 0.8512094837712004,
|
|
"learning_rate": 1.0728253157599178e-09,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22671981155872345,
|
|
"step": 4370,
|
|
"valid_targets_mean": 2782.6,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 6.988817891373802,
|
|
"grad_norm": 0.929641334786566,
|
|
"learning_rate": 4.062792993786424e-10,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24825161695480347,
|
|
"step": 4375,
|
|
"valid_targets_mean": 2408.7,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.996805111821086,
|
|
"grad_norm": 0.8970842912116048,
|
|
"learning_rate": 5.7133192707947705e-11,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284351646900177,
|
|
"step": 4380,
|
|
"valid_targets_mean": 2629.9,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3209829330444336,
|
|
"step": 4382,
|
|
"total_flos": 723944083881984.0,
|
|
"train_loss": 0.33799729519704336,
|
|
"train_runtime": 16115.7306,
|
|
"train_samples_per_second": 4.351,
|
|
"train_steps_per_second": 0.272,
|
|
"valid_targets_mean": 2022.3,
|
|
"valid_targets_min": 797
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4382,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 723944083881984.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|