14689 lines
407 KiB
JSON
14689 lines
407 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 6657,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.005257623554153523,
|
|
"grad_norm": 14.338418407480448,
|
|
"learning_rate": 2.4024024024024026e-07,
|
|
"loss": 0.8123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7896162271499634,
|
|
"step": 5,
|
|
"valid_targets_mean": 1423.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 0.010515247108307046,
|
|
"grad_norm": 15.250363398872196,
|
|
"learning_rate": 5.405405405405406e-07,
|
|
"loss": 0.8146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8242740035057068,
|
|
"step": 10,
|
|
"valid_targets_mean": 1462.5,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.015772870662460567,
|
|
"grad_norm": 13.170887774620747,
|
|
"learning_rate": 8.40840840840841e-07,
|
|
"loss": 0.7847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.749550461769104,
|
|
"step": 15,
|
|
"valid_targets_mean": 1514.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.02103049421661409,
|
|
"grad_norm": 12.021580563192558,
|
|
"learning_rate": 1.1411411411411411e-06,
|
|
"loss": 0.772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7531605958938599,
|
|
"step": 20,
|
|
"valid_targets_mean": 1450.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 0.026288117770767613,
|
|
"grad_norm": 9.812753263810011,
|
|
"learning_rate": 1.4414414414414416e-06,
|
|
"loss": 0.7447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7402019500732422,
|
|
"step": 25,
|
|
"valid_targets_mean": 1548.3,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.031545741324921134,
|
|
"grad_norm": 7.151766176611262,
|
|
"learning_rate": 1.7417417417417418e-06,
|
|
"loss": 0.7118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6922894716262817,
|
|
"step": 30,
|
|
"valid_targets_mean": 1539.9,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.03680336487907466,
|
|
"grad_norm": 6.414810551517701,
|
|
"learning_rate": 2.0420420420420424e-06,
|
|
"loss": 0.6655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6497060656547546,
|
|
"step": 35,
|
|
"valid_targets_mean": 1543.9,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 0.04206098843322818,
|
|
"grad_norm": 5.524276666499735,
|
|
"learning_rate": 2.3423423423423424e-06,
|
|
"loss": 0.6395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6292667984962463,
|
|
"step": 40,
|
|
"valid_targets_mean": 1371.5,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.0473186119873817,
|
|
"grad_norm": 4.309382783812248,
|
|
"learning_rate": 2.642642642642643e-06,
|
|
"loss": 0.5812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5641481876373291,
|
|
"step": 45,
|
|
"valid_targets_mean": 1560.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.052576235541535225,
|
|
"grad_norm": 3.977345905029791,
|
|
"learning_rate": 2.942942942942943e-06,
|
|
"loss": 0.5305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4955446720123291,
|
|
"step": 50,
|
|
"valid_targets_mean": 1439.9,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.05783385909568875,
|
|
"grad_norm": 2.2191077624395343,
|
|
"learning_rate": 3.2432432432432437e-06,
|
|
"loss": 0.4958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5143522024154663,
|
|
"step": 55,
|
|
"valid_targets_mean": 1455.1,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.06309148264984227,
|
|
"grad_norm": 1.699779263330342,
|
|
"learning_rate": 3.5435435435435437e-06,
|
|
"loss": 0.4717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4495570659637451,
|
|
"step": 60,
|
|
"valid_targets_mean": 1540.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.0683491062039958,
|
|
"grad_norm": 1.558544874011996,
|
|
"learning_rate": 3.843843843843844e-06,
|
|
"loss": 0.4494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42444512248039246,
|
|
"step": 65,
|
|
"valid_targets_mean": 1387.2,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.07360672975814932,
|
|
"grad_norm": 1.230255486271066,
|
|
"learning_rate": 4.1441441441441446e-06,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3952111601829529,
|
|
"step": 70,
|
|
"valid_targets_mean": 1581.9,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 0.07886435331230283,
|
|
"grad_norm": 1.1773183195088508,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.4086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4091920256614685,
|
|
"step": 75,
|
|
"valid_targets_mean": 1635.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 0.08412197686645637,
|
|
"grad_norm": 1.212376527573756,
|
|
"learning_rate": 4.7447447447447454e-06,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4411184787750244,
|
|
"step": 80,
|
|
"valid_targets_mean": 1542.4,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 0.08937960042060988,
|
|
"grad_norm": 1.1207442958501526,
|
|
"learning_rate": 5.045045045045045e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39498135447502136,
|
|
"step": 85,
|
|
"valid_targets_mean": 1879.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.0946372239747634,
|
|
"grad_norm": 1.2193312077699985,
|
|
"learning_rate": 5.345345345345346e-06,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39774513244628906,
|
|
"step": 90,
|
|
"valid_targets_mean": 1409.0,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.09989484752891693,
|
|
"grad_norm": 1.1973976745204826,
|
|
"learning_rate": 5.645645645645647e-06,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36110740900039673,
|
|
"step": 95,
|
|
"valid_targets_mean": 1377.8,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.10515247108307045,
|
|
"grad_norm": 0.9711829227364538,
|
|
"learning_rate": 5.945945945945947e-06,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36348843574523926,
|
|
"step": 100,
|
|
"valid_targets_mean": 1811.9,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.11041009463722397,
|
|
"grad_norm": 1.0436048206811175,
|
|
"learning_rate": 6.246246246246247e-06,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3778885304927826,
|
|
"step": 105,
|
|
"valid_targets_mean": 1814.3,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.1156677181913775,
|
|
"grad_norm": 1.0480485575994163,
|
|
"learning_rate": 6.546546546546547e-06,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.327975332736969,
|
|
"step": 110,
|
|
"valid_targets_mean": 1493.4,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.12092534174553102,
|
|
"grad_norm": 1.077406110311962,
|
|
"learning_rate": 6.846846846846848e-06,
|
|
"loss": 0.3401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32423490285873413,
|
|
"step": 115,
|
|
"valid_targets_mean": 1456.2,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.12618296529968454,
|
|
"grad_norm": 1.0059803201811535,
|
|
"learning_rate": 7.147147147147148e-06,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32918429374694824,
|
|
"step": 120,
|
|
"valid_targets_mean": 1340.2,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 0.13144058885383805,
|
|
"grad_norm": 1.0566785983665758,
|
|
"learning_rate": 7.447447447447448e-06,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3207254409790039,
|
|
"step": 125,
|
|
"valid_targets_mean": 1569.0,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.1366982124079916,
|
|
"grad_norm": 0.982078723172686,
|
|
"learning_rate": 7.747747747747749e-06,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31849318742752075,
|
|
"step": 130,
|
|
"valid_targets_mean": 1595.7,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.14195583596214512,
|
|
"grad_norm": 1.0044175903244321,
|
|
"learning_rate": 8.048048048048048e-06,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31595051288604736,
|
|
"step": 135,
|
|
"valid_targets_mean": 1801.3,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 0.14721345951629863,
|
|
"grad_norm": 1.0412575758676315,
|
|
"learning_rate": 8.348348348348348e-06,
|
|
"loss": 0.3202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32682791352272034,
|
|
"step": 140,
|
|
"valid_targets_mean": 1652.4,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 0.15247108307045215,
|
|
"grad_norm": 1.208059994485327,
|
|
"learning_rate": 8.64864864864865e-06,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33247053623199463,
|
|
"step": 145,
|
|
"valid_targets_mean": 1684.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 0.15772870662460567,
|
|
"grad_norm": 1.064399334326978,
|
|
"learning_rate": 8.94894894894895e-06,
|
|
"loss": 0.3157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32549673318862915,
|
|
"step": 150,
|
|
"valid_targets_mean": 1482.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 0.16298633017875921,
|
|
"grad_norm": 1.024962358387954,
|
|
"learning_rate": 9.24924924924925e-06,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3149445652961731,
|
|
"step": 155,
|
|
"valid_targets_mean": 1710.1,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.16824395373291273,
|
|
"grad_norm": 0.9678110198452141,
|
|
"learning_rate": 9.54954954954955e-06,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004312217235565,
|
|
"step": 160,
|
|
"valid_targets_mean": 1638.9,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 0.17350157728706625,
|
|
"grad_norm": 1.0120192177417784,
|
|
"learning_rate": 9.849849849849851e-06,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3038880228996277,
|
|
"step": 165,
|
|
"valid_targets_mean": 1525.7,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.17875920084121977,
|
|
"grad_norm": 1.1147511433571193,
|
|
"learning_rate": 1.015015015015015e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268440842628479,
|
|
"step": 170,
|
|
"valid_targets_mean": 1255.4,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 0.18401682439537329,
|
|
"grad_norm": 1.055664621119984,
|
|
"learning_rate": 1.0450450450450452e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28668320178985596,
|
|
"step": 175,
|
|
"valid_targets_mean": 1618.7,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 0.1892744479495268,
|
|
"grad_norm": 1.045906383931406,
|
|
"learning_rate": 1.0750750750750751e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275592178106308,
|
|
"step": 180,
|
|
"valid_targets_mean": 1466.6,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 0.19453207150368035,
|
|
"grad_norm": 0.9773273807006373,
|
|
"learning_rate": 1.1051051051051051e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29187920689582825,
|
|
"step": 185,
|
|
"valid_targets_mean": 1455.1,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.19978969505783387,
|
|
"grad_norm": 1.2727714967466175,
|
|
"learning_rate": 1.1351351351351352e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005208373069763,
|
|
"step": 190,
|
|
"valid_targets_mean": 1309.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.20504731861198738,
|
|
"grad_norm": 1.1244664228985333,
|
|
"learning_rate": 1.1651651651651652e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3339601755142212,
|
|
"step": 195,
|
|
"valid_targets_mean": 1437.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.2103049421661409,
|
|
"grad_norm": 1.0742134062044455,
|
|
"learning_rate": 1.1951951951951951e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.260494202375412,
|
|
"step": 200,
|
|
"valid_targets_mean": 1344.7,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.21556256572029442,
|
|
"grad_norm": 1.0009127030529608,
|
|
"learning_rate": 1.2252252252252253e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27542197704315186,
|
|
"step": 205,
|
|
"valid_targets_mean": 1535.1,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.22082018927444794,
|
|
"grad_norm": 1.0352483052750439,
|
|
"learning_rate": 1.2552552552552552e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706538438796997,
|
|
"step": 210,
|
|
"valid_targets_mean": 1439.4,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 0.22607781282860148,
|
|
"grad_norm": 0.9767573807189796,
|
|
"learning_rate": 1.2852852852852854e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3644488453865051,
|
|
"step": 215,
|
|
"valid_targets_mean": 1967.9,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.231335436382755,
|
|
"grad_norm": 0.9537088929251228,
|
|
"learning_rate": 1.3153153153153155e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756945788860321,
|
|
"step": 220,
|
|
"valid_targets_mean": 1489.4,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 0.23659305993690852,
|
|
"grad_norm": 0.9169366781867578,
|
|
"learning_rate": 1.3453453453453456e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865472733974457,
|
|
"step": 225,
|
|
"valid_targets_mean": 1777.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.24185068349106204,
|
|
"grad_norm": 1.4480786530892165,
|
|
"learning_rate": 1.3753753753753756e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30552157759666443,
|
|
"step": 230,
|
|
"valid_targets_mean": 1648.7,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 0.24710830704521555,
|
|
"grad_norm": 1.028602809234246,
|
|
"learning_rate": 1.4054054054054055e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258034348487854,
|
|
"step": 235,
|
|
"valid_targets_mean": 1432.2,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 0.25236593059936907,
|
|
"grad_norm": 0.9618845035216391,
|
|
"learning_rate": 1.4354354354354357e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26978522539138794,
|
|
"step": 240,
|
|
"valid_targets_mean": 1549.0,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 0.2576235541535226,
|
|
"grad_norm": 1.1533463384468676,
|
|
"learning_rate": 1.4654654654654656e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26131436228752136,
|
|
"step": 245,
|
|
"valid_targets_mean": 1158.9,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.2628811777076761,
|
|
"grad_norm": 0.9892775002286349,
|
|
"learning_rate": 1.4954954954954957e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30346208810806274,
|
|
"step": 250,
|
|
"valid_targets_mean": 1459.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.26813880126182965,
|
|
"grad_norm": 0.8754555049354074,
|
|
"learning_rate": 1.5255255255255257e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2807902693748474,
|
|
"step": 255,
|
|
"valid_targets_mean": 1715.9,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 0.2733964248159832,
|
|
"grad_norm": 0.959347461436203,
|
|
"learning_rate": 1.555555555555556e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573663890361786,
|
|
"step": 260,
|
|
"valid_targets_mean": 1492.1,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 0.2786540483701367,
|
|
"grad_norm": 1.0059380552130364,
|
|
"learning_rate": 1.5855855855855858e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515960931777954,
|
|
"step": 265,
|
|
"valid_targets_mean": 1427.2,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 0.28391167192429023,
|
|
"grad_norm": 1.0219411210537412,
|
|
"learning_rate": 1.6156156156156157e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2408115416765213,
|
|
"step": 270,
|
|
"valid_targets_mean": 1435.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.2891692954784437,
|
|
"grad_norm": 0.9020434518290471,
|
|
"learning_rate": 1.6456456456456457e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702721655368805,
|
|
"step": 275,
|
|
"valid_targets_mean": 1745.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.29442691903259727,
|
|
"grad_norm": 1.0211125239673158,
|
|
"learning_rate": 1.6756756756756757e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27518072724342346,
|
|
"step": 280,
|
|
"valid_targets_mean": 1522.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.2996845425867508,
|
|
"grad_norm": 1.0789002466783877,
|
|
"learning_rate": 1.705705705705706e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558009624481201,
|
|
"step": 285,
|
|
"valid_targets_mean": 1510.6,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.3049421661409043,
|
|
"grad_norm": 0.9665450503881389,
|
|
"learning_rate": 1.735735735735736e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27275264263153076,
|
|
"step": 290,
|
|
"valid_targets_mean": 1509.7,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 0.31019978969505785,
|
|
"grad_norm": 1.1483786872452868,
|
|
"learning_rate": 1.765765765765766e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25014638900756836,
|
|
"step": 295,
|
|
"valid_targets_mean": 1489.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 0.31545741324921134,
|
|
"grad_norm": 1.0398642224210262,
|
|
"learning_rate": 1.795795795795796e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2589448392391205,
|
|
"step": 300,
|
|
"valid_targets_mean": 1510.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.3207150368033649,
|
|
"grad_norm": 1.1747477187677065,
|
|
"learning_rate": 1.8258258258258258e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27208995819091797,
|
|
"step": 305,
|
|
"valid_targets_mean": 1534.2,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.32597266035751843,
|
|
"grad_norm": 1.210151289764671,
|
|
"learning_rate": 1.855855855855856e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26101744174957275,
|
|
"step": 310,
|
|
"valid_targets_mean": 1480.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.3312302839116719,
|
|
"grad_norm": 1.1167145455105136,
|
|
"learning_rate": 1.885885885885886e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276511549949646,
|
|
"step": 315,
|
|
"valid_targets_mean": 1504.4,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 0.33648790746582546,
|
|
"grad_norm": 0.9304470939793734,
|
|
"learning_rate": 1.915915915915916e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24150532484054565,
|
|
"step": 320,
|
|
"valid_targets_mean": 1624.8,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 0.34174553101997895,
|
|
"grad_norm": 0.9920499590419062,
|
|
"learning_rate": 1.9459459459459463e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2353840470314026,
|
|
"step": 325,
|
|
"valid_targets_mean": 1320.1,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.3470031545741325,
|
|
"grad_norm": 1.2160498593989866,
|
|
"learning_rate": 1.9759759759759763e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27118825912475586,
|
|
"step": 330,
|
|
"valid_targets_mean": 1348.9,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.352260778128286,
|
|
"grad_norm": 1.0827711833942575,
|
|
"learning_rate": 2.0060060060060062e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597997188568115,
|
|
"step": 335,
|
|
"valid_targets_mean": 1333.6,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 0.35751840168243953,
|
|
"grad_norm": 0.9895783250955589,
|
|
"learning_rate": 2.0360360360360362e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682701349258423,
|
|
"step": 340,
|
|
"valid_targets_mean": 1649.7,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.3627760252365931,
|
|
"grad_norm": 0.9057614789606002,
|
|
"learning_rate": 2.066066066066066e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23891566693782806,
|
|
"step": 345,
|
|
"valid_targets_mean": 1819.2,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 0.36803364879074657,
|
|
"grad_norm": 0.889783285793925,
|
|
"learning_rate": 2.0960960960960964e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716279625892639,
|
|
"step": 350,
|
|
"valid_targets_mean": 1603.2,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 0.3732912723449001,
|
|
"grad_norm": 0.9536373019208444,
|
|
"learning_rate": 2.1261261261261264e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24380457401275635,
|
|
"step": 355,
|
|
"valid_targets_mean": 1434.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.3785488958990536,
|
|
"grad_norm": 1.115443184798519,
|
|
"learning_rate": 2.1561561561561564e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26643645763397217,
|
|
"step": 360,
|
|
"valid_targets_mean": 1250.6,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.38380651945320715,
|
|
"grad_norm": 0.9355379407440322,
|
|
"learning_rate": 2.1861861861861863e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23333057761192322,
|
|
"step": 365,
|
|
"valid_targets_mean": 1461.3,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 0.3890641430073607,
|
|
"grad_norm": 1.077924971950134,
|
|
"learning_rate": 2.2162162162162163e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515951991081238,
|
|
"step": 370,
|
|
"valid_targets_mean": 1299.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.3943217665615142,
|
|
"grad_norm": 0.9940302187277178,
|
|
"learning_rate": 2.2462462462462466e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508964240550995,
|
|
"step": 375,
|
|
"valid_targets_mean": 1337.1,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.39957939011566773,
|
|
"grad_norm": 0.9542256345373819,
|
|
"learning_rate": 2.2762762762762765e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23581913113594055,
|
|
"step": 380,
|
|
"valid_targets_mean": 1472.1,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 0.4048370136698212,
|
|
"grad_norm": 1.1979823419315119,
|
|
"learning_rate": 2.3063063063063065e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23929482698440552,
|
|
"step": 385,
|
|
"valid_targets_mean": 1464.6,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 0.41009463722397477,
|
|
"grad_norm": 1.1127158719166208,
|
|
"learning_rate": 2.3363363363363364e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24247753620147705,
|
|
"step": 390,
|
|
"valid_targets_mean": 1378.8,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 0.4153522607781283,
|
|
"grad_norm": 0.9956267557503373,
|
|
"learning_rate": 2.3663663663663664e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.212764710187912,
|
|
"step": 395,
|
|
"valid_targets_mean": 1256.2,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.4206098843322818,
|
|
"grad_norm": 0.8406949700595693,
|
|
"learning_rate": 2.3963963963963967e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643095850944519,
|
|
"step": 400,
|
|
"valid_targets_mean": 2054.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 0.42586750788643535,
|
|
"grad_norm": 1.0208095852397516,
|
|
"learning_rate": 2.4264264264264267e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21735705435276031,
|
|
"step": 405,
|
|
"valid_targets_mean": 1743.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.43112513144058884,
|
|
"grad_norm": 0.9853785993337534,
|
|
"learning_rate": 2.4564564564564566e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26856714487075806,
|
|
"step": 410,
|
|
"valid_targets_mean": 1531.3,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 0.4363827549947424,
|
|
"grad_norm": 0.8454632207154943,
|
|
"learning_rate": 2.4864864864864866e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24718914926052094,
|
|
"step": 415,
|
|
"valid_targets_mean": 1780.8,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 0.4416403785488959,
|
|
"grad_norm": 1.3887975257786067,
|
|
"learning_rate": 2.5165165165165165e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24860824644565582,
|
|
"step": 420,
|
|
"valid_targets_mean": 1485.3,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 0.4468980021030494,
|
|
"grad_norm": 0.9491452435962007,
|
|
"learning_rate": 2.5465465465465465e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24007216095924377,
|
|
"step": 425,
|
|
"valid_targets_mean": 1408.1,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 0.45215562565720296,
|
|
"grad_norm": 0.8953373435850912,
|
|
"learning_rate": 2.5765765765765768e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24175237119197845,
|
|
"step": 430,
|
|
"valid_targets_mean": 1382.9,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 0.45741324921135645,
|
|
"grad_norm": 1.214865138105168,
|
|
"learning_rate": 2.6066066066066067e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626429498195648,
|
|
"step": 435,
|
|
"valid_targets_mean": 1290.4,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.46267087276551,
|
|
"grad_norm": 0.9553001424874544,
|
|
"learning_rate": 2.6366366366366367e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25079411268234253,
|
|
"step": 440,
|
|
"valid_targets_mean": 1449.4,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 0.4679284963196635,
|
|
"grad_norm": 0.7781379429239883,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2495959997177124,
|
|
"step": 445,
|
|
"valid_targets_mean": 2912.0,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 0.47318611987381703,
|
|
"grad_norm": 0.6840617027205644,
|
|
"learning_rate": 2.6966966966966966e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17593586444854736,
|
|
"step": 450,
|
|
"valid_targets_mean": 3155.2,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 0.4784437434279706,
|
|
"grad_norm": 0.5786177388731288,
|
|
"learning_rate": 2.726726726726727e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1732887625694275,
|
|
"step": 455,
|
|
"valid_targets_mean": 3495.2,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 0.48370136698212407,
|
|
"grad_norm": 1.0771543738683813,
|
|
"learning_rate": 2.756756756756757e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16611456871032715,
|
|
"step": 460,
|
|
"valid_targets_mean": 3671.3,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 0.4889589905362776,
|
|
"grad_norm": 0.583602213376592,
|
|
"learning_rate": 2.786786786786787e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16041299700737,
|
|
"step": 465,
|
|
"valid_targets_mean": 3131.7,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 0.4942166140904311,
|
|
"grad_norm": 1.3152094414132671,
|
|
"learning_rate": 2.8168168168168168e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620253264904022,
|
|
"step": 470,
|
|
"valid_targets_mean": 1250.6,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.49947423764458465,
|
|
"grad_norm": 0.6807191146727155,
|
|
"learning_rate": 2.8468468468468467e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17434732615947723,
|
|
"step": 475,
|
|
"valid_targets_mean": 3237.3,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 0.5047318611987381,
|
|
"grad_norm": 0.6089177015775961,
|
|
"learning_rate": 2.8768768768768774e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19791068136692047,
|
|
"step": 480,
|
|
"valid_targets_mean": 3020.2,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 0.5099894847528917,
|
|
"grad_norm": 0.4706237346681839,
|
|
"learning_rate": 2.9069069069069073e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12845684587955475,
|
|
"step": 485,
|
|
"valid_targets_mean": 3357.7,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 0.5152471083070452,
|
|
"grad_norm": 0.4312586193953129,
|
|
"learning_rate": 2.9369369369369373e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130380317568779,
|
|
"step": 490,
|
|
"valid_targets_mean": 4872.1,
|
|
"valid_targets_min": 2802
|
|
},
|
|
{
|
|
"epoch": 0.5205047318611987,
|
|
"grad_norm": 0.5202579147947507,
|
|
"learning_rate": 2.9669669669669673e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13587605953216553,
|
|
"step": 495,
|
|
"valid_targets_mean": 2990.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 0.5257623554153522,
|
|
"grad_norm": 0.5227023766325937,
|
|
"learning_rate": 2.9969969969969976e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1409936547279358,
|
|
"step": 500,
|
|
"valid_targets_mean": 3588.3,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.5310199789695058,
|
|
"grad_norm": 0.73045070317564,
|
|
"learning_rate": 3.0270270270270275e-05,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.213816300034523,
|
|
"step": 505,
|
|
"valid_targets_mean": 2007.3,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.5362776025236593,
|
|
"grad_norm": 0.5094046552569875,
|
|
"learning_rate": 3.0570570570570575e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14146271347999573,
|
|
"step": 510,
|
|
"valid_targets_mean": 3185.9,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 0.5415352260778128,
|
|
"grad_norm": 0.7568508998245875,
|
|
"learning_rate": 3.0870870870870874e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19519582390785217,
|
|
"step": 515,
|
|
"valid_targets_mean": 1841.9,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 0.5467928496319664,
|
|
"grad_norm": 0.4641975005934539,
|
|
"learning_rate": 3.1171171171171174e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10629108548164368,
|
|
"step": 520,
|
|
"valid_targets_mean": 2904.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 0.5520504731861199,
|
|
"grad_norm": 0.5461483785001882,
|
|
"learning_rate": 3.1471471471471473e-05,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12796208262443542,
|
|
"step": 525,
|
|
"valid_targets_mean": 3779.6,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 0.5573080967402734,
|
|
"grad_norm": 0.5588226619732147,
|
|
"learning_rate": 3.177177177177177e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15388840436935425,
|
|
"step": 530,
|
|
"valid_targets_mean": 2611.1,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.562565720294427,
|
|
"grad_norm": 0.5957845529610092,
|
|
"learning_rate": 3.207207207207207e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.170301616191864,
|
|
"step": 535,
|
|
"valid_targets_mean": 2535.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 0.5678233438485805,
|
|
"grad_norm": 0.5388537314765058,
|
|
"learning_rate": 3.237237237237238e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16233664751052856,
|
|
"step": 540,
|
|
"valid_targets_mean": 3190.8,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 0.573080967402734,
|
|
"grad_norm": 0.7842135944261199,
|
|
"learning_rate": 3.267267267267268e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310577094554901,
|
|
"step": 545,
|
|
"valid_targets_mean": 3511.1,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 0.5783385909568874,
|
|
"grad_norm": 0.45798139346184796,
|
|
"learning_rate": 3.297297297297298e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12865862250328064,
|
|
"step": 550,
|
|
"valid_targets_mean": 3521.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 0.583596214511041,
|
|
"grad_norm": 0.5444764260919418,
|
|
"learning_rate": 3.327327327327328e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14510411024093628,
|
|
"step": 555,
|
|
"valid_targets_mean": 3062.9,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 0.5888538380651945,
|
|
"grad_norm": 1.04660911026327,
|
|
"learning_rate": 3.357357357357358e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559956908226013,
|
|
"step": 560,
|
|
"valid_targets_mean": 1179.9,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.594111461619348,
|
|
"grad_norm": 0.7171985572053283,
|
|
"learning_rate": 3.387387387387388e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18237915635108948,
|
|
"step": 565,
|
|
"valid_targets_mean": 2067.3,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 0.5993690851735016,
|
|
"grad_norm": 0.5433556965958147,
|
|
"learning_rate": 3.4174174174174176e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11692153662443161,
|
|
"step": 570,
|
|
"valid_targets_mean": 2839.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.6046267087276551,
|
|
"grad_norm": 0.7251220527202389,
|
|
"learning_rate": 3.4474474474474476e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3964594006538391,
|
|
"step": 575,
|
|
"valid_targets_mean": 2888.0,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 0.6098843322818086,
|
|
"grad_norm": 0.6391538888404741,
|
|
"learning_rate": 3.4774774774774776e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17046722769737244,
|
|
"step": 580,
|
|
"valid_targets_mean": 2609.9,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 0.6151419558359621,
|
|
"grad_norm": 0.47641357301754167,
|
|
"learning_rate": 3.5075075075075075e-05,
|
|
"loss": 0.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12254441529512405,
|
|
"step": 585,
|
|
"valid_targets_mean": 3436.7,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 0.6203995793901157,
|
|
"grad_norm": 0.7891340904704162,
|
|
"learning_rate": 3.5375375375375375e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4564532935619354,
|
|
"step": 590,
|
|
"valid_targets_mean": 2644.9,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.6256572029442692,
|
|
"grad_norm": 0.3660416250318796,
|
|
"learning_rate": 3.567567567567568e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11844614893198013,
|
|
"step": 595,
|
|
"valid_targets_mean": 4109.6,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.6309148264984227,
|
|
"grad_norm": 0.4193625327298725,
|
|
"learning_rate": 3.597597597597598e-05,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13238012790679932,
|
|
"step": 600,
|
|
"valid_targets_mean": 4150.4,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 0.6361724500525763,
|
|
"grad_norm": 0.4545358997390923,
|
|
"learning_rate": 3.627627627627628e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16790561378002167,
|
|
"step": 605,
|
|
"valid_targets_mean": 3907.9,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 0.6414300736067298,
|
|
"grad_norm": 0.5238318432896075,
|
|
"learning_rate": 3.657657657657658e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1229844018816948,
|
|
"step": 610,
|
|
"valid_targets_mean": 2426.2,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.6466876971608833,
|
|
"grad_norm": 0.5894750567460497,
|
|
"learning_rate": 3.687687687687688e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15686744451522827,
|
|
"step": 615,
|
|
"valid_targets_mean": 3060.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 0.6519453207150369,
|
|
"grad_norm": 0.5707030448482538,
|
|
"learning_rate": 3.717717717717718e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1290627419948578,
|
|
"step": 620,
|
|
"valid_targets_mean": 2013.9,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.6572029442691903,
|
|
"grad_norm": 0.5570257691076398,
|
|
"learning_rate": 3.747747747747748e-05,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11516053229570389,
|
|
"step": 625,
|
|
"valid_targets_mean": 3445.8,
|
|
"valid_targets_min": 1770
|
|
},
|
|
{
|
|
"epoch": 0.6624605678233438,
|
|
"grad_norm": 0.4271290599062706,
|
|
"learning_rate": 3.777777777777778e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11819294840097427,
|
|
"step": 630,
|
|
"valid_targets_mean": 3535.2,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 0.6677181913774973,
|
|
"grad_norm": 0.38302613544250624,
|
|
"learning_rate": 3.807807807807808e-05,
|
|
"loss": 0.2007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09289303421974182,
|
|
"step": 635,
|
|
"valid_targets_mean": 3596.6,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 0.6729758149316509,
|
|
"grad_norm": 0.5465024682283733,
|
|
"learning_rate": 3.837837837837838e-05,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12863999605178833,
|
|
"step": 640,
|
|
"valid_targets_mean": 3249.3,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.6782334384858044,
|
|
"grad_norm": 0.436044478357065,
|
|
"learning_rate": 3.8678678678678684e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10554418712854385,
|
|
"step": 645,
|
|
"valid_targets_mean": 3712.8,
|
|
"valid_targets_min": 2535
|
|
},
|
|
{
|
|
"epoch": 0.6834910620399579,
|
|
"grad_norm": 0.47689707142391047,
|
|
"learning_rate": 3.897897897897898e-05,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12995974719524384,
|
|
"step": 650,
|
|
"valid_targets_mean": 3262.0,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 0.6887486855941115,
|
|
"grad_norm": 0.7118294827025267,
|
|
"learning_rate": 3.927927927927928e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569500803947449,
|
|
"step": 655,
|
|
"valid_targets_mean": 1491.4,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 0.694006309148265,
|
|
"grad_norm": 0.41210337778599393,
|
|
"learning_rate": 3.957957957957958e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13101404905319214,
|
|
"step": 660,
|
|
"valid_targets_mean": 3696.4,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 0.6992639327024185,
|
|
"grad_norm": 0.5867759477767424,
|
|
"learning_rate": 3.987987987987988e-05,
|
|
"loss": 0.1793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17820844054222107,
|
|
"step": 665,
|
|
"valid_targets_mean": 1915.5,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.704521556256572,
|
|
"grad_norm": 0.49597583572323783,
|
|
"learning_rate": 3.9999975251805184e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13559472560882568,
|
|
"step": 670,
|
|
"valid_targets_mean": 2728.7,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.7097791798107256,
|
|
"grad_norm": 0.5231404790443446,
|
|
"learning_rate": 3.9999824013058675e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14908647537231445,
|
|
"step": 675,
|
|
"valid_targets_mean": 2827.4,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 0.7150368033648791,
|
|
"grad_norm": 0.5497626230412085,
|
|
"learning_rate": 3.99995352856012e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12418384104967117,
|
|
"step": 680,
|
|
"valid_targets_mean": 2377.9,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 0.7202944269190326,
|
|
"grad_norm": 0.527548434551724,
|
|
"learning_rate": 3.999910907141761e-05,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1570894569158554,
|
|
"step": 685,
|
|
"valid_targets_mean": 2733.9,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.7255520504731862,
|
|
"grad_norm": 0.6018463628202171,
|
|
"learning_rate": 3.9998545373437924e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13867342472076416,
|
|
"step": 690,
|
|
"valid_targets_mean": 3892.4,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 0.7308096740273397,
|
|
"grad_norm": 0.43425828709088526,
|
|
"learning_rate": 3.999784419553728e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12710916996002197,
|
|
"step": 695,
|
|
"valid_targets_mean": 3477.7,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 0.7360672975814931,
|
|
"grad_norm": 0.38137677229642736,
|
|
"learning_rate": 3.9997005542535916e-05,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13690686225891113,
|
|
"step": 700,
|
|
"valid_targets_mean": 4753.2,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 0.7413249211356467,
|
|
"grad_norm": 0.516289151825183,
|
|
"learning_rate": 3.9996029420199154e-05,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13586783409118652,
|
|
"step": 705,
|
|
"valid_targets_mean": 2988.9,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 0.7465825446898002,
|
|
"grad_norm": 0.31228545003568126,
|
|
"learning_rate": 3.9994915835237336e-05,
|
|
"loss": 0.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09548960626125336,
|
|
"step": 710,
|
|
"valid_targets_mean": 4565.8,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 0.7518401682439537,
|
|
"grad_norm": 0.3945771727525488,
|
|
"learning_rate": 3.999366479530581e-05,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11906930804252625,
|
|
"step": 715,
|
|
"valid_targets_mean": 3763.9,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 0.7570977917981072,
|
|
"grad_norm": 0.7247875055180788,
|
|
"learning_rate": 3.999227630900483e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1936461329460144,
|
|
"step": 720,
|
|
"valid_targets_mean": 1522.7,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 0.7623554153522608,
|
|
"grad_norm": 0.3900335889448843,
|
|
"learning_rate": 3.9990750385879554e-05,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14570674300193787,
|
|
"step": 725,
|
|
"valid_targets_mean": 3974.7,
|
|
"valid_targets_min": 2443
|
|
},
|
|
{
|
|
"epoch": 0.7676130389064143,
|
|
"grad_norm": 0.6361138958968884,
|
|
"learning_rate": 3.998908703641993e-05,
|
|
"loss": 0.1516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14177639782428741,
|
|
"step": 730,
|
|
"valid_targets_mean": 1714.2,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.7728706624605678,
|
|
"grad_norm": 0.4223784054438746,
|
|
"learning_rate": 3.9987286272060644e-05,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1300775706768036,
|
|
"step": 735,
|
|
"valid_targets_mean": 3683.6,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 0.7781282860147214,
|
|
"grad_norm": 0.4459119486952276,
|
|
"learning_rate": 3.998534810518104e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15969018638134003,
|
|
"step": 740,
|
|
"valid_targets_mean": 2942.3,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.7833859095688749,
|
|
"grad_norm": 0.4291063688699222,
|
|
"learning_rate": 3.998327254910504e-05,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1203104555606842,
|
|
"step": 745,
|
|
"valid_targets_mean": 3584.6,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 0.7886435331230284,
|
|
"grad_norm": 0.581547670567422,
|
|
"learning_rate": 3.998105961810105e-05,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1824052333831787,
|
|
"step": 750,
|
|
"valid_targets_mean": 2055.4,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 0.7939011566771819,
|
|
"grad_norm": 0.5621362218057954,
|
|
"learning_rate": 3.997870932738187e-05,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14463567733764648,
|
|
"step": 755,
|
|
"valid_targets_mean": 2057.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.7991587802313355,
|
|
"grad_norm": 0.5485179695009602,
|
|
"learning_rate": 3.997622169310454e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16229714453220367,
|
|
"step": 760,
|
|
"valid_targets_mean": 2091.9,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.804416403785489,
|
|
"grad_norm": 0.4239666751101835,
|
|
"learning_rate": 3.9973596732370296e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12120245397090912,
|
|
"step": 765,
|
|
"valid_targets_mean": 3179.4,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 0.8096740273396424,
|
|
"grad_norm": 0.5585633426392805,
|
|
"learning_rate": 3.997083446322443e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13518524169921875,
|
|
"step": 770,
|
|
"valid_targets_mean": 2118.0,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 0.814931650893796,
|
|
"grad_norm": 0.518219996419831,
|
|
"learning_rate": 3.9967934904656145e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13838447630405426,
|
|
"step": 775,
|
|
"valid_targets_mean": 2350.0,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 0.8201892744479495,
|
|
"grad_norm": 0.6963817974170337,
|
|
"learning_rate": 3.9964898076598445e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22630974650382996,
|
|
"step": 780,
|
|
"valid_targets_mean": 1728.8,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 0.825446898002103,
|
|
"grad_norm": 0.44668396794098764,
|
|
"learning_rate": 3.996172399992799e-05,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15021365880966187,
|
|
"step": 785,
|
|
"valid_targets_mean": 3473.8,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 0.8307045215562566,
|
|
"grad_norm": 0.3900514362810737,
|
|
"learning_rate": 3.995841269646496e-05,
|
|
"loss": 0.1123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09701497852802277,
|
|
"step": 790,
|
|
"valid_targets_mean": 3574.1,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 0.8359621451104101,
|
|
"grad_norm": 0.4004405418682501,
|
|
"learning_rate": 3.995496418897291e-05,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1217922791838646,
|
|
"step": 795,
|
|
"valid_targets_mean": 2909.7,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 0.8412197686645636,
|
|
"grad_norm": 0.4705994456418147,
|
|
"learning_rate": 3.995137850115856e-05,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14017799496650696,
|
|
"step": 800,
|
|
"valid_targets_mean": 3022.2,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 0.8464773922187171,
|
|
"grad_norm": 0.3801804840642882,
|
|
"learning_rate": 3.994765565767174e-05,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10342194885015488,
|
|
"step": 805,
|
|
"valid_targets_mean": 2978.2,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.8517350157728707,
|
|
"grad_norm": 0.3497300756384891,
|
|
"learning_rate": 3.9943795684105104e-05,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10366655886173248,
|
|
"step": 810,
|
|
"valid_targets_mean": 4808.4,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.8569926393270242,
|
|
"grad_norm": 0.36306185739960156,
|
|
"learning_rate": 3.993979860699403e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11242718994617462,
|
|
"step": 815,
|
|
"valid_targets_mean": 4306.9,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 0.8622502628811777,
|
|
"grad_norm": 0.3413807088839418,
|
|
"learning_rate": 3.993566445381641e-05,
|
|
"loss": 0.1077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10108646750450134,
|
|
"step": 820,
|
|
"valid_targets_mean": 4484.6,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 0.8675078864353313,
|
|
"grad_norm": 0.4415224606461962,
|
|
"learning_rate": 3.9931393252992454e-05,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1572403758764267,
|
|
"step": 825,
|
|
"valid_targets_mean": 4246.4,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 0.8727655099894848,
|
|
"grad_norm": 0.3995550173486753,
|
|
"learning_rate": 3.992698503388453e-05,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10369531810283661,
|
|
"step": 830,
|
|
"valid_targets_mean": 2882.8,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.8780231335436383,
|
|
"grad_norm": 0.41211727578638874,
|
|
"learning_rate": 3.992243982679691e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14552940428256989,
|
|
"step": 835,
|
|
"valid_targets_mean": 2827.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.8832807570977917,
|
|
"grad_norm": 0.3939868535260792,
|
|
"learning_rate": 3.991775766297562e-05,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10490452498197556,
|
|
"step": 840,
|
|
"valid_targets_mean": 3046.0,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 0.8885383806519453,
|
|
"grad_norm": 0.44835264512341344,
|
|
"learning_rate": 3.991293857460815e-05,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20958760380744934,
|
|
"step": 845,
|
|
"valid_targets_mean": 2666.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.8937960042060988,
|
|
"grad_norm": 0.3936824512794017,
|
|
"learning_rate": 3.9907982594823326e-05,
|
|
"loss": 0.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12392207235097885,
|
|
"step": 850,
|
|
"valid_targets_mean": 3699.2,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 0.8990536277602523,
|
|
"grad_norm": 0.41845825991827296,
|
|
"learning_rate": 3.9902889757691e-05,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12586721777915955,
|
|
"step": 855,
|
|
"valid_targets_mean": 2607.3,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.9043112513144059,
|
|
"grad_norm": 0.5454890157347261,
|
|
"learning_rate": 3.9897660098221866e-05,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14049319922924042,
|
|
"step": 860,
|
|
"valid_targets_mean": 1881.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.9095688748685594,
|
|
"grad_norm": 0.6780503481583131,
|
|
"learning_rate": 3.98922936523672e-05,
|
|
"loss": 0.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21052402257919312,
|
|
"step": 865,
|
|
"valid_targets_mean": 1679.2,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.9148264984227129,
|
|
"grad_norm": 0.3689833882584315,
|
|
"learning_rate": 3.9886790457018604e-05,
|
|
"loss": 0.1694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11735684424638748,
|
|
"step": 870,
|
|
"valid_targets_mean": 3019.8,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.9200841219768665,
|
|
"grad_norm": 0.37814435403471575,
|
|
"learning_rate": 3.9881150550007776e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10002461820840836,
|
|
"step": 875,
|
|
"valid_targets_mean": 3745.3,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 0.92534174553102,
|
|
"grad_norm": 0.4516740938999742,
|
|
"learning_rate": 3.987537397010624e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13232296705245972,
|
|
"step": 880,
|
|
"valid_targets_mean": 3255.9,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 0.9305993690851735,
|
|
"grad_norm": 0.47744791805814685,
|
|
"learning_rate": 3.9869460757025064e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13045115768909454,
|
|
"step": 885,
|
|
"valid_targets_mean": 2534.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.935856992639327,
|
|
"grad_norm": 0.3906362600554991,
|
|
"learning_rate": 3.9863410951414616e-05,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11122287809848785,
|
|
"step": 890,
|
|
"valid_targets_mean": 2999.9,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.9411146161934806,
|
|
"grad_norm": 0.8413867252612346,
|
|
"learning_rate": 3.985722459486425e-05,
|
|
"loss": 0.1259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752592921257019,
|
|
"step": 895,
|
|
"valid_targets_mean": 1254.4,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 0.9463722397476341,
|
|
"grad_norm": 0.4227153957069782,
|
|
"learning_rate": 3.985090172990206e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1223062127828598,
|
|
"step": 900,
|
|
"valid_targets_mean": 3019.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 0.9516298633017876,
|
|
"grad_norm": 0.4870668105829589,
|
|
"learning_rate": 3.984444239999455e-05,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14176815748214722,
|
|
"step": 905,
|
|
"valid_targets_mean": 2472.1,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.9568874868559412,
|
|
"grad_norm": 0.4285690495389545,
|
|
"learning_rate": 3.9837846649546354e-05,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291240155696869,
|
|
"step": 910,
|
|
"valid_targets_mean": 3184.0,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 0.9621451104100947,
|
|
"grad_norm": 0.37519230718292484,
|
|
"learning_rate": 3.9831114523899945e-05,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12980321049690247,
|
|
"step": 915,
|
|
"valid_targets_mean": 4097.8,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 0.9674027339642481,
|
|
"grad_norm": 0.27810477043487897,
|
|
"learning_rate": 3.982424606933529e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09453219175338745,
|
|
"step": 920,
|
|
"valid_targets_mean": 5379.9,
|
|
"valid_targets_min": 3677
|
|
},
|
|
{
|
|
"epoch": 0.9726603575184016,
|
|
"grad_norm": 0.3434991396480145,
|
|
"learning_rate": 3.981724133306954e-05,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11569681763648987,
|
|
"step": 925,
|
|
"valid_targets_mean": 3455.8,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.9779179810725552,
|
|
"grad_norm": 0.37513775382439024,
|
|
"learning_rate": 3.981010036325674e-05,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12736296653747559,
|
|
"step": 930,
|
|
"valid_targets_mean": 3888.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 0.9831756046267087,
|
|
"grad_norm": 0.465838240040823,
|
|
"learning_rate": 3.980282320898746e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2295304238796234,
|
|
"step": 935,
|
|
"valid_targets_mean": 2937.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.9884332281808622,
|
|
"grad_norm": 0.40326534583786583,
|
|
"learning_rate": 3.9795409920288456e-05,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11375357955694199,
|
|
"step": 940,
|
|
"valid_targets_mean": 3050.1,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 0.9936908517350158,
|
|
"grad_norm": 0.409678281306497,
|
|
"learning_rate": 3.978786054812236e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12136652320623398,
|
|
"step": 945,
|
|
"valid_targets_mean": 2817.5,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 0.9989484752891693,
|
|
"grad_norm": 0.5472768786848023,
|
|
"learning_rate": 3.9780175144387304e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258529394865036,
|
|
"step": 950,
|
|
"valid_targets_mean": 2429.1,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 1.0042060988433228,
|
|
"grad_norm": 0.8869798751645458,
|
|
"learning_rate": 3.977235376191656e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949359714984894,
|
|
"step": 955,
|
|
"valid_targets_mean": 1576.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.0094637223974763,
|
|
"grad_norm": 0.878915468864671,
|
|
"learning_rate": 3.9764396454478195e-05,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25835198163986206,
|
|
"step": 960,
|
|
"valid_targets_mean": 1537.1,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 1.0147213459516298,
|
|
"grad_norm": 0.841572126611415,
|
|
"learning_rate": 3.975630327677468e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26211017370224,
|
|
"step": 965,
|
|
"valid_targets_mean": 1576.7,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 1.0199789695057835,
|
|
"grad_norm": 0.9031581455432894,
|
|
"learning_rate": 3.974807428444254e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25593310594558716,
|
|
"step": 970,
|
|
"valid_targets_mean": 1409.7,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.025236593059937,
|
|
"grad_norm": 0.7879468893368036,
|
|
"learning_rate": 3.973970953405195e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508862018585205,
|
|
"step": 975,
|
|
"valid_targets_mean": 1581.3,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 1.0304942166140905,
|
|
"grad_norm": 0.823604647249457,
|
|
"learning_rate": 3.9731209083106354e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29526305198669434,
|
|
"step": 980,
|
|
"valid_targets_mean": 1765.4,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.035751840168244,
|
|
"grad_norm": 0.788289075689827,
|
|
"learning_rate": 3.972257299004206e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23224273324012756,
|
|
"step": 985,
|
|
"valid_targets_mean": 1440.2,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.0410094637223974,
|
|
"grad_norm": 0.7228954505128617,
|
|
"learning_rate": 3.9713801314227867e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32342734932899475,
|
|
"step": 990,
|
|
"valid_targets_mean": 1589.9,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 1.046267087276551,
|
|
"grad_norm": 0.8104565386470577,
|
|
"learning_rate": 3.9704894115964615e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25293219089508057,
|
|
"step": 995,
|
|
"valid_targets_mean": 1544.3,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 1.0515247108307044,
|
|
"grad_norm": 0.7039867278511459,
|
|
"learning_rate": 3.9695851456484805e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25906291604042053,
|
|
"step": 1000,
|
|
"valid_targets_mean": 1915.3,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.0567823343848581,
|
|
"grad_norm": 0.8153535915545935,
|
|
"learning_rate": 3.968667339795218e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24466818571090698,
|
|
"step": 1005,
|
|
"valid_targets_mean": 1516.1,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 1.0620399579390116,
|
|
"grad_norm": 0.7463168942397396,
|
|
"learning_rate": 3.9677360003461246e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24177688360214233,
|
|
"step": 1010,
|
|
"valid_targets_mean": 1713.7,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 1.0672975814931651,
|
|
"grad_norm": 0.7794388914992689,
|
|
"learning_rate": 3.966791133703691e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.230307936668396,
|
|
"step": 1015,
|
|
"valid_targets_mean": 1325.3,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.0725552050473186,
|
|
"grad_norm": 0.9178047961430227,
|
|
"learning_rate": 3.965832746363397e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23218250274658203,
|
|
"step": 1020,
|
|
"valid_targets_mean": 1349.9,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.077812828601472,
|
|
"grad_norm": 0.7978419498391828,
|
|
"learning_rate": 3.964860844913676e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2218141257762909,
|
|
"step": 1025,
|
|
"valid_targets_mean": 1355.5,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.0830704521556256,
|
|
"grad_norm": 0.7540474715565492,
|
|
"learning_rate": 3.9638754360358585e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23646913468837738,
|
|
"step": 1030,
|
|
"valid_targets_mean": 1716.7,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 1.088328075709779,
|
|
"grad_norm": 1.1981264195677659,
|
|
"learning_rate": 3.962876526504134e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23529523611068726,
|
|
"step": 1035,
|
|
"valid_targets_mean": 1278.3,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.0935856992639328,
|
|
"grad_norm": 0.8387090304069986,
|
|
"learning_rate": 3.961864123185502e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2465025931596756,
|
|
"step": 1040,
|
|
"valid_targets_mean": 1576.7,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 1.0988433228180863,
|
|
"grad_norm": 0.8455985020405359,
|
|
"learning_rate": 3.9608382330397265e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23955386877059937,
|
|
"step": 1045,
|
|
"valid_targets_mean": 1733.6,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 1.1041009463722398,
|
|
"grad_norm": 0.7676754925673644,
|
|
"learning_rate": 3.959798863119284e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20279844105243683,
|
|
"step": 1050,
|
|
"valid_targets_mean": 1361.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 1.1093585699263933,
|
|
"grad_norm": 0.7740129324176807,
|
|
"learning_rate": 3.9587460205693194e-05,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2391810417175293,
|
|
"step": 1055,
|
|
"valid_targets_mean": 1457.7,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.1146161934805467,
|
|
"grad_norm": 0.88745715053675,
|
|
"learning_rate": 3.9576797126275945e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24747875332832336,
|
|
"step": 1060,
|
|
"valid_targets_mean": 1281.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 1.1198738170347002,
|
|
"grad_norm": 0.7629569780602973,
|
|
"learning_rate": 3.9565999466244384e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22490420937538147,
|
|
"step": 1065,
|
|
"valid_targets_mean": 1466.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.125131440588854,
|
|
"grad_norm": 0.7914740246261301,
|
|
"learning_rate": 3.955506729982699e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23777303099632263,
|
|
"step": 1070,
|
|
"valid_targets_mean": 1515.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.1303890641430074,
|
|
"grad_norm": 0.6700069466657289,
|
|
"learning_rate": 3.9544000702176896e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23064503073692322,
|
|
"step": 1075,
|
|
"valid_targets_mean": 1811.3,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.135646687697161,
|
|
"grad_norm": 0.8263907754698312,
|
|
"learning_rate": 3.953279974937139e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23290985822677612,
|
|
"step": 1080,
|
|
"valid_targets_mean": 1495.8,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 1.1409043112513144,
|
|
"grad_norm": 0.7433952417861344,
|
|
"learning_rate": 3.9521464518411356e-05,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22422298789024353,
|
|
"step": 1085,
|
|
"valid_targets_mean": 1653.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 1.146161934805468,
|
|
"grad_norm": 0.7292712211069998,
|
|
"learning_rate": 3.950999508722082e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19926020503044128,
|
|
"step": 1090,
|
|
"valid_targets_mean": 1380.4,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.1514195583596214,
|
|
"grad_norm": 0.7409594041378071,
|
|
"learning_rate": 3.9498391534646325e-05,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21704162657260895,
|
|
"step": 1095,
|
|
"valid_targets_mean": 1600.2,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 1.1566771819137749,
|
|
"grad_norm": 0.8349230880619002,
|
|
"learning_rate": 3.948665394045646e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23841853439807892,
|
|
"step": 1100,
|
|
"valid_targets_mean": 1447.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.1619348054679284,
|
|
"grad_norm": 0.7517080365308488,
|
|
"learning_rate": 3.9474782385341255e-05,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518262267112732,
|
|
"step": 1105,
|
|
"valid_targets_mean": 1590.8,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 1.167192429022082,
|
|
"grad_norm": 0.7595939144991899,
|
|
"learning_rate": 3.9462776950911684e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22226208448410034,
|
|
"step": 1110,
|
|
"valid_targets_mean": 1486.0,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 1.1724500525762356,
|
|
"grad_norm": 0.81977601249161,
|
|
"learning_rate": 3.9450637719699046e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20779165625572205,
|
|
"step": 1115,
|
|
"valid_targets_mean": 1233.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.177707676130389,
|
|
"grad_norm": 0.7504295904684078,
|
|
"learning_rate": 3.9438364775154436e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20952507853507996,
|
|
"step": 1120,
|
|
"valid_targets_mean": 1408.1,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 1.1829652996845426,
|
|
"grad_norm": 0.7313239983187266,
|
|
"learning_rate": 3.942595820164818e-05,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542307674884796,
|
|
"step": 1125,
|
|
"valid_targets_mean": 1747.6,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 1.188222923238696,
|
|
"grad_norm": 0.7761150837666538,
|
|
"learning_rate": 3.94134180844692e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2359396070241928,
|
|
"step": 1130,
|
|
"valid_targets_mean": 1415.1,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 1.1934805467928495,
|
|
"grad_norm": 0.8509889976913751,
|
|
"learning_rate": 3.940074450982449e-05,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22314172983169556,
|
|
"step": 1135,
|
|
"valid_targets_mean": 1241.3,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 1.1987381703470033,
|
|
"grad_norm": 0.7953065055183305,
|
|
"learning_rate": 3.93879375648385e-05,
|
|
"loss": 0.2212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2226504534482956,
|
|
"step": 1140,
|
|
"valid_targets_mean": 1566.1,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.2039957939011567,
|
|
"grad_norm": 0.7779219592633028,
|
|
"learning_rate": 3.9374997337552496e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2328607589006424,
|
|
"step": 1145,
|
|
"valid_targets_mean": 1655.8,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 1.2092534174553102,
|
|
"grad_norm": 0.9610600652279467,
|
|
"learning_rate": 3.936192391692404e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20716674625873566,
|
|
"step": 1150,
|
|
"valid_targets_mean": 1325.1,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 1.2145110410094637,
|
|
"grad_norm": 0.7570031091587086,
|
|
"learning_rate": 3.9348717392826306e-05,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24780115485191345,
|
|
"step": 1155,
|
|
"valid_targets_mean": 1859.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.2197686645636172,
|
|
"grad_norm": 0.7619735134338017,
|
|
"learning_rate": 3.933537785604748e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24546000361442566,
|
|
"step": 1160,
|
|
"valid_targets_mean": 1715.2,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 1.2250262881177707,
|
|
"grad_norm": 0.7201024650567306,
|
|
"learning_rate": 3.932190539829018e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22544372081756592,
|
|
"step": 1165,
|
|
"valid_targets_mean": 1776.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 1.2302839116719242,
|
|
"grad_norm": 0.7263537229376624,
|
|
"learning_rate": 3.9308300112170735e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19575223326683044,
|
|
"step": 1170,
|
|
"valid_targets_mean": 1405.6,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.235541535226078,
|
|
"grad_norm": 0.8579122640367931,
|
|
"learning_rate": 3.929456209121865e-05,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21605601906776428,
|
|
"step": 1175,
|
|
"valid_targets_mean": 1445.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 1.2407991587802314,
|
|
"grad_norm": 0.8132894541239973,
|
|
"learning_rate": 3.928069142987589e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2238626778125763,
|
|
"step": 1180,
|
|
"valid_targets_mean": 1320.9,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.2460567823343849,
|
|
"grad_norm": 0.8264251778813105,
|
|
"learning_rate": 3.926668822349625e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23769277334213257,
|
|
"step": 1185,
|
|
"valid_targets_mean": 1461.5,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.2513144058885384,
|
|
"grad_norm": 0.8544823460426906,
|
|
"learning_rate": 3.925255256834474e-05,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21869561076164246,
|
|
"step": 1190,
|
|
"valid_targets_mean": 1330.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.2565720294426919,
|
|
"grad_norm": 0.8468093850705543,
|
|
"learning_rate": 3.923828456159685e-05,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21936550736427307,
|
|
"step": 1195,
|
|
"valid_targets_mean": 1413.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.2618296529968454,
|
|
"grad_norm": 0.7583120584384649,
|
|
"learning_rate": 3.922388430133793e-05,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21635094285011292,
|
|
"step": 1200,
|
|
"valid_targets_mean": 1483.2,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.267087276550999,
|
|
"grad_norm": 0.8225959765510902,
|
|
"learning_rate": 3.9209351886562535e-05,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23493990302085876,
|
|
"step": 1205,
|
|
"valid_targets_mean": 1516.4,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.2723449001051526,
|
|
"grad_norm": 0.7504836784879231,
|
|
"learning_rate": 3.919468741717367e-05,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20039993524551392,
|
|
"step": 1210,
|
|
"valid_targets_mean": 1541.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 1.277602523659306,
|
|
"grad_norm": 0.8511947034089659,
|
|
"learning_rate": 3.9179890993982186e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2272759974002838,
|
|
"step": 1215,
|
|
"valid_targets_mean": 1610.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.2828601472134595,
|
|
"grad_norm": 0.832455549139663,
|
|
"learning_rate": 3.916496271870603e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24332864582538605,
|
|
"step": 1220,
|
|
"valid_targets_mean": 1571.8,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 1.288117770767613,
|
|
"grad_norm": 0.7044386170169681,
|
|
"learning_rate": 3.914990269396957e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20751051604747772,
|
|
"step": 1225,
|
|
"valid_targets_mean": 1560.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 1.2933753943217665,
|
|
"grad_norm": 0.8464901864200083,
|
|
"learning_rate": 3.913471102330288e-05,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21428729593753815,
|
|
"step": 1230,
|
|
"valid_targets_mean": 1341.3,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.29863301787592,
|
|
"grad_norm": 0.7188817616548374,
|
|
"learning_rate": 3.911938781114105e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19710156321525574,
|
|
"step": 1235,
|
|
"valid_targets_mean": 1448.0,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 1.3038906414300735,
|
|
"grad_norm": 0.8802628923750944,
|
|
"learning_rate": 3.910393316282345e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20695164799690247,
|
|
"step": 1240,
|
|
"valid_targets_mean": 1379.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 1.3091482649842272,
|
|
"grad_norm": 0.7828172742417011,
|
|
"learning_rate": 3.9088347184592974e-05,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2368948757648468,
|
|
"step": 1245,
|
|
"valid_targets_mean": 1699.4,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.3144058885383807,
|
|
"grad_norm": 0.7538652735215456,
|
|
"learning_rate": 3.907262998359539e-05,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2101295441389084,
|
|
"step": 1250,
|
|
"valid_targets_mean": 1718.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.3196635120925342,
|
|
"grad_norm": 0.7619261257748762,
|
|
"learning_rate": 3.905678166787852e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22127626836299896,
|
|
"step": 1255,
|
|
"valid_targets_mean": 1481.1,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.3249211356466877,
|
|
"grad_norm": 0.7763306255618148,
|
|
"learning_rate": 3.9040802346391555e-05,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2114461064338684,
|
|
"step": 1260,
|
|
"valid_targets_mean": 1562.9,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 1.3301787592008412,
|
|
"grad_norm": 0.7539205539627757,
|
|
"learning_rate": 3.902469212898427e-05,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2315191626548767,
|
|
"step": 1265,
|
|
"valid_targets_mean": 1583.2,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 1.3354363827549949,
|
|
"grad_norm": 0.6737336971557036,
|
|
"learning_rate": 3.900845112640631e-05,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20278924703598022,
|
|
"step": 1270,
|
|
"valid_targets_mean": 1613.6,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 1.3406940063091484,
|
|
"grad_norm": 0.8831041851437693,
|
|
"learning_rate": 3.8992079450306355e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22584810853004456,
|
|
"step": 1275,
|
|
"valid_targets_mean": 1279.4,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.3459516298633019,
|
|
"grad_norm": 0.7802323334115124,
|
|
"learning_rate": 3.897557721323145e-05,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21834111213684082,
|
|
"step": 1280,
|
|
"valid_targets_mean": 1321.2,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 1.3512092534174553,
|
|
"grad_norm": 0.8212012968134834,
|
|
"learning_rate": 3.895894452862614e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22549492120742798,
|
|
"step": 1285,
|
|
"valid_targets_mean": 1293.1,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.3564668769716088,
|
|
"grad_norm": 0.7917399078575781,
|
|
"learning_rate": 3.894218151083176e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20146942138671875,
|
|
"step": 1290,
|
|
"valid_targets_mean": 1459.4,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 1.3617245005257623,
|
|
"grad_norm": 0.7930453318217562,
|
|
"learning_rate": 3.892528827508562e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23862096667289734,
|
|
"step": 1295,
|
|
"valid_targets_mean": 1683.5,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 1.3669821240799158,
|
|
"grad_norm": 0.6839803408958032,
|
|
"learning_rate": 3.890826493752018e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19739103317260742,
|
|
"step": 1300,
|
|
"valid_targets_mean": 1473.7,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.3722397476340693,
|
|
"grad_norm": 0.7396865012322326,
|
|
"learning_rate": 3.8891111615162314e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19474495947360992,
|
|
"step": 1305,
|
|
"valid_targets_mean": 1436.2,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 1.3774973711882228,
|
|
"grad_norm": 0.7133519470993522,
|
|
"learning_rate": 3.8873828425932486e-05,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21923986077308655,
|
|
"step": 1310,
|
|
"valid_targets_mean": 1605.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.3827549947423765,
|
|
"grad_norm": 0.9644653064060893,
|
|
"learning_rate": 3.8856415488643885e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21021606028079987,
|
|
"step": 1315,
|
|
"valid_targets_mean": 1340.1,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 1.38801261829653,
|
|
"grad_norm": 0.76524442768024,
|
|
"learning_rate": 3.88388729230017e-05,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2201576828956604,
|
|
"step": 1320,
|
|
"valid_targets_mean": 1546.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.3932702418506835,
|
|
"grad_norm": 0.6325782587542048,
|
|
"learning_rate": 3.8821200849602215e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2096240073442459,
|
|
"step": 1325,
|
|
"valid_targets_mean": 1788.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.398527865404837,
|
|
"grad_norm": 0.7552381469693424,
|
|
"learning_rate": 3.880339938993204e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21048465371131897,
|
|
"step": 1330,
|
|
"valid_targets_mean": 1405.9,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.4037854889589905,
|
|
"grad_norm": 0.9598153253569314,
|
|
"learning_rate": 3.878546866636724e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21250438690185547,
|
|
"step": 1335,
|
|
"valid_targets_mean": 1576.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 1.4090431125131442,
|
|
"grad_norm": 0.7623950015764874,
|
|
"learning_rate": 3.876740880217248e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20306065678596497,
|
|
"step": 1340,
|
|
"valid_targets_mean": 1605.6,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 1.4143007360672977,
|
|
"grad_norm": 0.6868132325157823,
|
|
"learning_rate": 3.874921992150026e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20207521319389343,
|
|
"step": 1345,
|
|
"valid_targets_mean": 1734.6,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 1.4195583596214512,
|
|
"grad_norm": 0.7260563750450043,
|
|
"learning_rate": 3.873090214938994e-05,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21663132309913635,
|
|
"step": 1350,
|
|
"valid_targets_mean": 1587.2,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 1.4248159831756047,
|
|
"grad_norm": 0.7455453060468663,
|
|
"learning_rate": 3.871245561176698e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22021172940731049,
|
|
"step": 1355,
|
|
"valid_targets_mean": 1558.6,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 1.4300736067297581,
|
|
"grad_norm": 0.7186863025208695,
|
|
"learning_rate": 3.869388043544204e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20322775840759277,
|
|
"step": 1360,
|
|
"valid_targets_mean": 1574.2,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.4353312302839116,
|
|
"grad_norm": 0.7749435575552408,
|
|
"learning_rate": 3.8675176748110076e-05,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20808576047420502,
|
|
"step": 1365,
|
|
"valid_targets_mean": 1458.4,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 1.4405888538380651,
|
|
"grad_norm": 0.7473803573920585,
|
|
"learning_rate": 3.865634467834953e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22573482990264893,
|
|
"step": 1370,
|
|
"valid_targets_mean": 1713.8,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.4458464773922186,
|
|
"grad_norm": 0.8618415659674591,
|
|
"learning_rate": 3.863738435562139e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1910347044467926,
|
|
"step": 1375,
|
|
"valid_targets_mean": 1463.6,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 1.4511041009463723,
|
|
"grad_norm": 0.8170056621857298,
|
|
"learning_rate": 3.8618295910268316e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20702435076236725,
|
|
"step": 1380,
|
|
"valid_targets_mean": 1666.8,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.4563617245005258,
|
|
"grad_norm": 0.7023515013028406,
|
|
"learning_rate": 3.859907947351374e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21213456988334656,
|
|
"step": 1385,
|
|
"valid_targets_mean": 1611.5,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.4616193480546793,
|
|
"grad_norm": 0.6669175601038381,
|
|
"learning_rate": 3.8579735177460994e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246095836162567,
|
|
"step": 1390,
|
|
"valid_targets_mean": 1812.6,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 1.4668769716088328,
|
|
"grad_norm": 0.5769109450987918,
|
|
"learning_rate": 3.856026315509236e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12236759811639786,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3127.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.4721345951629863,
|
|
"grad_norm": 0.38077630807651636,
|
|
"learning_rate": 3.8540663540268175e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11780562996864319,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3549.7,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 1.4773922187171398,
|
|
"grad_norm": 0.35371933040544784,
|
|
"learning_rate": 3.852093646772592e-05,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0848422721028328,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3607.6,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 1.4826498422712935,
|
|
"grad_norm": 0.4340521275453412,
|
|
"learning_rate": 3.850108207307927e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14629028737545013,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3433.5,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 1.487907465825447,
|
|
"grad_norm": 0.43194201643869023,
|
|
"learning_rate": 3.848110049281719e-05,
|
|
"loss": 0.1152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1387070119380951,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2446.1,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.4931650893796005,
|
|
"grad_norm": 0.4587451848968233,
|
|
"learning_rate": 3.846099186430297e-05,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13479407131671906,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2739.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 1.498422712933754,
|
|
"grad_norm": 0.7533969406924347,
|
|
"learning_rate": 3.8440756325773296e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19724354147911072,
|
|
"step": 1425,
|
|
"valid_targets_mean": 1537.3,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.5036803364879074,
|
|
"grad_norm": 0.4415590187724359,
|
|
"learning_rate": 3.84203940163373e-05,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11975380778312683,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3057.1,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 1.508937960042061,
|
|
"grad_norm": 0.532020727469553,
|
|
"learning_rate": 3.83999050759756e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1732349991798401,
|
|
"step": 1435,
|
|
"valid_targets_mean": 2232.5,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 1.5141955835962144,
|
|
"grad_norm": 0.2684342128074378,
|
|
"learning_rate": 3.837928964553933e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08347544074058533,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5158.4,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 1.519453207150368,
|
|
"grad_norm": 0.5168820362972649,
|
|
"learning_rate": 3.835854786674918e-05,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09910877048969269,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3384.3,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 1.5247108307045214,
|
|
"grad_norm": 0.40504850236739504,
|
|
"learning_rate": 3.8337679882194443e-05,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263246089220047,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3542.2,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.5299684542586751,
|
|
"grad_norm": 0.5621595744994061,
|
|
"learning_rate": 3.8316685835331984e-05,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13412566483020782,
|
|
"step": 1455,
|
|
"valid_targets_mean": 1996.5,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.5352260778128286,
|
|
"grad_norm": 0.3082212312592102,
|
|
"learning_rate": 3.8295565870485295e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0963553711771965,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4156.0,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 1.540483701366982,
|
|
"grad_norm": 0.33766667602063294,
|
|
"learning_rate": 3.827432013284349e-05,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11290542781352997,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3535.2,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 1.5457413249211358,
|
|
"grad_norm": 0.34872520309057936,
|
|
"learning_rate": 3.825294876846031e-05,
|
|
"loss": 0.1268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0966203510761261,
|
|
"step": 1470,
|
|
"valid_targets_mean": 2985.7,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 1.5509989484752893,
|
|
"grad_norm": 0.367393613733167,
|
|
"learning_rate": 3.823145192425313e-05,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1047491729259491,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3023.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.5562565720294428,
|
|
"grad_norm": 0.6604084615314401,
|
|
"learning_rate": 3.8209829748001894e-05,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13880418241024017,
|
|
"step": 1480,
|
|
"valid_targets_mean": 1272.2,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 1.5615141955835963,
|
|
"grad_norm": 0.4529268402014723,
|
|
"learning_rate": 3.8188082388348186e-05,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11649923026561737,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2375.1,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 1.5667718191377498,
|
|
"grad_norm": 0.4727596426144311,
|
|
"learning_rate": 3.816620999479413e-05,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13613563776016235,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3174.8,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 1.5720294426919033,
|
|
"grad_norm": 0.4114679133248355,
|
|
"learning_rate": 3.8144212717701424e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14915388822555542,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2903.8,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 1.5772870662460567,
|
|
"grad_norm": 0.4556729145812952,
|
|
"learning_rate": 3.812209070829025e-05,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12058291584253311,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3491.4,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.5825446898002102,
|
|
"grad_norm": 0.38678974999687793,
|
|
"learning_rate": 3.809984411863828e-05,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11663035303354263,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3222.4,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.5878023133543637,
|
|
"grad_norm": 0.6161590845052197,
|
|
"learning_rate": 3.80774731016796e-05,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14975017309188843,
|
|
"step": 1510,
|
|
"valid_targets_mean": 1370.8,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.5930599369085172,
|
|
"grad_norm": 0.5755669440762874,
|
|
"learning_rate": 3.805497781120369e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16455599665641785,
|
|
"step": 1515,
|
|
"valid_targets_mean": 1732.1,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 1.598317560462671,
|
|
"grad_norm": 0.34685314319658894,
|
|
"learning_rate": 3.8032358401854315e-05,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11952777206897736,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3875.1,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 1.6035751840168244,
|
|
"grad_norm": 0.4769783021761849,
|
|
"learning_rate": 3.800961502912854e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30816492438316345,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3032.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 1.608832807570978,
|
|
"grad_norm": 0.4270619949322412,
|
|
"learning_rate": 3.798674784937557e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11716529726982117,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3728.1,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 1.6140904311251314,
|
|
"grad_norm": 0.5293141403191116,
|
|
"learning_rate": 3.7963757019795756e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14936235547065735,
|
|
"step": 1535,
|
|
"valid_targets_mean": 2361.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 1.619348054679285,
|
|
"grad_norm": 0.5621738528140149,
|
|
"learning_rate": 3.794064269843946e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263014554977417,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2741.1,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.6246056782334386,
|
|
"grad_norm": 0.3137718007315352,
|
|
"learning_rate": 3.791740504420599e-05,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09868137538433075,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4440.7,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.629863301787592,
|
|
"grad_norm": 0.36903820234668044,
|
|
"learning_rate": 3.789404421684251e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16915813088417053,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4360.9,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 1.6351209253417456,
|
|
"grad_norm": 0.3761959595802902,
|
|
"learning_rate": 3.787056037694293e-05,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13259699940681458,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4034.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 1.640378548895899,
|
|
"grad_norm": 0.3027133757228649,
|
|
"learning_rate": 3.784695368594682e-05,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08935047686100006,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3534.8,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 1.6456361724500526,
|
|
"grad_norm": 0.32760623697392594,
|
|
"learning_rate": 3.782322430613828e-05,
|
|
"loss": 0.1114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10545256733894348,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3806.2,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 1.650893796004206,
|
|
"grad_norm": 0.3884116537234466,
|
|
"learning_rate": 3.779937240064484e-05,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12552164494991302,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3536.1,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 1.6561514195583595,
|
|
"grad_norm": 0.4868761468099754,
|
|
"learning_rate": 3.777539813343634e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13782522082328796,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2808.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 1.661409043112513,
|
|
"grad_norm": 0.4119111639303862,
|
|
"learning_rate": 3.7751301669323776e-05,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1060827299952507,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2556.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.4784602722823778,
|
|
"learning_rate": 3.772708317395818e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14642825722694397,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2305.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.6719242902208202,
|
|
"grad_norm": 0.39746257174216554,
|
|
"learning_rate": 3.770274281382952e-05,
|
|
"loss": 0.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09272044897079468,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2936.2,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 1.6771819137749737,
|
|
"grad_norm": 0.3944235961948414,
|
|
"learning_rate": 3.767828075626551e-05,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09672108292579651,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2775.9,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.6824395373291272,
|
|
"grad_norm": 0.37014373898998776,
|
|
"learning_rate": 3.7653697169430456e-05,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10415723919868469,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3847.4,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.687697160883281,
|
|
"grad_norm": 0.4384129522035598,
|
|
"learning_rate": 3.762899222232413e-05,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18789973855018616,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.6929547844374344,
|
|
"grad_norm": 0.3349104796076761,
|
|
"learning_rate": 3.760416608478061e-05,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09439995139837265,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3367.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 1.698212407991588,
|
|
"grad_norm": 0.5162761958781051,
|
|
"learning_rate": 3.7579218927467044e-05,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13445918262004852,
|
|
"step": 1615,
|
|
"valid_targets_mean": 1583.1,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.7034700315457414,
|
|
"grad_norm": 0.6725188940138219,
|
|
"learning_rate": 3.7554150921882596e-05,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184066504240036,
|
|
"step": 1620,
|
|
"valid_targets_mean": 1594.0,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 1.7087276550998949,
|
|
"grad_norm": 0.4638219043357721,
|
|
"learning_rate": 3.752896224035716e-05,
|
|
"loss": 0.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14246805012226105,
|
|
"step": 1625,
|
|
"valid_targets_mean": 1982.2,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.7139852786540484,
|
|
"grad_norm": 0.3837933566986813,
|
|
"learning_rate": 3.750365305605024e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10711023211479187,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3892.2,
|
|
"valid_targets_min": 2440
|
|
},
|
|
{
|
|
"epoch": 1.7192429022082019,
|
|
"grad_norm": 0.4168973977243463,
|
|
"learning_rate": 3.7478223542949704e-05,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1168304830789566,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2770.6,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.7245005257623554,
|
|
"grad_norm": 0.4833222803482408,
|
|
"learning_rate": 3.745267387587065e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1150302141904831,
|
|
"step": 1640,
|
|
"valid_targets_mean": 3325.8,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 1.7297581493165088,
|
|
"grad_norm": 0.42439987683019614,
|
|
"learning_rate": 3.742700423045416e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11056456714868546,
|
|
"step": 1645,
|
|
"valid_targets_mean": 1951.9,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.7350157728706623,
|
|
"grad_norm": 0.45306162880433826,
|
|
"learning_rate": 3.7401214783166116e-05,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15018393099308014,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3283.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.7402733964248158,
|
|
"grad_norm": 0.31371898272324233,
|
|
"learning_rate": 3.737530571129596e-05,
|
|
"loss": 0.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09127908945083618,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4175.6,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 1.7455310199789695,
|
|
"grad_norm": 0.32930513404044526,
|
|
"learning_rate": 3.734927719295551e-05,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09676264226436615,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3676.2,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 1.750788643533123,
|
|
"grad_norm": 0.34745180374694185,
|
|
"learning_rate": 3.732312940707772e-05,
|
|
"loss": 0.1079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10899867117404938,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3627.8,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 1.7560462670872765,
|
|
"grad_norm": 0.36018071265294194,
|
|
"learning_rate": 3.729686253341543e-05,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1398889720439911,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2844.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.7613038906414302,
|
|
"grad_norm": 0.41573199528414245,
|
|
"learning_rate": 3.7270476752540163e-05,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12121666967868805,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3541.2,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 1.7665615141955837,
|
|
"grad_norm": 0.45247583140007264,
|
|
"learning_rate": 3.724397224584086e-05,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15463131666183472,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2889.8,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 1.7718191377497372,
|
|
"grad_norm": 0.3435107076206861,
|
|
"learning_rate": 3.7217349195522656e-05,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09973950684070587,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3737.8,
|
|
"valid_targets_min": 2695
|
|
},
|
|
{
|
|
"epoch": 1.7770767613038907,
|
|
"grad_norm": 0.4872381123497153,
|
|
"learning_rate": 3.7190607784605604e-05,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385570913553238,
|
|
"step": 1690,
|
|
"valid_targets_mean": 1918.2,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 1.7823343848580442,
|
|
"grad_norm": 0.35320585242071134,
|
|
"learning_rate": 3.716374819692341e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10413797199726105,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3263.6,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 1.7875920084121977,
|
|
"grad_norm": 0.3438644233389937,
|
|
"learning_rate": 3.713677061712223e-05,
|
|
"loss": 0.0947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10569068044424057,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3247.5,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 1.7928496319663512,
|
|
"grad_norm": 0.36334760630074076,
|
|
"learning_rate": 3.7109675230659316e-05,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10258142650127411,
|
|
"step": 1705,
|
|
"valid_targets_mean": 2887.5,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.7981072555205047,
|
|
"grad_norm": 0.38849288393029024,
|
|
"learning_rate": 3.7082462223801784e-05,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10438039898872375,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3249.1,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 1.8033648790746581,
|
|
"grad_norm": 0.4553296580473414,
|
|
"learning_rate": 3.7055131783625364e-05,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11888349801301956,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2108.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.8086225026288116,
|
|
"grad_norm": 0.39704942824372713,
|
|
"learning_rate": 3.702768409801304e-05,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10550612956285477,
|
|
"step": 1720,
|
|
"valid_targets_mean": 2457.1,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.8138801261829653,
|
|
"grad_norm": 0.5298613398346509,
|
|
"learning_rate": 3.700011935565384e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1246199756860733,
|
|
"step": 1725,
|
|
"valid_targets_mean": 1751.3,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 1.8191377497371188,
|
|
"grad_norm": 0.4953597273119919,
|
|
"learning_rate": 3.697243774604145e-05,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12704262137413025,
|
|
"step": 1730,
|
|
"valid_targets_mean": 1958.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.8243953732912723,
|
|
"grad_norm": 0.45842367827210895,
|
|
"learning_rate": 3.6944639459473e-05,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12169969081878662,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2192.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.8296529968454258,
|
|
"grad_norm": 0.293515666490087,
|
|
"learning_rate": 3.69167246870477e-05,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07428356260061264,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3516.8,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 1.8349106203995795,
|
|
"grad_norm": 0.3157149440919976,
|
|
"learning_rate": 3.6888693620665546e-05,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07667044550180435,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3454.2,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 1.840168243953733,
|
|
"grad_norm": 0.4153518110935024,
|
|
"learning_rate": 3.686054645302598e-05,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11578220129013062,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2119.8,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 1.8454258675078865,
|
|
"grad_norm": 0.45207660433666463,
|
|
"learning_rate": 3.6832283377626603e-05,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10898630321025848,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2944.4,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.85068349106204,
|
|
"grad_norm": 0.36201037251499957,
|
|
"learning_rate": 3.680390458876182e-05,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12806929647922516,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3749.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.8559411146161935,
|
|
"grad_norm": 0.3132004258024364,
|
|
"learning_rate": 3.67754102815215e-05,
|
|
"loss": 0.1095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528759121894836,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3300.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 1.861198738170347,
|
|
"grad_norm": 0.29773599774518333,
|
|
"learning_rate": 3.6746800651789636e-05,
|
|
"loss": 0.0937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08347194641828537,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4140.2,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 1.8664563617245005,
|
|
"grad_norm": 0.4243554271166663,
|
|
"learning_rate": 3.671807589624302e-05,
|
|
"loss": 0.1095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1536114364862442,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3947.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 1.871713985278654,
|
|
"grad_norm": 0.34987506489471215,
|
|
"learning_rate": 3.6689236212349865e-05,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11188700795173645,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3724.9,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.8769716088328074,
|
|
"grad_norm": 0.4332686685219863,
|
|
"learning_rate": 3.6660281798368485e-05,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1156931072473526,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3279.9,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 1.882229232386961,
|
|
"grad_norm": 0.4806651190458711,
|
|
"learning_rate": 3.663121285334586e-05,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24918416142463684,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2807.4,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.8874868559411146,
|
|
"grad_norm": 0.4214766257021576,
|
|
"learning_rate": 3.660202957711635e-05,
|
|
"loss": 0.0894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11336281895637512,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2360.1,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 1.8927444794952681,
|
|
"grad_norm": 0.34895621187632125,
|
|
"learning_rate": 3.657273217030026e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11209186166524887,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3388.2,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.8980021030494216,
|
|
"grad_norm": 0.3762665012315893,
|
|
"learning_rate": 3.654332083430252e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11618360131978989,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3025.7,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.9032597266035753,
|
|
"grad_norm": 0.38840186150542616,
|
|
"learning_rate": 3.651379577131121e-05,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10022513568401337,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3426.3,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 1.9085173501577288,
|
|
"grad_norm": 0.4766747024321835,
|
|
"learning_rate": 3.648415718429629e-05,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14906629920005798,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2339.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.9137749737118823,
|
|
"grad_norm": 0.4337561229887625,
|
|
"learning_rate": 3.6454405277008087e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1729247272014618,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2480.9,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.9190325972660358,
|
|
"grad_norm": 0.42775200454161955,
|
|
"learning_rate": 3.6424540253975985e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10183486342430115,
|
|
"step": 1825,
|
|
"valid_targets_mean": 2543.4,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.9242902208201893,
|
|
"grad_norm": 0.5433460772419119,
|
|
"learning_rate": 3.6394562320506955e-05,
|
|
"loss": 0.1162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1285993456840515,
|
|
"step": 1830,
|
|
"valid_targets_mean": 1684.0,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 1.9295478443743428,
|
|
"grad_norm": 0.37589814485703194,
|
|
"learning_rate": 3.636447168268419e-05,
|
|
"loss": 0.1086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11380413174629211,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2971.1,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.9348054679284963,
|
|
"grad_norm": 0.43694253835512376,
|
|
"learning_rate": 3.633426854736566e-05,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216128021478653,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2640.1,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 1.9400630914826498,
|
|
"grad_norm": 0.5351434455802149,
|
|
"learning_rate": 3.6303953122182695e-05,
|
|
"loss": 0.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10800480842590332,
|
|
"step": 1845,
|
|
"valid_targets_mean": 2104.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.9453207150368033,
|
|
"grad_norm": 0.40867408871935174,
|
|
"learning_rate": 3.6273525615538564e-05,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09918899834156036,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2595.4,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 1.9505783385909568,
|
|
"grad_norm": 0.4380264597384522,
|
|
"learning_rate": 3.6242986236607046e-05,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10596967488527298,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2202.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.9558359621451105,
|
|
"grad_norm": 0.36890437184137304,
|
|
"learning_rate": 3.6212335195330976e-05,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09210898727178574,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2810.8,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.961093585699264,
|
|
"grad_norm": 0.4697428864373329,
|
|
"learning_rate": 3.618157270242082e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13422542810440063,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3550.4,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.9663512092534174,
|
|
"grad_norm": 0.259548983566761,
|
|
"learning_rate": 3.615069896935321e-05,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08674701303243637,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4882.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.971608832807571,
|
|
"grad_norm": 0.32231575299830506,
|
|
"learning_rate": 3.6119714208369506e-05,
|
|
"loss": 0.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10211837291717529,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3580.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 1.9768664563617246,
|
|
"grad_norm": 0.2994321725246202,
|
|
"learning_rate": 3.608861863247432e-05,
|
|
"loss": 0.1057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09767977893352509,
|
|
"step": 1880,
|
|
"valid_targets_mean": 3815.4,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.9821240799158781,
|
|
"grad_norm": 0.47456842792846704,
|
|
"learning_rate": 3.6057412455434075e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623363733291626,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2739.1,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 1.9873817034700316,
|
|
"grad_norm": 0.3347442976860358,
|
|
"learning_rate": 3.6026095891775494e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10281935334205627,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3168.9,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 1.9926393270241851,
|
|
"grad_norm": 0.3646648981550504,
|
|
"learning_rate": 3.5994669156784184e-05,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11803923547267914,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2708.9,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 1.9978969505783386,
|
|
"grad_norm": 0.43282634780454476,
|
|
"learning_rate": 3.5963132466503107e-05,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1694352924823761,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2495.1,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 2.003154574132492,
|
|
"grad_norm": 0.7522957715698859,
|
|
"learning_rate": 3.593148603773111e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25149911642074585,
|
|
"step": 1905,
|
|
"valid_targets_mean": 1611.4,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 2.0084121976866456,
|
|
"grad_norm": 0.68414800058717,
|
|
"learning_rate": 3.5899730088021455e-05,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19962722063064575,
|
|
"step": 1910,
|
|
"valid_targets_mean": 1591.2,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 2.013669821240799,
|
|
"grad_norm": 0.6996957125797055,
|
|
"learning_rate": 3.586786483568028e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19607800245285034,
|
|
"step": 1915,
|
|
"valid_targets_mean": 1441.8,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.0189274447949526,
|
|
"grad_norm": 0.7897949386423881,
|
|
"learning_rate": 3.583589049976514e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1937992125749588,
|
|
"step": 1920,
|
|
"valid_targets_mean": 1389.0,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 2.024185068349106,
|
|
"grad_norm": 0.722154507796051,
|
|
"learning_rate": 3.580380730008348e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18644759058952332,
|
|
"step": 1925,
|
|
"valid_targets_mean": 1616.4,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.0294426919032595,
|
|
"grad_norm": 0.6730997823927874,
|
|
"learning_rate": 3.577161545719113e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19347545504570007,
|
|
"step": 1930,
|
|
"valid_targets_mean": 1518.4,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.034700315457413,
|
|
"grad_norm": 0.7160419813944304,
|
|
"learning_rate": 3.573931519239079e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1721259206533432,
|
|
"step": 1935,
|
|
"valid_targets_mean": 1189.1,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 2.039957939011567,
|
|
"grad_norm": 0.8271975666607679,
|
|
"learning_rate": 3.5706906727730496e-05,
|
|
"loss": 0.1926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1924540102481842,
|
|
"step": 1940,
|
|
"valid_targets_mean": 1299.6,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 2.0452155625657205,
|
|
"grad_norm": 0.7423576453869396,
|
|
"learning_rate": 3.567439028600211e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19493412971496582,
|
|
"step": 1945,
|
|
"valid_targets_mean": 1416.0,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.050473186119874,
|
|
"grad_norm": 0.6911407874363379,
|
|
"learning_rate": 3.564176609073979e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21674056351184845,
|
|
"step": 1950,
|
|
"valid_targets_mean": 1803.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 2.0557308096740274,
|
|
"grad_norm": 0.6752145193438658,
|
|
"learning_rate": 3.5609034366218426e-05,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19275152683258057,
|
|
"step": 1955,
|
|
"valid_targets_mean": 1608.1,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 2.060988433228181,
|
|
"grad_norm": 2.530049303548015,
|
|
"learning_rate": 3.5576195337452146e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2066556066274643,
|
|
"step": 1960,
|
|
"valid_targets_mean": 1727.7,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.0662460567823344,
|
|
"grad_norm": 0.6471764826910803,
|
|
"learning_rate": 3.55432492301927e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18466389179229736,
|
|
"step": 1965,
|
|
"valid_targets_mean": 1570.2,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 2.071503680336488,
|
|
"grad_norm": 0.7097102879629508,
|
|
"learning_rate": 3.551019627092799e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19337697327136993,
|
|
"step": 1970,
|
|
"valid_targets_mean": 1424.5,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 2.0767613038906414,
|
|
"grad_norm": 0.6526169157837053,
|
|
"learning_rate": 3.547703668688044e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17414447665214539,
|
|
"step": 1975,
|
|
"valid_targets_mean": 1382.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 2.082018927444795,
|
|
"grad_norm": 0.6878509073739828,
|
|
"learning_rate": 3.544377070600549e-05,
|
|
"loss": 0.194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17662809789180756,
|
|
"step": 1980,
|
|
"valid_targets_mean": 1406.4,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 2.0872765509989484,
|
|
"grad_norm": 0.7004045433136924,
|
|
"learning_rate": 3.541039855699e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21233880519866943,
|
|
"step": 1985,
|
|
"valid_targets_mean": 1565.7,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 2.092534174553102,
|
|
"grad_norm": 0.7746417992399471,
|
|
"learning_rate": 3.537692046925065e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21321943402290344,
|
|
"step": 1990,
|
|
"valid_targets_mean": 1478.0,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.0977917981072554,
|
|
"grad_norm": 0.6885595749055048,
|
|
"learning_rate": 3.534333667293244e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20462816953659058,
|
|
"step": 1995,
|
|
"valid_targets_mean": 1647.7,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 2.103049421661409,
|
|
"grad_norm": 0.6525174503414687,
|
|
"learning_rate": 3.5309647398907056e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18002676963806152,
|
|
"step": 2000,
|
|
"valid_targets_mean": 1647.3,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 2.108307045215563,
|
|
"grad_norm": 0.7162817563018055,
|
|
"learning_rate": 3.527585287877125e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1998218595981598,
|
|
"step": 2005,
|
|
"valid_targets_mean": 1411.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.1135646687697163,
|
|
"grad_norm": 0.7478725702392524,
|
|
"learning_rate": 3.5241953344845345e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2090776115655899,
|
|
"step": 2010,
|
|
"valid_targets_mean": 1483.4,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 2.1188222923238698,
|
|
"grad_norm": 1.0183268180552594,
|
|
"learning_rate": 3.520794903017153e-05,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17826226353645325,
|
|
"step": 2015,
|
|
"valid_targets_mean": 1242.0,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 2.1240799158780233,
|
|
"grad_norm": 0.7146762033496259,
|
|
"learning_rate": 3.517384016851235e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19717749953269958,
|
|
"step": 2020,
|
|
"valid_targets_mean": 1514.0,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 2.1293375394321767,
|
|
"grad_norm": 0.6660659625091833,
|
|
"learning_rate": 3.513962699434903e-05,
|
|
"loss": 0.1892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1777816116809845,
|
|
"step": 2025,
|
|
"valid_targets_mean": 1553.8,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 2.1345951629863302,
|
|
"grad_norm": 0.6750947443955108,
|
|
"learning_rate": 3.5105309742879894e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17612925171852112,
|
|
"step": 2030,
|
|
"valid_targets_mean": 1525.9,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 2.1398527865404837,
|
|
"grad_norm": 0.8047912256569687,
|
|
"learning_rate": 3.507088865001876e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19884192943572998,
|
|
"step": 2035,
|
|
"valid_targets_mean": 1614.5,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 2.145110410094637,
|
|
"grad_norm": 0.726333342895076,
|
|
"learning_rate": 3.5036363952393296e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19761952757835388,
|
|
"step": 2040,
|
|
"valid_targets_mean": 1527.8,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 2.1503680336487907,
|
|
"grad_norm": 0.6923696815562899,
|
|
"learning_rate": 3.500173588734339e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872178465127945,
|
|
"step": 2045,
|
|
"valid_targets_mean": 1494.4,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 2.155625657202944,
|
|
"grad_norm": 0.7240265939827225,
|
|
"learning_rate": 3.4967004692919555e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18411380052566528,
|
|
"step": 2050,
|
|
"valid_targets_mean": 1455.6,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 2.1608832807570977,
|
|
"grad_norm": 0.7671571892791755,
|
|
"learning_rate": 3.4932170607881226e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20216378569602966,
|
|
"step": 2055,
|
|
"valid_targets_mean": 1665.1,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 2.166140904311251,
|
|
"grad_norm": 0.7262689528795186,
|
|
"learning_rate": 3.4897233871695205e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209253191947937,
|
|
"step": 2060,
|
|
"valid_targets_mean": 1509.7,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 2.1713985278654047,
|
|
"grad_norm": 0.7438939712336335,
|
|
"learning_rate": 3.4862194724533934e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19082891941070557,
|
|
"step": 2065,
|
|
"valid_targets_mean": 1408.4,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 2.176656151419558,
|
|
"grad_norm": 0.6782173450460893,
|
|
"learning_rate": 3.4827053407273894e-05,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17597463726997375,
|
|
"step": 2070,
|
|
"valid_targets_mean": 1479.1,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 2.181913774973712,
|
|
"grad_norm": 0.877909272775887,
|
|
"learning_rate": 3.4791810161493935e-05,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17485372722148895,
|
|
"step": 2075,
|
|
"valid_targets_mean": 1459.5,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 2.1871713985278656,
|
|
"grad_norm": 0.7142398254060238,
|
|
"learning_rate": 3.47564652294736e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19566529989242554,
|
|
"step": 2080,
|
|
"valid_targets_mean": 1485.9,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 2.192429022082019,
|
|
"grad_norm": 0.6865584578057562,
|
|
"learning_rate": 3.472101885419149e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16726654767990112,
|
|
"step": 2085,
|
|
"valid_targets_mean": 1500.9,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 2.1976866456361726,
|
|
"grad_norm": 0.7185195607043672,
|
|
"learning_rate": 3.468547127932358e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17954030632972717,
|
|
"step": 2090,
|
|
"valid_targets_mean": 1381.5,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 2.202944269190326,
|
|
"grad_norm": 0.769727438652912,
|
|
"learning_rate": 3.4649822749241525e-05,
|
|
"loss": 0.1875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18864957988262177,
|
|
"step": 2095,
|
|
"valid_targets_mean": 1446.8,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 2.2082018927444795,
|
|
"grad_norm": 0.7577107328406806,
|
|
"learning_rate": 3.4614073509011e-05,
|
|
"loss": 0.2002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2130323350429535,
|
|
"step": 2100,
|
|
"valid_targets_mean": 1633.3,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 2.213459516298633,
|
|
"grad_norm": 0.8010918603868054,
|
|
"learning_rate": 3.4578223804390026e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1817643791437149,
|
|
"step": 2105,
|
|
"valid_targets_mean": 1395.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 2.2187171398527865,
|
|
"grad_norm": 0.7435288929665057,
|
|
"learning_rate": 3.454227388182725e-05,
|
|
"loss": 0.1985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19536858797073364,
|
|
"step": 2110,
|
|
"valid_targets_mean": 1393.9,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 2.22397476340694,
|
|
"grad_norm": 0.7034138762582146,
|
|
"learning_rate": 3.450622398846026e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17521831393241882,
|
|
"step": 2115,
|
|
"valid_targets_mean": 1465.8,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 2.2292323869610935,
|
|
"grad_norm": 0.7624198822734523,
|
|
"learning_rate": 3.447007437211392e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19831611216068268,
|
|
"step": 2120,
|
|
"valid_targets_mean": 1430.3,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 2.234490010515247,
|
|
"grad_norm": 0.7792809407586418,
|
|
"learning_rate": 3.443382528129862e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1771901547908783,
|
|
"step": 2125,
|
|
"valid_targets_mean": 1465.0,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 2.2397476340694005,
|
|
"grad_norm": 0.7145972839785792,
|
|
"learning_rate": 3.4397476965208604e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2003161609172821,
|
|
"step": 2130,
|
|
"valid_targets_mean": 1750.8,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 2.245005257623554,
|
|
"grad_norm": 0.679727513368836,
|
|
"learning_rate": 3.43610296737202e-05,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17647641897201538,
|
|
"step": 2135,
|
|
"valid_targets_mean": 1350.2,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 2.250262881177708,
|
|
"grad_norm": 0.7168292262756297,
|
|
"learning_rate": 3.432448365739019e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17237195372581482,
|
|
"step": 2140,
|
|
"valid_targets_mean": 1412.2,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 2.2555205047318614,
|
|
"grad_norm": 0.8046272311937696,
|
|
"learning_rate": 3.4287839167454016e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17528027296066284,
|
|
"step": 2145,
|
|
"valid_targets_mean": 1399.9,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 2.260778128286015,
|
|
"grad_norm": 0.6603447025307222,
|
|
"learning_rate": 3.4251096455824076e-05,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184175044298172,
|
|
"step": 2150,
|
|
"valid_targets_mean": 1690.8,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 2.2660357518401684,
|
|
"grad_norm": 0.6434158822206664,
|
|
"learning_rate": 3.421425577508799e-05,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1735583245754242,
|
|
"step": 2155,
|
|
"valid_targets_mean": 1568.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 2.271293375394322,
|
|
"grad_norm": 0.8009726259772466,
|
|
"learning_rate": 3.417731737850687e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1895967721939087,
|
|
"step": 2160,
|
|
"valid_targets_mean": 1525.5,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.2765509989484753,
|
|
"grad_norm": 0.643494307871804,
|
|
"learning_rate": 3.4140281520013595e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1769677698612213,
|
|
"step": 2165,
|
|
"valid_targets_mean": 1615.4,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.281808622502629,
|
|
"grad_norm": 0.726869648079002,
|
|
"learning_rate": 3.4103148454211017e-05,
|
|
"loss": 0.1896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17336110770702362,
|
|
"step": 2170,
|
|
"valid_targets_mean": 1416.5,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 2.2870662460567823,
|
|
"grad_norm": 0.83309030190731,
|
|
"learning_rate": 3.4065918436370244e-05,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2031644731760025,
|
|
"step": 2175,
|
|
"valid_targets_mean": 1385.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 2.292323869610936,
|
|
"grad_norm": 0.6900344242032486,
|
|
"learning_rate": 3.402859172242889e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17275655269622803,
|
|
"step": 2180,
|
|
"valid_targets_mean": 1507.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 2.2975814931650893,
|
|
"grad_norm": 0.7526050853659193,
|
|
"learning_rate": 3.399116856898931e-05,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19981279969215393,
|
|
"step": 2185,
|
|
"valid_targets_mean": 1629.7,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 2.302839116719243,
|
|
"grad_norm": 0.7564797889911168,
|
|
"learning_rate": 3.395364923331681e-05,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17029231786727905,
|
|
"step": 2190,
|
|
"valid_targets_mean": 1373.2,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 2.3080967402733963,
|
|
"grad_norm": 0.7394175133400129,
|
|
"learning_rate": 3.391603397333793e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19023913145065308,
|
|
"step": 2195,
|
|
"valid_targets_mean": 1431.1,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 2.3133543638275498,
|
|
"grad_norm": 0.6295547136967085,
|
|
"learning_rate": 3.387832304763861e-05,
|
|
"loss": 0.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18990573287010193,
|
|
"step": 2200,
|
|
"valid_targets_mean": 1871.2,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 2.3186119873817033,
|
|
"grad_norm": 0.7038322770500872,
|
|
"learning_rate": 3.384051671546247e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.176752507686615,
|
|
"step": 2205,
|
|
"valid_targets_mean": 1237.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.3238696109358568,
|
|
"grad_norm": 0.7286234859219742,
|
|
"learning_rate": 3.380261523670899e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19026847183704376,
|
|
"step": 2210,
|
|
"valid_targets_mean": 1464.4,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 2.3291272344900107,
|
|
"grad_norm": 0.6864763485278645,
|
|
"learning_rate": 3.376461887193173e-05,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17656734585762024,
|
|
"step": 2215,
|
|
"valid_targets_mean": 1596.5,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 2.334384858044164,
|
|
"grad_norm": 0.6830211289877853,
|
|
"learning_rate": 3.372652788233656e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18425202369689941,
|
|
"step": 2220,
|
|
"valid_targets_mean": 1417.5,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 2.3396424815983177,
|
|
"grad_norm": 0.7745185199611311,
|
|
"learning_rate": 3.368834252977982e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19619306921958923,
|
|
"step": 2225,
|
|
"valid_targets_mean": 1461.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.344900105152471,
|
|
"grad_norm": 0.8146170127964989,
|
|
"learning_rate": 3.3650063076766586e-05,
|
|
"loss": 0.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17531231045722961,
|
|
"step": 2230,
|
|
"valid_targets_mean": 1477.8,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 2.3501577287066246,
|
|
"grad_norm": 0.6958921902069705,
|
|
"learning_rate": 3.3611689786448786e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1900712549686432,
|
|
"step": 2235,
|
|
"valid_targets_mean": 1517.9,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 2.355415352260778,
|
|
"grad_norm": 0.7717355000293836,
|
|
"learning_rate": 3.357322292262346e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2049972414970398,
|
|
"step": 2240,
|
|
"valid_targets_mean": 1540.2,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 2.3606729758149316,
|
|
"grad_norm": 0.720791279524025,
|
|
"learning_rate": 3.353466274973092e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19426554441452026,
|
|
"step": 2245,
|
|
"valid_targets_mean": 1807.9,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 2.365930599369085,
|
|
"grad_norm": 0.7436225989082415,
|
|
"learning_rate": 3.3496009532852907e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2064618468284607,
|
|
"step": 2250,
|
|
"valid_targets_mean": 1649.6,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 2.3711882229232386,
|
|
"grad_norm": 0.7916354128987332,
|
|
"learning_rate": 3.345726353771082e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18808495998382568,
|
|
"step": 2255,
|
|
"valid_targets_mean": 1682.4,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 2.376445846477392,
|
|
"grad_norm": 0.7266901064565375,
|
|
"learning_rate": 3.341842503066384e-05,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16265717148780823,
|
|
"step": 2260,
|
|
"valid_targets_mean": 1117.4,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 2.3817034700315456,
|
|
"grad_norm": 0.721759727205511,
|
|
"learning_rate": 3.3379494278707136e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20615462958812714,
|
|
"step": 2265,
|
|
"valid_targets_mean": 1740.9,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 2.386961093585699,
|
|
"grad_norm": 0.7544065247730406,
|
|
"learning_rate": 3.334047154947e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21215087175369263,
|
|
"step": 2270,
|
|
"valid_targets_mean": 1843.3,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 2.392218717139853,
|
|
"grad_norm": 0.7217184866490929,
|
|
"learning_rate": 3.330135711121404e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18952813744544983,
|
|
"step": 2275,
|
|
"valid_targets_mean": 1573.6,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 2.3974763406940065,
|
|
"grad_norm": 0.7785411563666498,
|
|
"learning_rate": 3.32621512328313e-05,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18069085478782654,
|
|
"step": 2280,
|
|
"valid_targets_mean": 1531.9,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 2.40273396424816,
|
|
"grad_norm": 0.7234598318665647,
|
|
"learning_rate": 3.3222854183842434e-05,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15989463031291962,
|
|
"step": 2285,
|
|
"valid_targets_mean": 1287.7,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 2.4079915878023135,
|
|
"grad_norm": 0.7905465914706342,
|
|
"learning_rate": 3.318346623439486e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20036505162715912,
|
|
"step": 2290,
|
|
"valid_targets_mean": 1601.7,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.413249211356467,
|
|
"grad_norm": 0.9397015694455316,
|
|
"learning_rate": 3.314398765526087e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17739002406597137,
|
|
"step": 2295,
|
|
"valid_targets_mean": 1477.9,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 2.4185068349106205,
|
|
"grad_norm": 0.706169227209052,
|
|
"learning_rate": 3.310441871783581e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16355757415294647,
|
|
"step": 2300,
|
|
"valid_targets_mean": 1497.5,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.423764458464774,
|
|
"grad_norm": 0.6726507867346725,
|
|
"learning_rate": 3.3064759694136165e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16233614087104797,
|
|
"step": 2305,
|
|
"valid_targets_mean": 1422.9,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 2.4290220820189274,
|
|
"grad_norm": 0.9421393157827742,
|
|
"learning_rate": 3.302501085679776e-05,
|
|
"loss": 0.1666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.169845849275589,
|
|
"step": 2310,
|
|
"valid_targets_mean": 1248.6,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 2.434279705573081,
|
|
"grad_norm": 0.6899969418752109,
|
|
"learning_rate": 3.29851724790738e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17680177092552185,
|
|
"step": 2315,
|
|
"valid_targets_mean": 1485.9,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.4395373291272344,
|
|
"grad_norm": 0.7056916333594729,
|
|
"learning_rate": 3.294524483483306e-05,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16916847229003906,
|
|
"step": 2320,
|
|
"valid_targets_mean": 1496.9,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 2.444794952681388,
|
|
"grad_norm": 0.7207319728703836,
|
|
"learning_rate": 3.290522819855799e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1925514191389084,
|
|
"step": 2325,
|
|
"valid_targets_mean": 1693.9,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 2.4500525762355414,
|
|
"grad_norm": 0.7096987739562477,
|
|
"learning_rate": 3.2865122845342776e-05,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16411489248275757,
|
|
"step": 2330,
|
|
"valid_targets_mean": 1390.0,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 2.455310199789695,
|
|
"grad_norm": 0.7811065159791979,
|
|
"learning_rate": 3.282492905089151e-05,
|
|
"loss": 0.1793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19206087291240692,
|
|
"step": 2335,
|
|
"valid_targets_mean": 1381.8,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 2.4605678233438484,
|
|
"grad_norm": 0.6634836472231062,
|
|
"learning_rate": 3.2784647091516285e-05,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16965967416763306,
|
|
"step": 2340,
|
|
"valid_targets_mean": 1558.3,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 2.465825446898002,
|
|
"grad_norm": 0.5874972460874052,
|
|
"learning_rate": 3.274427724413527e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21588899195194244,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2055.1,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 2.471083070452156,
|
|
"grad_norm": 0.36021750066691155,
|
|
"learning_rate": 3.270381978627081e-05,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1089751273393631,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3535.2,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 2.4763406940063093,
|
|
"grad_norm": 0.4433635986475339,
|
|
"learning_rate": 3.266327499604755e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1179640144109726,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2602.1,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 2.481598317560463,
|
|
"grad_norm": 0.4262428373949676,
|
|
"learning_rate": 3.262264315219049e-05,
|
|
"loss": 0.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15705761313438416,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3028.2,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 2.4868559411146163,
|
|
"grad_norm": 0.39713810856872656,
|
|
"learning_rate": 3.258192453402306e-05,
|
|
"loss": 0.1013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09887437522411346,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2374.9,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 2.4921135646687698,
|
|
"grad_norm": 0.3798965505081102,
|
|
"learning_rate": 3.254111942146526e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10583527386188507,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2866.5,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 2.4973711882229233,
|
|
"grad_norm": 0.6909618223654735,
|
|
"learning_rate": 3.2500228095031677e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1656060814857483,
|
|
"step": 2375,
|
|
"valid_targets_mean": 1443.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 2.5026288117770767,
|
|
"grad_norm": 0.3645376950455108,
|
|
"learning_rate": 3.2459250835829553e-05,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11906848102807999,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3098.6,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.5078864353312302,
|
|
"grad_norm": 0.3985346121901146,
|
|
"learning_rate": 3.241818792555692e-05,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11283739656209946,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2836.0,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 2.5131440588853837,
|
|
"grad_norm": 0.2671267899566765,
|
|
"learning_rate": 3.2377039646500565e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06598129123449326,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4075.8,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 2.518401682439537,
|
|
"grad_norm": 0.34863111771096733,
|
|
"learning_rate": 3.2335806281534195e-05,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12008039653301239,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4244.4,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 2.5236593059936907,
|
|
"grad_norm": 0.34054345877293296,
|
|
"learning_rate": 3.229448811411639e-05,
|
|
"loss": 0.0988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10303230583667755,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4220.3,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 2.5289169295478446,
|
|
"grad_norm": 0.33204504204343716,
|
|
"learning_rate": 3.225308542828874e-05,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0962679535150528,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3845.9,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 2.534174553101998,
|
|
"grad_norm": 0.3896243175443543,
|
|
"learning_rate": 3.221159850867385e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1067102700471878,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2542.9,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.5394321766561516,
|
|
"grad_norm": 0.3002808227690645,
|
|
"learning_rate": 3.217002764047338e-05,
|
|
"loss": 0.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08377303183078766,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3707.9,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 2.544689800210305,
|
|
"grad_norm": 0.5028060391316009,
|
|
"learning_rate": 3.212837310946609e-05,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12688994407653809,
|
|
"step": 2420,
|
|
"valid_targets_mean": 1648.7,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 2.5499474237644586,
|
|
"grad_norm": 0.28550508665248653,
|
|
"learning_rate": 3.20866352020059e-05,
|
|
"loss": 0.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07078361511230469,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3318.9,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 2.555205047318612,
|
|
"grad_norm": 0.45015381265804133,
|
|
"learning_rate": 3.204481420501989e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10451000183820724,
|
|
"step": 2430,
|
|
"valid_targets_mean": 2370.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 2.5604626708727656,
|
|
"grad_norm": 0.5125497887078124,
|
|
"learning_rate": 3.200291040600632e-05,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1507492959499359,
|
|
"step": 2435,
|
|
"valid_targets_mean": 2091.5,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.565720294426919,
|
|
"grad_norm": 0.42792156593961106,
|
|
"learning_rate": 3.196092409303272e-05,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11543115228414536,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2961.9,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 2.5709779179810726,
|
|
"grad_norm": 0.38404609759881503,
|
|
"learning_rate": 3.1918855554733804e-05,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1083003580570221,
|
|
"step": 2445,
|
|
"valid_targets_mean": 2553.1,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.576235541535226,
|
|
"grad_norm": 0.4435954547963332,
|
|
"learning_rate": 3.187670508030959e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1930696666240692,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3499.9,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 2.5814931650893795,
|
|
"grad_norm": 0.37553455052194584,
|
|
"learning_rate": 3.183447295952334e-05,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07744848728179932,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4125.6,
|
|
"valid_targets_min": 3732
|
|
},
|
|
{
|
|
"epoch": 2.586750788643533,
|
|
"grad_norm": 0.6469228961036678,
|
|
"learning_rate": 3.1792159482699606e-05,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15886491537094116,
|
|
"step": 2460,
|
|
"valid_targets_mean": 1383.1,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 2.5920084121976865,
|
|
"grad_norm": 0.4191589635266597,
|
|
"learning_rate": 3.174976494072222e-05,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10635621100664139,
|
|
"step": 2465,
|
|
"valid_targets_mean": 2926.9,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 2.59726603575184,
|
|
"grad_norm": 0.3402858715999574,
|
|
"learning_rate": 3.170728962503227e-05,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10382866114377975,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3999.0,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 2.6025236593059935,
|
|
"grad_norm": 0.6105994900667755,
|
|
"learning_rate": 3.1664733827626174e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26841890811920166,
|
|
"step": 2475,
|
|
"valid_targets_mean": 1954.8,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.607781282860147,
|
|
"grad_norm": 0.4198025547006107,
|
|
"learning_rate": 3.1622097841053574e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1229616105556488,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3351.6,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 2.6130389064143005,
|
|
"grad_norm": 0.4213232846564987,
|
|
"learning_rate": 3.15793819584154e-05,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11470212042331696,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2846.6,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 2.6182965299684544,
|
|
"grad_norm": 0.4077725239227606,
|
|
"learning_rate": 3.1536586473361815e-05,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11247079819440842,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2976.6,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 2.623554153522608,
|
|
"grad_norm": 0.42956799077435265,
|
|
"learning_rate": 3.149371168009022e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12841041386127472,
|
|
"step": 2495,
|
|
"valid_targets_mean": 3806.4,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 2.6288117770767614,
|
|
"grad_norm": 0.2734498060324378,
|
|
"learning_rate": 3.145075787334319e-05,
|
|
"loss": 0.0852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08369526267051697,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4550.2,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 2.634069400630915,
|
|
"grad_norm": 0.33376159399304134,
|
|
"learning_rate": 3.140772534840652e-05,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08720729500055313,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3354.7,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.6393270241850684,
|
|
"grad_norm": 0.37737954881427266,
|
|
"learning_rate": 3.1364614401107126e-05,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11012201756238937,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3625.6,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 2.644584647739222,
|
|
"grad_norm": 0.37165700619929226,
|
|
"learning_rate": 3.1321425327811044e-05,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09358272701501846,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3525.6,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 2.6498422712933754,
|
|
"grad_norm": 0.3563107369337592,
|
|
"learning_rate": 3.127815842542138e-05,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08345861732959747,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3017.1,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 2.655099894847529,
|
|
"grad_norm": 0.37286748074995496,
|
|
"learning_rate": 3.1234813991376296e-05,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08975954353809357,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3565.4,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 2.6603575184016823,
|
|
"grad_norm": 0.3581750525136319,
|
|
"learning_rate": 3.119139232364693e-05,
|
|
"loss": 0.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08885161578655243,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3534.2,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.665615141955836,
|
|
"grad_norm": 0.6377410460278035,
|
|
"learning_rate": 3.1147893720735356e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027060031890869,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2330.0,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.6708727655099898,
|
|
"grad_norm": 0.3094795945561733,
|
|
"learning_rate": 3.110431848167255e-05,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08099585771560669,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3734.1,
|
|
"valid_targets_min": 2719
|
|
},
|
|
{
|
|
"epoch": 2.6761303890641432,
|
|
"grad_norm": 0.5046490055828962,
|
|
"learning_rate": 3.106066690601633e-05,
|
|
"loss": 0.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11604218930006027,
|
|
"step": 2545,
|
|
"valid_targets_mean": 1698.0,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 2.6813880126182967,
|
|
"grad_norm": 0.2908057278242517,
|
|
"learning_rate": 3.101693929384927e-05,
|
|
"loss": 0.0861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07138262689113617,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3763.1,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 2.6866456361724502,
|
|
"grad_norm": 0.3616701521439811,
|
|
"learning_rate": 3.097313594577667e-05,
|
|
"loss": 0.0914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09038901329040527,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2663.5,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.6919032597266037,
|
|
"grad_norm": 0.3352070782547861,
|
|
"learning_rate": 3.092925716292447e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09159399569034576,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3527.6,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 2.697160883280757,
|
|
"grad_norm": 0.42113658320185515,
|
|
"learning_rate": 3.088530324693719e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15114620327949524,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3102.0,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.7024185068349107,
|
|
"grad_norm": 0.40640939297887746,
|
|
"learning_rate": 3.0841274499975855e-05,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08857488632202148,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2138.5,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 2.707676130389064,
|
|
"grad_norm": 0.7582232852961461,
|
|
"learning_rate": 3.079717122471591e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18078215420246124,
|
|
"step": 2575,
|
|
"valid_targets_mean": 1224.7,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 2.7129337539432177,
|
|
"grad_norm": 0.501960236405102,
|
|
"learning_rate": 3.075299372434515e-05,
|
|
"loss": 0.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13699260354042053,
|
|
"step": 2580,
|
|
"valid_targets_mean": 1942.3,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 2.718191377497371,
|
|
"grad_norm": 0.47803679623958584,
|
|
"learning_rate": 3.0708742302561606e-05,
|
|
"loss": 0.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12304352223873138,
|
|
"step": 2585,
|
|
"valid_targets_mean": 2461.9,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 2.7234490010515247,
|
|
"grad_norm": 0.33458570722565917,
|
|
"learning_rate": 3.066441726357153e-05,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08373156189918518,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3349.7,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 2.728706624605678,
|
|
"grad_norm": 0.5810954421117924,
|
|
"learning_rate": 3.062001891208721e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14271876215934753,
|
|
"step": 2595,
|
|
"valid_targets_mean": 1778.2,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 2.7339642481598316,
|
|
"grad_norm": 0.33866352401190314,
|
|
"learning_rate": 3.0575547553324944e-05,
|
|
"loss": 0.0947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09259197115898132,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3331.4,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 2.739221871713985,
|
|
"grad_norm": 0.2455960232652355,
|
|
"learning_rate": 3.053100349300291e-05,
|
|
"loss": 0.1022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0542142353951931,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4553.9,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.7444794952681386,
|
|
"grad_norm": 0.36336803839048115,
|
|
"learning_rate": 3.0486387037339074e-05,
|
|
"loss": 0.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08497624844312668,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2921.8,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 2.749737118822292,
|
|
"grad_norm": 0.35976051819140875,
|
|
"learning_rate": 3.0441698493049078e-05,
|
|
"loss": 0.093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11405770480632782,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4009.8,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 2.7549947423764456,
|
|
"grad_norm": 0.44373125167841454,
|
|
"learning_rate": 3.0396938167344153e-05,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13850055634975433,
|
|
"step": 2620,
|
|
"valid_targets_mean": 2390.6,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 2.7602523659305995,
|
|
"grad_norm": 0.6014635705304406,
|
|
"learning_rate": 3.0352106367928974e-05,
|
|
"loss": 0.1114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10286036133766174,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3466.8,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 2.765509989484753,
|
|
"grad_norm": 0.38367063359718373,
|
|
"learning_rate": 3.030720340299957e-05,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1048555076122284,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3781.1,
|
|
"valid_targets_min": 2901
|
|
},
|
|
{
|
|
"epoch": 2.7707676130389065,
|
|
"grad_norm": 0.33202146237403757,
|
|
"learning_rate": 3.0262229581241197e-05,
|
|
"loss": 0.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07668071240186691,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3460.7,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 2.77602523659306,
|
|
"grad_norm": 0.36664381710018973,
|
|
"learning_rate": 3.0217185211826218e-05,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08506757020950317,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2821.6,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 2.7812828601472135,
|
|
"grad_norm": 0.3365892420413774,
|
|
"learning_rate": 3.0172070604411957e-05,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07851487398147583,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3193.8,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 2.786540483701367,
|
|
"grad_norm": 0.3107521954990926,
|
|
"learning_rate": 3.0126886069138623e-05,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07940559089183807,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3795.8,
|
|
"valid_targets_min": 2769
|
|
},
|
|
{
|
|
"epoch": 2.7917981072555205,
|
|
"grad_norm": 0.3865245672571263,
|
|
"learning_rate": 3.0081631916627114e-05,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09602253884077072,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2893.9,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 2.797055730809674,
|
|
"grad_norm": 0.33169201070750626,
|
|
"learning_rate": 3.003630845797693e-05,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07864493131637573,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3111.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 2.8023133543638274,
|
|
"grad_norm": 0.3946357391314771,
|
|
"learning_rate": 2.9990916004763996e-05,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10464547574520111,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3326.2,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.807570977917981,
|
|
"grad_norm": 0.43507979308801414,
|
|
"learning_rate": 2.9945454869038562e-05,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10327298939228058,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2347.0,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 2.812828601472135,
|
|
"grad_norm": 0.5930253754750565,
|
|
"learning_rate": 2.9899925363323022e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11379785090684891,
|
|
"step": 2675,
|
|
"valid_targets_mean": 1337.1,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 2.8180862250262884,
|
|
"grad_norm": 0.5468279937396003,
|
|
"learning_rate": 2.9854327800609775e-05,
|
|
"loss": 0.1079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158033549785614,
|
|
"step": 2680,
|
|
"valid_targets_mean": 1989.9,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 2.823343848580442,
|
|
"grad_norm": 0.43620465228141875,
|
|
"learning_rate": 2.98086624943591e-05,
|
|
"loss": 0.1118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09712845087051392,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2470.5,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 2.8286014721345953,
|
|
"grad_norm": 0.450086400246681,
|
|
"learning_rate": 2.976292975849696e-05,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10751894861459732,
|
|
"step": 2690,
|
|
"valid_targets_mean": 2661.2,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 2.833859095688749,
|
|
"grad_norm": 0.7370201864241585,
|
|
"learning_rate": 2.9717129907412857e-05,
|
|
"loss": 0.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12240259349346161,
|
|
"step": 2695,
|
|
"valid_targets_mean": 1585.1,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 2.8391167192429023,
|
|
"grad_norm": 0.6016270598846267,
|
|
"learning_rate": 2.9671263255957697e-05,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466367393732071,
|
|
"step": 2700,
|
|
"valid_targets_mean": 1623.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 2.844374342797056,
|
|
"grad_norm": 0.27378648653324206,
|
|
"learning_rate": 2.9625330119441584e-05,
|
|
"loss": 0.0914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06150565296411514,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3762.9,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 2.8496319663512093,
|
|
"grad_norm": 0.38144492600379587,
|
|
"learning_rate": 2.957933081363169e-05,
|
|
"loss": 0.0922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528522193431854,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4096.2,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 2.854889589905363,
|
|
"grad_norm": 0.3884763720171555,
|
|
"learning_rate": 2.953326565475006e-05,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11531278491020203,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2958.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 2.8601472134595163,
|
|
"grad_norm": 0.43329164171687823,
|
|
"learning_rate": 2.9487134959471445e-05,
|
|
"loss": 0.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08293789625167847,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3515.3,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 2.8654048370136698,
|
|
"grad_norm": 0.3359357271875174,
|
|
"learning_rate": 2.944093904492113e-05,
|
|
"loss": 0.0845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09996461868286133,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4868.8,
|
|
"valid_targets_min": 4077
|
|
},
|
|
{
|
|
"epoch": 2.8706624605678233,
|
|
"grad_norm": 0.3620467852222587,
|
|
"learning_rate": 2.9394678228672737e-05,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09600923955440521,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3206.5,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 2.8759200841219767,
|
|
"grad_norm": 0.4294598207270518,
|
|
"learning_rate": 2.9348352828746076e-05,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12123970687389374,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3558.0,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 2.8811777076761302,
|
|
"grad_norm": 0.3751032778537608,
|
|
"learning_rate": 2.9301963163604916e-05,
|
|
"loss": 0.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10285535454750061,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3212.2,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 2.8864353312302837,
|
|
"grad_norm": 0.39934160243166605,
|
|
"learning_rate": 2.925550955215483e-05,
|
|
"loss": 0.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0928100049495697,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2616.9,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 2.891692954784437,
|
|
"grad_norm": 0.34278090460592187,
|
|
"learning_rate": 2.9208992313740993e-05,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.079774871468544,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2966.9,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 2.8969505783385907,
|
|
"grad_norm": 0.39632647949724764,
|
|
"learning_rate": 2.916241176814596e-05,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.105677530169487,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3414.2,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 2.9022082018927446,
|
|
"grad_norm": 0.363079019721272,
|
|
"learning_rate": 2.9115768235587526e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08583900332450867,
|
|
"step": 2760,
|
|
"valid_targets_mean": 2722.4,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 2.907465825446898,
|
|
"grad_norm": 0.4827175083183273,
|
|
"learning_rate": 2.9069062036716454e-05,
|
|
"loss": 0.1053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15601631999015808,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2128.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.9127234490010516,
|
|
"grad_norm": 0.5147558124862559,
|
|
"learning_rate": 2.9022293492614334e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14720407128334045,
|
|
"step": 2770,
|
|
"valid_targets_mean": 1685.1,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 2.917981072555205,
|
|
"grad_norm": 0.4024304901026711,
|
|
"learning_rate": 2.8975462924791334e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09662102162837982,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3289.2,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 2.9232386961093586,
|
|
"grad_norm": 0.4295888137611356,
|
|
"learning_rate": 2.892857065518401e-05,
|
|
"loss": 0.0979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09456585347652435,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2418.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.928496319663512,
|
|
"grad_norm": 0.4074202459874554,
|
|
"learning_rate": 2.8881617006153072e-05,
|
|
"loss": 0.0985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09729564189910889,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3266.9,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 2.9337539432176656,
|
|
"grad_norm": 0.35867945229490455,
|
|
"learning_rate": 2.8834602300481207e-05,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08896251022815704,
|
|
"step": 2790,
|
|
"valid_targets_mean": 2896.2,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 2.939011566771819,
|
|
"grad_norm": 0.35111362106844274,
|
|
"learning_rate": 2.878752686137082e-05,
|
|
"loss": 0.0872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07829457521438599,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2914.8,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 2.9442691903259726,
|
|
"grad_norm": 0.3919187245924711,
|
|
"learning_rate": 2.874039101244183e-05,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08010765165090561,
|
|
"step": 2800,
|
|
"valid_targets_mean": 2452.7,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 2.949526813880126,
|
|
"grad_norm": 0.3383637041508128,
|
|
"learning_rate": 2.869319507772944e-05,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08066456019878387,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3346.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 2.9547844374342795,
|
|
"grad_norm": 0.48600589489422974,
|
|
"learning_rate": 2.864593938168192e-05,
|
|
"loss": 0.0979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11215262115001678,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2584.3,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 2.9600420609884335,
|
|
"grad_norm": 0.44437151597046454,
|
|
"learning_rate": 2.8598624249158367e-05,
|
|
"loss": 0.0978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11946041882038116,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3273.0,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 2.965299684542587,
|
|
"grad_norm": 0.35284810030283154,
|
|
"learning_rate": 2.855125000542647e-05,
|
|
"loss": 0.0992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10998371988534927,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4220.7,
|
|
"valid_targets_min": 2274
|
|
},
|
|
{
|
|
"epoch": 2.9705573080967405,
|
|
"grad_norm": 0.35176137941790625,
|
|
"learning_rate": 2.8503816976160278e-05,
|
|
"loss": 0.0855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10145671665668488,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3440.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 2.975814931650894,
|
|
"grad_norm": 0.2819778869137246,
|
|
"learning_rate": 2.8456325487437966e-05,
|
|
"loss": 0.0936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08021849393844604,
|
|
"step": 2830,
|
|
"valid_targets_mean": 4000.1,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 2.9810725552050474,
|
|
"grad_norm": 0.5532322086602542,
|
|
"learning_rate": 2.8408775865739578e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21923969686031342,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2186.7,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 2.986330178759201,
|
|
"grad_norm": 0.32650521446979597,
|
|
"learning_rate": 2.8361168437944817e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07725784182548523,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3648.8,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 2.9915878023133544,
|
|
"grad_norm": 0.34666087266548073,
|
|
"learning_rate": 2.8313503531330738e-05,
|
|
"loss": 0.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08928908407688141,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3303.7,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 2.996845425867508,
|
|
"grad_norm": 0.3357698377603881,
|
|
"learning_rate": 2.826578147356956e-05,
|
|
"loss": 0.0915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08458413183689117,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3666.4,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 3.0021030494216614,
|
|
"grad_norm": 1.0153830951441243,
|
|
"learning_rate": 2.8218002592726384e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2009751796722412,
|
|
"step": 2855,
|
|
"valid_targets_mean": 1553.1,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 3.007360672975815,
|
|
"grad_norm": 0.9578716659295562,
|
|
"learning_rate": 2.8170167217256934e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.188889741897583,
|
|
"step": 2860,
|
|
"valid_targets_mean": 1439.1,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.0126182965299684,
|
|
"grad_norm": 0.7480346808263534,
|
|
"learning_rate": 2.8122275676005304e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.178565114736557,
|
|
"step": 2865,
|
|
"valid_targets_mean": 1826.0,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 3.017875920084122,
|
|
"grad_norm": 0.7757563210471313,
|
|
"learning_rate": 2.807432829820171e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563483327627182,
|
|
"step": 2870,
|
|
"valid_targets_mean": 1316.2,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.0231335436382754,
|
|
"grad_norm": 0.760475622783321,
|
|
"learning_rate": 2.8026325413460215e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1838594377040863,
|
|
"step": 2875,
|
|
"valid_targets_mean": 1914.6,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 3.028391167192429,
|
|
"grad_norm": 1.186599430117999,
|
|
"learning_rate": 2.7978267351776448e-05,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17113059759140015,
|
|
"step": 2880,
|
|
"valid_targets_mean": 1383.9,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.0336487907465823,
|
|
"grad_norm": 0.8368328709994571,
|
|
"learning_rate": 2.7930154443525377e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17734494805335999,
|
|
"step": 2885,
|
|
"valid_targets_mean": 1688.6,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 3.0389064143007363,
|
|
"grad_norm": 0.7638523242688803,
|
|
"learning_rate": 2.7881987019458992e-05,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1756438910961151,
|
|
"step": 2890,
|
|
"valid_targets_mean": 1434.3,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 3.0441640378548898,
|
|
"grad_norm": 0.7603017766385608,
|
|
"learning_rate": 2.7833765410704062e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15442828834056854,
|
|
"step": 2895,
|
|
"valid_targets_mean": 1524.4,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.0494216614090432,
|
|
"grad_norm": 0.737220054601989,
|
|
"learning_rate": 2.778548994875984e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18120095133781433,
|
|
"step": 2900,
|
|
"valid_targets_mean": 1687.2,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.0546792849631967,
|
|
"grad_norm": 0.7219646311378729,
|
|
"learning_rate": 2.7737160965495794e-05,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17094483971595764,
|
|
"step": 2905,
|
|
"valid_targets_mean": 1439.9,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 3.0599369085173502,
|
|
"grad_norm": 0.947320515199349,
|
|
"learning_rate": 2.768877879314935e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16819295287132263,
|
|
"step": 2910,
|
|
"valid_targets_mean": 1466.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 3.0651945320715037,
|
|
"grad_norm": 0.8503023131710876,
|
|
"learning_rate": 2.7640343764323535e-05,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18158873915672302,
|
|
"step": 2915,
|
|
"valid_targets_mean": 1352.6,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 3.070452155625657,
|
|
"grad_norm": 0.8407989228763362,
|
|
"learning_rate": 2.7591856211984783e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15310627222061157,
|
|
"step": 2920,
|
|
"valid_targets_mean": 1488.1,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 3.0757097791798107,
|
|
"grad_norm": 0.7397249986657985,
|
|
"learning_rate": 2.7543316469460565e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15087135136127472,
|
|
"step": 2925,
|
|
"valid_targets_mean": 1451.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.080967402733964,
|
|
"grad_norm": 0.9921587508535049,
|
|
"learning_rate": 2.7494724870437147e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2060733586549759,
|
|
"step": 2930,
|
|
"valid_targets_mean": 1735.0,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 3.0862250262881177,
|
|
"grad_norm": 1.1741681951047909,
|
|
"learning_rate": 2.7446081748957306e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1643257737159729,
|
|
"step": 2935,
|
|
"valid_targets_mean": 1400.9,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 3.091482649842271,
|
|
"grad_norm": 0.7875876053749578,
|
|
"learning_rate": 2.7397387439417963e-05,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15686167776584625,
|
|
"step": 2940,
|
|
"valid_targets_mean": 1561.1,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 3.0967402733964247,
|
|
"grad_norm": 0.7115401492107658,
|
|
"learning_rate": 2.7348642276567973e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1451965719461441,
|
|
"step": 2945,
|
|
"valid_targets_mean": 1369.1,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 3.101997896950578,
|
|
"grad_norm": 0.9011516758528701,
|
|
"learning_rate": 2.729984659550576e-05,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16926482319831848,
|
|
"step": 2950,
|
|
"valid_targets_mean": 1467.0,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 3.107255520504732,
|
|
"grad_norm": 0.7346400824252598,
|
|
"learning_rate": 2.7251000731677035e-05,
|
|
"loss": 0.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15575602650642395,
|
|
"step": 2955,
|
|
"valid_targets_mean": 1453.6,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 3.1125131440588856,
|
|
"grad_norm": 0.7689981957849497,
|
|
"learning_rate": 2.72021050208725e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1617649793624878,
|
|
"step": 2960,
|
|
"valid_targets_mean": 1422.1,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 3.117770767613039,
|
|
"grad_norm": 0.7275665796012852,
|
|
"learning_rate": 2.715315979922552e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705387532711029,
|
|
"step": 2965,
|
|
"valid_targets_mean": 1521.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.1230283911671926,
|
|
"grad_norm": 0.7604520991119563,
|
|
"learning_rate": 2.7104165403209843e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16756518185138702,
|
|
"step": 2970,
|
|
"valid_targets_mean": 1364.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 3.128286014721346,
|
|
"grad_norm": 0.7525764441573639,
|
|
"learning_rate": 2.7055122169637224e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16290804743766785,
|
|
"step": 2975,
|
|
"valid_targets_mean": 1556.6,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 3.1335436382754995,
|
|
"grad_norm": 0.7153059892862654,
|
|
"learning_rate": 2.7006030435655205e-05,
|
|
"loss": 0.1616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17549902200698853,
|
|
"step": 2980,
|
|
"valid_targets_mean": 1657.5,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.138801261829653,
|
|
"grad_norm": 0.6877280286226078,
|
|
"learning_rate": 2.6956890538744703e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564432978630066,
|
|
"step": 2985,
|
|
"valid_targets_mean": 1545.4,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 3.1440588853838065,
|
|
"grad_norm": 0.7131649105297678,
|
|
"learning_rate": 2.6907702816717742e-05,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17285484075546265,
|
|
"step": 2990,
|
|
"valid_targets_mean": 1601.5,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 3.14931650893796,
|
|
"grad_norm": 0.7209399409701057,
|
|
"learning_rate": 2.685846760771513e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.154525026679039,
|
|
"step": 2995,
|
|
"valid_targets_mean": 1467.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.1545741324921135,
|
|
"grad_norm": 0.7377486784686006,
|
|
"learning_rate": 2.6809185250204113e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16196948289871216,
|
|
"step": 3000,
|
|
"valid_targets_mean": 1607.2,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 3.159831756046267,
|
|
"grad_norm": 0.7947954324853301,
|
|
"learning_rate": 2.6759856082976066e-05,
|
|
"loss": 0.1619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16255517303943634,
|
|
"step": 3005,
|
|
"valid_targets_mean": 1486.9,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 3.1650893796004205,
|
|
"grad_norm": 0.7241271651539062,
|
|
"learning_rate": 2.6710480445144145e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13190844655036926,
|
|
"step": 3010,
|
|
"valid_targets_mean": 1312.7,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 3.170347003154574,
|
|
"grad_norm": 0.6916520204368889,
|
|
"learning_rate": 2.666105867614099e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14786486327648163,
|
|
"step": 3015,
|
|
"valid_targets_mean": 1413.6,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.1756046267087275,
|
|
"grad_norm": 0.7225488622283363,
|
|
"learning_rate": 2.6611591115716345e-05,
|
|
"loss": 0.1594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.151035338640213,
|
|
"step": 3020,
|
|
"valid_targets_mean": 1449.5,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 3.1808622502628814,
|
|
"grad_norm": 0.7266035531391442,
|
|
"learning_rate": 2.6562078103934755e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15991780161857605,
|
|
"step": 3025,
|
|
"valid_targets_mean": 1542.5,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 3.186119873817035,
|
|
"grad_norm": 0.6771043280351067,
|
|
"learning_rate": 2.6512519981173238e-05,
|
|
"loss": 0.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14401499927043915,
|
|
"step": 3030,
|
|
"valid_targets_mean": 1336.9,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 3.1913774973711884,
|
|
"grad_norm": 0.692571795606894,
|
|
"learning_rate": 2.64629170881189e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17246052622795105,
|
|
"step": 3035,
|
|
"valid_targets_mean": 1638.3,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 3.196635120925342,
|
|
"grad_norm": 0.7418225870797496,
|
|
"learning_rate": 2.641326976576664e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1674758940935135,
|
|
"step": 3040,
|
|
"valid_targets_mean": 1561.9,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 3.2018927444794953,
|
|
"grad_norm": 0.7497987069090254,
|
|
"learning_rate": 2.6363578355416772e-05,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14803823828697205,
|
|
"step": 3045,
|
|
"valid_targets_mean": 1301.5,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 3.207150368033649,
|
|
"grad_norm": 0.7653406210240195,
|
|
"learning_rate": 2.6313843198672712e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17800885438919067,
|
|
"step": 3050,
|
|
"valid_targets_mean": 1505.0,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 3.2124079915878023,
|
|
"grad_norm": 0.7238788011079397,
|
|
"learning_rate": 2.6264064637438585e-05,
|
|
"loss": 0.1591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15153130888938904,
|
|
"step": 3055,
|
|
"valid_targets_mean": 1330.0,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 3.217665615141956,
|
|
"grad_norm": 0.7122139126458839,
|
|
"learning_rate": 2.6214243013916915e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1740637719631195,
|
|
"step": 3060,
|
|
"valid_targets_mean": 1778.6,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 3.2229232386961093,
|
|
"grad_norm": 0.7193052428946505,
|
|
"learning_rate": 2.616437867060627e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1674605756998062,
|
|
"step": 3065,
|
|
"valid_targets_mean": 1651.5,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 3.228180862250263,
|
|
"grad_norm": 0.7181685142725793,
|
|
"learning_rate": 2.6114471950298853e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16833847761154175,
|
|
"step": 3070,
|
|
"valid_targets_mean": 1563.4,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 3.2334384858044163,
|
|
"grad_norm": 0.7143088741566662,
|
|
"learning_rate": 2.6064523196078248e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14991122484207153,
|
|
"step": 3075,
|
|
"valid_targets_mean": 1565.3,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.2386961093585698,
|
|
"grad_norm": 0.7341874380160334,
|
|
"learning_rate": 2.6014532751316937e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16907265782356262,
|
|
"step": 3080,
|
|
"valid_targets_mean": 1692.3,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.2439537329127233,
|
|
"grad_norm": 0.6934991412753697,
|
|
"learning_rate": 2.5964500959674057e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16206017136573792,
|
|
"step": 3085,
|
|
"valid_targets_mean": 1596.2,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 3.249211356466877,
|
|
"grad_norm": 0.6877269014343796,
|
|
"learning_rate": 2.5914428165092956e-05,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14042839407920837,
|
|
"step": 3090,
|
|
"valid_targets_mean": 1312.9,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 3.2544689800210307,
|
|
"grad_norm": 0.7094728806143602,
|
|
"learning_rate": 2.5864314711798856e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1553335189819336,
|
|
"step": 3095,
|
|
"valid_targets_mean": 1427.8,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 3.259726603575184,
|
|
"grad_norm": 0.7617044512032434,
|
|
"learning_rate": 2.5814160944296495e-05,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18396365642547607,
|
|
"step": 3100,
|
|
"valid_targets_mean": 1703.9,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.2649842271293377,
|
|
"grad_norm": 0.7317380322454258,
|
|
"learning_rate": 2.5763967207367752e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718815267086029,
|
|
"step": 3105,
|
|
"valid_targets_mean": 1470.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.270241850683491,
|
|
"grad_norm": 0.7513859068792503,
|
|
"learning_rate": 2.5713733846069272e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1729550063610077,
|
|
"step": 3110,
|
|
"valid_targets_mean": 1629.8,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 3.2754994742376446,
|
|
"grad_norm": 0.6848120186596997,
|
|
"learning_rate": 2.56634612057301e-05,
|
|
"loss": 0.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15244747698307037,
|
|
"step": 3115,
|
|
"valid_targets_mean": 1504.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 3.280757097791798,
|
|
"grad_norm": 0.673535190257926,
|
|
"learning_rate": 2.561314963194929e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18910765647888184,
|
|
"step": 3120,
|
|
"valid_targets_mean": 1898.6,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 3.2860147213459516,
|
|
"grad_norm": 0.7007516882187462,
|
|
"learning_rate": 2.556279947059358e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1472591906785965,
|
|
"step": 3125,
|
|
"valid_targets_mean": 1387.5,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 3.291272344900105,
|
|
"grad_norm": 0.7129920724705783,
|
|
"learning_rate": 2.551241106779494e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1591213494539261,
|
|
"step": 3130,
|
|
"valid_targets_mean": 1548.0,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 3.2965299684542586,
|
|
"grad_norm": 0.6984102460849012,
|
|
"learning_rate": 2.5461984769948244e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1496220827102661,
|
|
"step": 3135,
|
|
"valid_targets_mean": 1337.4,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 3.301787592008412,
|
|
"grad_norm": 0.7558744584631479,
|
|
"learning_rate": 2.5411520923708874e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144131600856781,
|
|
"step": 3140,
|
|
"valid_targets_mean": 1283.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.3070452155625656,
|
|
"grad_norm": 0.727010367044851,
|
|
"learning_rate": 2.536101987599036e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15996873378753662,
|
|
"step": 3145,
|
|
"valid_targets_mean": 1546.1,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 3.312302839116719,
|
|
"grad_norm": 0.7935522676962602,
|
|
"learning_rate": 2.5310481973961935e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1686829924583435,
|
|
"step": 3150,
|
|
"valid_targets_mean": 1664.6,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 3.3175604626708726,
|
|
"grad_norm": 0.6987166115939653,
|
|
"learning_rate": 2.5259907565046217e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14374563097953796,
|
|
"step": 3155,
|
|
"valid_targets_mean": 1281.3,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 3.322818086225026,
|
|
"grad_norm": 0.7704153064356908,
|
|
"learning_rate": 2.5209296996916774e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1557459533214569,
|
|
"step": 3160,
|
|
"valid_targets_mean": 1460.1,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 3.32807570977918,
|
|
"grad_norm": 0.7304887901019812,
|
|
"learning_rate": 2.5158650617495753e-05,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1635679304599762,
|
|
"step": 3165,
|
|
"valid_targets_mean": 1661.1,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.7871658306590985,
|
|
"learning_rate": 2.5107968774951504e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15143483877182007,
|
|
"step": 3170,
|
|
"valid_targets_mean": 1388.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.338590956887487,
|
|
"grad_norm": 0.685491759966997,
|
|
"learning_rate": 2.5057251817696138e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16114577651023865,
|
|
"step": 3175,
|
|
"valid_targets_mean": 1682.4,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 3.3438485804416405,
|
|
"grad_norm": 0.7689982433044418,
|
|
"learning_rate": 2.5006500094383176e-05,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17659153044223785,
|
|
"step": 3180,
|
|
"valid_targets_mean": 1581.1,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 3.349106203995794,
|
|
"grad_norm": 0.8406749636556262,
|
|
"learning_rate": 2.4955713953905155e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16790682077407837,
|
|
"step": 3185,
|
|
"valid_targets_mean": 1454.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 3.3543638275499474,
|
|
"grad_norm": 0.7777439527883708,
|
|
"learning_rate": 2.490489374539118e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1627415120601654,
|
|
"step": 3190,
|
|
"valid_targets_mean": 1489.4,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 3.359621451104101,
|
|
"grad_norm": 0.797641636045962,
|
|
"learning_rate": 2.4854039818204577e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709059476852417,
|
|
"step": 3195,
|
|
"valid_targets_mean": 1609.5,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 3.3648790746582544,
|
|
"grad_norm": 0.7371462162762332,
|
|
"learning_rate": 2.480315252194047e-05,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15439459681510925,
|
|
"step": 3200,
|
|
"valid_targets_mean": 1547.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.370136698212408,
|
|
"grad_norm": 1.093522669128121,
|
|
"learning_rate": 2.4752232206423387e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16903162002563477,
|
|
"step": 3205,
|
|
"valid_targets_mean": 1502.6,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 3.3753943217665614,
|
|
"grad_norm": 0.707815228654526,
|
|
"learning_rate": 2.4701279221704812e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14688000082969666,
|
|
"step": 3210,
|
|
"valid_targets_mean": 1349.7,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.380651945320715,
|
|
"grad_norm": 0.7063788662374152,
|
|
"learning_rate": 2.4650293918060845e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14401158690452576,
|
|
"step": 3215,
|
|
"valid_targets_mean": 1485.7,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.3859095688748684,
|
|
"grad_norm": 0.7506740453173287,
|
|
"learning_rate": 2.4599276645989763e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1596394181251526,
|
|
"step": 3220,
|
|
"valid_targets_mean": 1525.7,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 3.3911671924290223,
|
|
"grad_norm": 0.7638974947948802,
|
|
"learning_rate": 2.4548227756209593e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15278524160385132,
|
|
"step": 3225,
|
|
"valid_targets_mean": 1422.0,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 3.396424815983176,
|
|
"grad_norm": 0.7141801225138629,
|
|
"learning_rate": 2.4497147599655726e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14236235618591309,
|
|
"step": 3230,
|
|
"valid_targets_mean": 1337.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.4016824395373293,
|
|
"grad_norm": 0.6768440619173867,
|
|
"learning_rate": 2.44460365274785e-05,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16601522266864777,
|
|
"step": 3235,
|
|
"valid_targets_mean": 1771.9,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.406940063091483,
|
|
"grad_norm": 0.7518520439776654,
|
|
"learning_rate": 2.4394894891040774e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18775448203086853,
|
|
"step": 3240,
|
|
"valid_targets_mean": 1592.7,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 3.4121976866456363,
|
|
"grad_norm": 0.7107350846356748,
|
|
"learning_rate": 2.434372304191553e-05,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428951621055603,
|
|
"step": 3245,
|
|
"valid_targets_mean": 1457.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 3.4174553101997898,
|
|
"grad_norm": 0.6855270749314717,
|
|
"learning_rate": 2.4292521331883432e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15996533632278442,
|
|
"step": 3250,
|
|
"valid_targets_mean": 1498.1,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.4227129337539433,
|
|
"grad_norm": 0.5324207323189583,
|
|
"learning_rate": 2.4241290112930448e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11845308542251587,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2187.2,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 3.4279705573080967,
|
|
"grad_norm": 0.6765960167126789,
|
|
"learning_rate": 2.4190029737245368e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13978411257266998,
|
|
"step": 3260,
|
|
"valid_targets_mean": 1544.4,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 3.4332281808622502,
|
|
"grad_norm": 0.716060654288124,
|
|
"learning_rate": 2.4138740557217462e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514171063899994,
|
|
"step": 3265,
|
|
"valid_targets_mean": 1543.6,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 3.4384858044164037,
|
|
"grad_norm": 0.7255969168782287,
|
|
"learning_rate": 2.4087422925433988e-05,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15819844603538513,
|
|
"step": 3270,
|
|
"valid_targets_mean": 1659.8,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 3.443743427970557,
|
|
"grad_norm": 0.7386851568481033,
|
|
"learning_rate": 2.4036077194677803e-05,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15675535798072815,
|
|
"step": 3275,
|
|
"valid_targets_mean": 1667.2,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 3.4490010515247107,
|
|
"grad_norm": 0.7215547346775798,
|
|
"learning_rate": 2.3984703717924932e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18710145354270935,
|
|
"step": 3280,
|
|
"valid_targets_mean": 1765.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 3.454258675078864,
|
|
"grad_norm": 0.7313678336589342,
|
|
"learning_rate": 2.3933302848342127e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.148451030254364,
|
|
"step": 3285,
|
|
"valid_targets_mean": 1467.1,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 3.4595162986330177,
|
|
"grad_norm": 0.7838903247589883,
|
|
"learning_rate": 2.388187493928447e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15493765473365784,
|
|
"step": 3290,
|
|
"valid_targets_mean": 1358.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 3.464773922187171,
|
|
"grad_norm": 0.5443099682007434,
|
|
"learning_rate": 2.3830420344292922e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10775116086006165,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2683.5,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 3.470031545741325,
|
|
"grad_norm": 0.5030562872301405,
|
|
"learning_rate": 2.377893941709189e-05,
|
|
"loss": 0.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11221227049827576,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2004.5,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 3.4752891692954786,
|
|
"grad_norm": 0.3703418740280842,
|
|
"learning_rate": 2.3727432511586802e-05,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09657759964466095,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3137.6,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 3.480546792849632,
|
|
"grad_norm": 0.36882944206470225,
|
|
"learning_rate": 2.3675899981861675e-05,
|
|
"loss": 0.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09505809843540192,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3310.0,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.4858044164037856,
|
|
"grad_norm": 0.2980443098259448,
|
|
"learning_rate": 2.362434218217668e-05,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06509885936975479,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3544.6,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 3.491062039957939,
|
|
"grad_norm": 0.3775797942330371,
|
|
"learning_rate": 2.3572759466965706e-05,
|
|
"loss": 0.0924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08893433213233948,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3144.9,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 3.4963196635120926,
|
|
"grad_norm": 0.6051734616310629,
|
|
"learning_rate": 2.3521152190833934e-05,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466870903968811,
|
|
"step": 3325,
|
|
"valid_targets_mean": 1682.2,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.501577287066246,
|
|
"grad_norm": 0.35000643454214686,
|
|
"learning_rate": 2.346952070855537e-05,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08239797502756119,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3284.0,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 3.5068349106203995,
|
|
"grad_norm": 0.41031362369237406,
|
|
"learning_rate": 2.3417865375070433e-05,
|
|
"loss": 0.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11980154365301132,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3136.8,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 3.512092534174553,
|
|
"grad_norm": 0.4072089673499259,
|
|
"learning_rate": 2.336618654548352e-05,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14663591980934143,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3446.4,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 3.5173501577287065,
|
|
"grad_norm": 0.3727442216958196,
|
|
"learning_rate": 2.331448457506053e-05,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10031040012836456,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3461.6,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 3.52260778128286,
|
|
"grad_norm": 0.3994968154863205,
|
|
"learning_rate": 2.326275981922645e-05,
|
|
"loss": 0.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09783968329429626,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2281.7,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 3.527865404837014,
|
|
"grad_norm": 0.3550302352287683,
|
|
"learning_rate": 2.3211012633562923e-05,
|
|
"loss": 0.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09796998649835587,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3140.8,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 3.5331230283911674,
|
|
"grad_norm": 0.5326623542188318,
|
|
"learning_rate": 2.3159243373805764e-05,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23072634637355804,
|
|
"step": 3360,
|
|
"valid_targets_mean": 1930.1,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 3.538380651945321,
|
|
"grad_norm": 0.3688458186750891,
|
|
"learning_rate": 2.3107452395842542e-05,
|
|
"loss": 0.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0918334573507309,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3476.9,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 3.5436382754994744,
|
|
"grad_norm": 0.37694886254982934,
|
|
"learning_rate": 2.3055640055710132e-05,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09475556015968323,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2934.9,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 3.548895899053628,
|
|
"grad_norm": 0.361710277299611,
|
|
"learning_rate": 2.3003806709592268e-05,
|
|
"loss": 0.0841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08646224439144135,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2977.2,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 3.5541535226077814,
|
|
"grad_norm": 0.3923783065630929,
|
|
"learning_rate": 2.295195271381707e-05,
|
|
"loss": 0.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10263008624315262,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3493.9,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 3.559411146161935,
|
|
"grad_norm": 0.32157805666456296,
|
|
"learning_rate": 2.290007842485463e-05,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07095207273960114,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3361.6,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 3.5646687697160884,
|
|
"grad_norm": 0.45144904186150264,
|
|
"learning_rate": 2.2848184199314546e-05,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11917275935411453,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2514.4,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 3.569926393270242,
|
|
"grad_norm": 0.443691654398637,
|
|
"learning_rate": 2.2796270393943472e-05,
|
|
"loss": 0.1029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1120077446103096,
|
|
"step": 3395,
|
|
"valid_targets_mean": 2462.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 3.5751840168243953,
|
|
"grad_norm": 0.444025520690789,
|
|
"learning_rate": 2.274433736562264e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19354555010795593,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2976.1,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 3.580441640378549,
|
|
"grad_norm": 0.3573000146572299,
|
|
"learning_rate": 2.2692385471365465e-05,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08170770108699799,
|
|
"step": 3405,
|
|
"valid_targets_mean": 2692.6,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 3.5856992639327023,
|
|
"grad_norm": 0.42377566609320155,
|
|
"learning_rate": 2.264041506831503e-05,
|
|
"loss": 0.0876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09044261276721954,
|
|
"step": 3410,
|
|
"valid_targets_mean": 2406.3,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 3.590956887486856,
|
|
"grad_norm": 0.403249296947159,
|
|
"learning_rate": 2.258842651374166e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09162634611129761,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3270.2,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 3.5962145110410093,
|
|
"grad_norm": 0.36497118438614784,
|
|
"learning_rate": 2.2536420165040478e-05,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07757177948951721,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2861.4,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.601472134595163,
|
|
"grad_norm": 0.4814404833783927,
|
|
"learning_rate": 2.248439637972892e-05,
|
|
"loss": 0.1004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10776933282613754,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2105.1,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 3.6067297581493163,
|
|
"grad_norm": 0.4649186798035194,
|
|
"learning_rate": 2.2432355515444284e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23355937004089355,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2730.5,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.61198738170347,
|
|
"grad_norm": 0.49596750031321396,
|
|
"learning_rate": 2.2380297929941296e-05,
|
|
"loss": 0.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12732480466365814,
|
|
"step": 3435,
|
|
"valid_targets_mean": 1982.8,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 3.6172450052576237,
|
|
"grad_norm": 0.6331886046407993,
|
|
"learning_rate": 2.2328223981089613e-05,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11068670451641083,
|
|
"step": 3440,
|
|
"valid_targets_mean": 1372.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 3.622502628811777,
|
|
"grad_norm": 0.3937143683860141,
|
|
"learning_rate": 2.2276134026871393e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09766163676977158,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3164.8,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 3.6277602523659307,
|
|
"grad_norm": 0.24350799716030153,
|
|
"learning_rate": 2.222402842537882e-05,
|
|
"loss": 0.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05802469328045845,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5015.1,
|
|
"valid_targets_min": 3529
|
|
},
|
|
{
|
|
"epoch": 3.633017875920084,
|
|
"grad_norm": 0.37203436731137035,
|
|
"learning_rate": 2.2171907534811652e-05,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10450957715511322,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4336.9,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 3.6382754994742377,
|
|
"grad_norm": 0.41643184533502453,
|
|
"learning_rate": 2.2119771713474732e-05,
|
|
"loss": 0.0976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11065898835659027,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3651.8,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 3.643533123028391,
|
|
"grad_norm": 0.3934006119910143,
|
|
"learning_rate": 2.2067621319775564e-05,
|
|
"loss": 0.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09729157388210297,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 3.6487907465825447,
|
|
"grad_norm": 0.4246895638591527,
|
|
"learning_rate": 2.201545671222183e-05,
|
|
"loss": 0.0868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0853077694773674,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2358.4,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 3.654048370136698,
|
|
"grad_norm": 0.4459133041758443,
|
|
"learning_rate": 2.1963278249418894e-05,
|
|
"loss": 0.0865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10066591203212738,
|
|
"step": 3475,
|
|
"valid_targets_mean": 2915.3,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 3.6593059936908516,
|
|
"grad_norm": 0.3330196394799087,
|
|
"learning_rate": 2.191108629006742e-05,
|
|
"loss": 0.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06883084774017334,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3654.9,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.664563617245005,
|
|
"grad_norm": 0.4830073737471554,
|
|
"learning_rate": 2.1858881192960814e-05,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11881092190742493,
|
|
"step": 3485,
|
|
"valid_targets_mean": 2283.9,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 3.669821240799159,
|
|
"grad_norm": 0.530118378630708,
|
|
"learning_rate": 2.180666331698281e-05,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09584526717662811,
|
|
"step": 3490,
|
|
"valid_targets_mean": 1388.7,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 3.6750788643533125,
|
|
"grad_norm": 0.43984703491201566,
|
|
"learning_rate": 2.1754433021104985e-05,
|
|
"loss": 0.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08105123788118362,
|
|
"step": 3495,
|
|
"valid_targets_mean": 1633.8,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.680336487907466,
|
|
"grad_norm": 0.4233341877084485,
|
|
"learning_rate": 2.170219066438431e-05,
|
|
"loss": 0.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09127180278301239,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2779.3,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.6855941114616195,
|
|
"grad_norm": 0.387700922362627,
|
|
"learning_rate": 2.164993660596065e-05,
|
|
"loss": 0.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08709383010864258,
|
|
"step": 3505,
|
|
"valid_targets_mean": 2962.7,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.690851735015773,
|
|
"grad_norm": 0.40290885939492926,
|
|
"learning_rate": 2.1597671205054326e-05,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0947260856628418,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3386.9,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 3.6961093585699265,
|
|
"grad_norm": 0.48380195922315133,
|
|
"learning_rate": 2.1545394820963637e-05,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13147683441638947,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2628.9,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 3.70136698212408,
|
|
"grad_norm": 0.36573554154558774,
|
|
"learning_rate": 2.149310781306237e-05,
|
|
"loss": 0.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07513891160488129,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2978.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.7066246056782335,
|
|
"grad_norm": 0.5112486521023695,
|
|
"learning_rate": 2.1440810540797354e-05,
|
|
"loss": 0.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10247130692005157,
|
|
"step": 3525,
|
|
"valid_targets_mean": 1913.0,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 3.711882229232387,
|
|
"grad_norm": 0.5697643091664327,
|
|
"learning_rate": 2.1388503363685985e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19898897409439087,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2542.8,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 3.7171398527865405,
|
|
"grad_norm": 0.435081498437529,
|
|
"learning_rate": 2.133618664131374e-05,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09856528043746948,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3325.2,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 3.722397476340694,
|
|
"grad_norm": 0.5389864275451385,
|
|
"learning_rate": 2.1283860733331722e-05,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11500448733568192,
|
|
"step": 3540,
|
|
"valid_targets_mean": 1593.0,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 3.7276550998948474,
|
|
"grad_norm": 0.5706213977134992,
|
|
"learning_rate": 2.123152599945417e-05,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10599765926599503,
|
|
"step": 3545,
|
|
"valid_targets_mean": 1756.6,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 3.732912723449001,
|
|
"grad_norm": 0.3304693098437503,
|
|
"learning_rate": 2.1179182799456024e-05,
|
|
"loss": 0.0924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08064989745616913,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3063.7,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 3.7381703470031544,
|
|
"grad_norm": 0.530788508146966,
|
|
"learning_rate": 2.112683149317039e-05,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10121424496173859,
|
|
"step": 3555,
|
|
"valid_targets_mean": 1936.0,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.743427970557308,
|
|
"grad_norm": 0.3286043768042034,
|
|
"learning_rate": 2.1074472440486118e-05,
|
|
"loss": 0.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08935829252004623,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3996.3,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 3.7486855941114614,
|
|
"grad_norm": 0.36550474794956694,
|
|
"learning_rate": 2.102210600134531e-05,
|
|
"loss": 0.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09652663767337799,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3944.3,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 3.753943217665615,
|
|
"grad_norm": 0.4325741552974306,
|
|
"learning_rate": 2.096973253574084e-05,
|
|
"loss": 0.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09666009247303009,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2266.8,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 3.759200841219769,
|
|
"grad_norm": 0.40791655447527125,
|
|
"learning_rate": 2.09173524037139e-05,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09209573268890381,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2960.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.7644584647739223,
|
|
"grad_norm": 0.38772339677116957,
|
|
"learning_rate": 2.0864965965351495e-05,
|
|
"loss": 0.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0913030356168747,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3425.1,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 3.769716088328076,
|
|
"grad_norm": 0.35781538705517263,
|
|
"learning_rate": 2.081257358078398e-05,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08367600291967392,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3363.6,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 3.7749737118822293,
|
|
"grad_norm": 0.43063352480767536,
|
|
"learning_rate": 2.0760175610182613e-05,
|
|
"loss": 0.0861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10648873448371887,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2006.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 3.780231335436383,
|
|
"grad_norm": 0.4255843199869726,
|
|
"learning_rate": 2.0707772413757016e-05,
|
|
"loss": 0.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1046498566865921,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3465.1,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.7854889589905363,
|
|
"grad_norm": 0.3238682484345786,
|
|
"learning_rate": 2.0655364351752763e-05,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07490940392017365,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3908.9,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 3.7907465825446898,
|
|
"grad_norm": 0.38324966614871386,
|
|
"learning_rate": 2.060295178444887e-05,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07175049185752869,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2616.4,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 3.7960042060988433,
|
|
"grad_norm": 0.443143738568688,
|
|
"learning_rate": 2.055053507215533e-05,
|
|
"loss": 0.092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09204816818237305,
|
|
"step": 3610,
|
|
"valid_targets_mean": 2680.9,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 3.8012618296529967,
|
|
"grad_norm": 0.610462234387402,
|
|
"learning_rate": 2.049811457521061e-05,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14608034491539001,
|
|
"step": 3615,
|
|
"valid_targets_mean": 1645.9,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 3.8065194532071502,
|
|
"grad_norm": 0.3921422546334914,
|
|
"learning_rate": 2.0445690653979216e-05,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06947421282529831,
|
|
"step": 3620,
|
|
"valid_targets_mean": 2570.3,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.8117770767613037,
|
|
"grad_norm": 0.6663493421615169,
|
|
"learning_rate": 2.039326366884919e-05,
|
|
"loss": 0.0947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12514297664165497,
|
|
"step": 3625,
|
|
"valid_targets_mean": 1396.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.8170347003154577,
|
|
"grad_norm": 0.427751684134174,
|
|
"learning_rate": 2.034083398022963e-05,
|
|
"loss": 0.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07771643996238708,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2454.5,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 3.822292323869611,
|
|
"grad_norm": 0.44758511691685116,
|
|
"learning_rate": 2.028840194854822e-05,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08018139749765396,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3334.7,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.8275499474237646,
|
|
"grad_norm": 0.35793157543173476,
|
|
"learning_rate": 2.0235967934248756e-05,
|
|
"loss": 0.0883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07716187834739685,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3069.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 3.832807570977918,
|
|
"grad_norm": 0.4702038685141682,
|
|
"learning_rate": 2.018353229778867e-05,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08833232522010803,
|
|
"step": 3645,
|
|
"valid_targets_mean": 2432.9,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.8380651945320716,
|
|
"grad_norm": 0.47312598716062043,
|
|
"learning_rate": 2.0131095399636522e-05,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12234270572662354,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2296.9,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 3.843322818086225,
|
|
"grad_norm": 0.4050671404779756,
|
|
"learning_rate": 2.0078657600269573e-05,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08255276829004288,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3508.6,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 3.8485804416403786,
|
|
"grad_norm": 0.4596813722597804,
|
|
"learning_rate": 2.0026219260171262e-05,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0901421308517456,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2384.8,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 3.853838065194532,
|
|
"grad_norm": 0.3165528104327489,
|
|
"learning_rate": 1.9973780739828748e-05,
|
|
"loss": 0.0883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0759531706571579,
|
|
"step": 3665,
|
|
"valid_targets_mean": 4542.6,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 3.8590956887486856,
|
|
"grad_norm": 0.29289406468084694,
|
|
"learning_rate": 1.9921342399730433e-05,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06938598304986954,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3932.6,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 3.864353312302839,
|
|
"grad_norm": 0.3940315334405671,
|
|
"learning_rate": 1.9868904600363485e-05,
|
|
"loss": 0.0717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07881227135658264,
|
|
"step": 3675,
|
|
"valid_targets_mean": 2556.8,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 3.8696109358569926,
|
|
"grad_norm": 0.3051306597473392,
|
|
"learning_rate": 1.9816467702211342e-05,
|
|
"loss": 0.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06934002041816711,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3895.4,
|
|
"valid_targets_min": 2944
|
|
},
|
|
{
|
|
"epoch": 3.874868559411146,
|
|
"grad_norm": 0.4777520869374522,
|
|
"learning_rate": 1.9764032065751248e-05,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19092978537082672,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3167.4,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 3.8801261829652995,
|
|
"grad_norm": 0.40689502117218584,
|
|
"learning_rate": 1.971159805145178e-05,
|
|
"loss": 0.0896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07239830493927002,
|
|
"step": 3690,
|
|
"valid_targets_mean": 2931.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 3.885383806519453,
|
|
"grad_norm": 0.30382264737261905,
|
|
"learning_rate": 1.965916601977038e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05693751573562622,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3162.2,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 3.8906414300736065,
|
|
"grad_norm": 0.3839793777783549,
|
|
"learning_rate": 1.9606736331150812e-05,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08582743257284164,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3075.7,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 3.89589905362776,
|
|
"grad_norm": 0.39810159285735625,
|
|
"learning_rate": 1.9554309346020784e-05,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08765186369419098,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3087.6,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 3.9011566771819135,
|
|
"grad_norm": 0.7291109076977612,
|
|
"learning_rate": 1.9501885424789394e-05,
|
|
"loss": 0.0942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10978750884532928,
|
|
"step": 3710,
|
|
"valid_targets_mean": 1690.7,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.9064143007360674,
|
|
"grad_norm": 0.41431235708407105,
|
|
"learning_rate": 1.9449464927844677e-05,
|
|
"loss": 0.0802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07006799429655075,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3497.7,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 3.911671924290221,
|
|
"grad_norm": 0.5883270075280208,
|
|
"learning_rate": 1.939704821555113e-05,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11378846317529678,
|
|
"step": 3720,
|
|
"valid_targets_mean": 1610.4,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 3.9169295478443744,
|
|
"grad_norm": 0.575035243155632,
|
|
"learning_rate": 1.9344635648247244e-05,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1258980929851532,
|
|
"step": 3725,
|
|
"valid_targets_mean": 1691.8,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 3.922187171398528,
|
|
"grad_norm": 0.4417901219788984,
|
|
"learning_rate": 1.9292227586242994e-05,
|
|
"loss": 0.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09100116789340973,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3664.4,
|
|
"valid_targets_min": 2784
|
|
},
|
|
{
|
|
"epoch": 3.9274447949526814,
|
|
"grad_norm": 0.3826363350707075,
|
|
"learning_rate": 1.9239824389817397e-05,
|
|
"loss": 0.0862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08645817637443542,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3575.8,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 3.932702418506835,
|
|
"grad_norm": 0.38902984620855535,
|
|
"learning_rate": 1.9187426419216026e-05,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07746557891368866,
|
|
"step": 3740,
|
|
"valid_targets_mean": 2629.2,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.9379600420609884,
|
|
"grad_norm": 0.4440574135528579,
|
|
"learning_rate": 1.9135034034648515e-05,
|
|
"loss": 0.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07846144586801529,
|
|
"step": 3745,
|
|
"valid_targets_mean": 1929.7,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 3.943217665615142,
|
|
"grad_norm": 0.7524988102545428,
|
|
"learning_rate": 1.90826475962861e-05,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1300787627696991,
|
|
"step": 3750,
|
|
"valid_targets_mean": 1150.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 3.9484752891692954,
|
|
"grad_norm": 0.43377208971208925,
|
|
"learning_rate": 1.9030267464259164e-05,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07985401898622513,
|
|
"step": 3755,
|
|
"valid_targets_mean": 2598.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 3.953732912723449,
|
|
"grad_norm": 0.4014449730261738,
|
|
"learning_rate": 1.8977893998654692e-05,
|
|
"loss": 0.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07844674587249756,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3492.6,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 3.958990536277603,
|
|
"grad_norm": 0.39365643476834655,
|
|
"learning_rate": 1.8925527559513886e-05,
|
|
"loss": 0.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08387859165668488,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3144.2,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 3.9642481598317563,
|
|
"grad_norm": 0.3250462893913423,
|
|
"learning_rate": 1.8873168506829614e-05,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08184637129306793,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3732.2,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 3.9695057833859098,
|
|
"grad_norm": 0.3016014649791314,
|
|
"learning_rate": 1.882081720054398e-05,
|
|
"loss": 0.0773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0785076767206192,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3899.2,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 3.9747634069400632,
|
|
"grad_norm": 0.5336857328401563,
|
|
"learning_rate": 1.876847400054583e-05,
|
|
"loss": 0.0856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10663610696792603,
|
|
"step": 3780,
|
|
"valid_targets_mean": 1649.2,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 3.9800210304942167,
|
|
"grad_norm": 0.541958266460311,
|
|
"learning_rate": 1.8716139266668288e-05,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17506924271583557,
|
|
"step": 3785,
|
|
"valid_targets_mean": 2908.9,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 3.9852786540483702,
|
|
"grad_norm": 0.3498458322406007,
|
|
"learning_rate": 1.8663813358686267e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07919283956289291,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3729.9,
|
|
"valid_targets_min": 2347
|
|
},
|
|
{
|
|
"epoch": 3.9905362776025237,
|
|
"grad_norm": 0.36650049584069216,
|
|
"learning_rate": 1.8611496636314025e-05,
|
|
"loss": 0.0855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08009093999862671,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3266.7,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 3.995793901156677,
|
|
"grad_norm": 0.367992552240383,
|
|
"learning_rate": 1.8559189459202653e-05,
|
|
"loss": 0.0809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08165628463029861,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3567.1,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 4.001051524710831,
|
|
"grad_norm": 0.8765624437475582,
|
|
"learning_rate": 1.8506892186937636e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16299240291118622,
|
|
"step": 3805,
|
|
"valid_targets_mean": 1607.4,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 4.006309148264984,
|
|
"grad_norm": 0.7481661006229169,
|
|
"learning_rate": 1.845460517903637e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16049407422542572,
|
|
"step": 3810,
|
|
"valid_targets_mean": 1344.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 4.011566771819138,
|
|
"grad_norm": 0.823041699440833,
|
|
"learning_rate": 1.8402328794945678e-05,
|
|
"loss": 0.1594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161682590842247,
|
|
"step": 3815,
|
|
"valid_targets_mean": 1412.5,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 4.016824395373291,
|
|
"grad_norm": 0.8263312908136765,
|
|
"learning_rate": 1.8350063394039352e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16678602993488312,
|
|
"step": 3820,
|
|
"valid_targets_mean": 1409.8,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.022082018927445,
|
|
"grad_norm": 0.7299100853844972,
|
|
"learning_rate": 1.82978093356157e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16625288128852844,
|
|
"step": 3825,
|
|
"valid_targets_mean": 1655.4,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.027339642481598,
|
|
"grad_norm": 0.7573676951715616,
|
|
"learning_rate": 1.824556697889502e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134572371840477,
|
|
"step": 3830,
|
|
"valid_targets_mean": 1219.5,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 4.032597266035752,
|
|
"grad_norm": 0.7460064071823531,
|
|
"learning_rate": 1.8193336683017197e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14512114226818085,
|
|
"step": 3835,
|
|
"valid_targets_mean": 1534.8,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 4.037854889589905,
|
|
"grad_norm": 0.7467667152761563,
|
|
"learning_rate": 1.8141118807039193e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13977526128292084,
|
|
"step": 3840,
|
|
"valid_targets_mean": 1388.5,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.043112513144059,
|
|
"grad_norm": 0.7466259477771547,
|
|
"learning_rate": 1.8088913709932582e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14778225123882294,
|
|
"step": 3845,
|
|
"valid_targets_mean": 1322.9,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 4.048370136698212,
|
|
"grad_norm": 0.6691684846073358,
|
|
"learning_rate": 1.8036721750581106e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13036976754665375,
|
|
"step": 3850,
|
|
"valid_targets_mean": 1573.5,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 4.053627760252366,
|
|
"grad_norm": 0.7956145711586533,
|
|
"learning_rate": 1.7984543287778185e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13885369896888733,
|
|
"step": 3855,
|
|
"valid_targets_mean": 1277.2,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.058885383806519,
|
|
"grad_norm": 0.7472855226416464,
|
|
"learning_rate": 1.7932378680224443e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1496802419424057,
|
|
"step": 3860,
|
|
"valid_targets_mean": 1454.9,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 4.064143007360673,
|
|
"grad_norm": 0.7075850524471489,
|
|
"learning_rate": 1.7880228286525275e-05,
|
|
"loss": 0.1511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14226964116096497,
|
|
"step": 3865,
|
|
"valid_targets_mean": 1323.2,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 4.069400630914826,
|
|
"grad_norm": 0.7202367129387591,
|
|
"learning_rate": 1.782809246518836e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14475390315055847,
|
|
"step": 3870,
|
|
"valid_targets_mean": 1491.4,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.0746582544689804,
|
|
"grad_norm": 0.7464426028911113,
|
|
"learning_rate": 1.7775971574621186e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14927147328853607,
|
|
"step": 3875,
|
|
"valid_targets_mean": 1625.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 4.079915878023134,
|
|
"grad_norm": 0.7237691590361065,
|
|
"learning_rate": 1.772386597312861e-05,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303875744342804,
|
|
"step": 3880,
|
|
"valid_targets_mean": 1360.6,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 4.085173501577287,
|
|
"grad_norm": 0.8588529853471539,
|
|
"learning_rate": 1.7671776018910397e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15439622104167938,
|
|
"step": 3885,
|
|
"valid_targets_mean": 1484.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.090431125131441,
|
|
"grad_norm": 0.7780792565743658,
|
|
"learning_rate": 1.761970207005871e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1545676290988922,
|
|
"step": 3890,
|
|
"valid_targets_mean": 1509.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 4.095688748685594,
|
|
"grad_norm": 0.6781007500933507,
|
|
"learning_rate": 1.756764448455572e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13919678330421448,
|
|
"step": 3895,
|
|
"valid_targets_mean": 1507.2,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 4.100946372239748,
|
|
"grad_norm": 0.7744685293110305,
|
|
"learning_rate": 1.7515603620271087e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16268318891525269,
|
|
"step": 3900,
|
|
"valid_targets_mean": 1620.9,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 4.106203995793901,
|
|
"grad_norm": 0.7738920005389524,
|
|
"learning_rate": 1.7463579834959525e-05,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1758730560541153,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2104.2,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.111461619348055,
|
|
"grad_norm": 0.7449550384729909,
|
|
"learning_rate": 1.7411573486258343e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14727957546710968,
|
|
"step": 3910,
|
|
"valid_targets_mean": 1727.2,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 4.116719242902208,
|
|
"grad_norm": 0.8013214299941451,
|
|
"learning_rate": 1.735958493168498e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15961864590644836,
|
|
"step": 3915,
|
|
"valid_targets_mean": 1509.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 4.121976866456362,
|
|
"grad_norm": 0.7229377546821801,
|
|
"learning_rate": 1.730761452863454e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12209049612283707,
|
|
"step": 3920,
|
|
"valid_targets_mean": 1330.2,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 4.127234490010515,
|
|
"grad_norm": 0.7432586495114009,
|
|
"learning_rate": 1.7255662634377365e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14521485567092896,
|
|
"step": 3925,
|
|
"valid_targets_mean": 1642.9,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 4.132492113564669,
|
|
"grad_norm": 0.9356614070212093,
|
|
"learning_rate": 1.720372960605654e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12674985826015472,
|
|
"step": 3930,
|
|
"valid_targets_mean": 1331.2,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 4.137749737118822,
|
|
"grad_norm": 0.7246993147868624,
|
|
"learning_rate": 1.715181580068546e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1463506817817688,
|
|
"step": 3935,
|
|
"valid_targets_mean": 1734.8,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 4.143007360672976,
|
|
"grad_norm": 0.7553205934847995,
|
|
"learning_rate": 1.7099921575145372e-05,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1663040667772293,
|
|
"step": 3940,
|
|
"valid_targets_mean": 1724.6,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 4.148264984227129,
|
|
"grad_norm": 0.7026869178890462,
|
|
"learning_rate": 1.7048047286182945e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284397542476654,
|
|
"step": 3945,
|
|
"valid_targets_mean": 1342.1,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 4.153522607781283,
|
|
"grad_norm": 0.7571640035397575,
|
|
"learning_rate": 1.6996193290407742e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14246192574501038,
|
|
"step": 3950,
|
|
"valid_targets_mean": 1532.2,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 4.158780231335436,
|
|
"grad_norm": 0.7746359454091577,
|
|
"learning_rate": 1.694435994428987e-05,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11980274319648743,
|
|
"step": 3955,
|
|
"valid_targets_mean": 1112.7,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 4.16403785488959,
|
|
"grad_norm": 0.7789865885616932,
|
|
"learning_rate": 1.6892547604157464e-05,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385720670223236,
|
|
"step": 3960,
|
|
"valid_targets_mean": 1402.3,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 4.169295478443743,
|
|
"grad_norm": 0.7409379672132878,
|
|
"learning_rate": 1.6840756626194242e-05,
|
|
"loss": 0.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14954648911952972,
|
|
"step": 3965,
|
|
"valid_targets_mean": 1729.4,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.174553101997897,
|
|
"grad_norm": 0.8830404211700101,
|
|
"learning_rate": 1.678898736643708e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382630616426468,
|
|
"step": 3970,
|
|
"valid_targets_mean": 1233.4,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.17981072555205,
|
|
"grad_norm": 0.7832949923877968,
|
|
"learning_rate": 1.6737240180773554e-05,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12926602363586426,
|
|
"step": 3975,
|
|
"valid_targets_mean": 1254.3,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 4.185068349106204,
|
|
"grad_norm": 0.7460084475449279,
|
|
"learning_rate": 1.6685515424939478e-05,
|
|
"loss": 0.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309753954410553,
|
|
"step": 3980,
|
|
"valid_targets_mean": 1454.9,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 4.190325972660357,
|
|
"grad_norm": 0.7826957982549188,
|
|
"learning_rate": 1.6633813454516486e-05,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13416488468647003,
|
|
"step": 3985,
|
|
"valid_targets_mean": 1471.0,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 4.195583596214511,
|
|
"grad_norm": 0.7505831254614799,
|
|
"learning_rate": 1.658213462492957e-05,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12968222796916962,
|
|
"step": 3990,
|
|
"valid_targets_mean": 1367.9,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 4.200841219768664,
|
|
"grad_norm": 0.7201532762160492,
|
|
"learning_rate": 1.6530479291444636e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13182759284973145,
|
|
"step": 3995,
|
|
"valid_targets_mean": 1632.7,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 4.206098843322818,
|
|
"grad_norm": 0.7095273753928816,
|
|
"learning_rate": 1.6478847809166066e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12015292048454285,
|
|
"step": 4000,
|
|
"valid_targets_mean": 1318.6,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 4.211356466876971,
|
|
"grad_norm": 0.7210862846864323,
|
|
"learning_rate": 1.64272405330343e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13294000923633575,
|
|
"step": 4005,
|
|
"valid_targets_mean": 1515.7,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 4.216614090431126,
|
|
"grad_norm": 0.7963901121025583,
|
|
"learning_rate": 1.6375657817823323e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14794018864631653,
|
|
"step": 4010,
|
|
"valid_targets_mean": 1453.2,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 4.221871713985279,
|
|
"grad_norm": 0.7827226440279664,
|
|
"learning_rate": 1.6324100018138328e-05,
|
|
"loss": 0.1452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1447080820798874,
|
|
"step": 4015,
|
|
"valid_targets_mean": 1779.6,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 4.2271293375394325,
|
|
"grad_norm": 0.7558380275172935,
|
|
"learning_rate": 1.6272567488413204e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12958121299743652,
|
|
"step": 4020,
|
|
"valid_targets_mean": 1324.1,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 4.232386961093586,
|
|
"grad_norm": 0.7897997966258203,
|
|
"learning_rate": 1.6221060582908115e-05,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14728528261184692,
|
|
"step": 4025,
|
|
"valid_targets_mean": 1583.6,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 4.2376445846477395,
|
|
"grad_norm": 0.7658583092317679,
|
|
"learning_rate": 1.616957965570708e-05,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11436955630779266,
|
|
"step": 4030,
|
|
"valid_targets_mean": 1097.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 4.242902208201893,
|
|
"grad_norm": 0.8157009302440904,
|
|
"learning_rate": 1.6118125060715534e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13531437516212463,
|
|
"step": 4035,
|
|
"valid_targets_mean": 1354.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 4.2481598317560465,
|
|
"grad_norm": 0.7601170245470381,
|
|
"learning_rate": 1.6066697151657876e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13570845127105713,
|
|
"step": 4040,
|
|
"valid_targets_mean": 1476.6,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 4.2534174553102,
|
|
"grad_norm": 0.7717376680599746,
|
|
"learning_rate": 1.601529628207508e-05,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313013732433319,
|
|
"step": 4045,
|
|
"valid_targets_mean": 1383.5,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 4.2586750788643535,
|
|
"grad_norm": 0.749117955068463,
|
|
"learning_rate": 1.5963922805322204e-05,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446974277496338,
|
|
"step": 4050,
|
|
"valid_targets_mean": 1727.3,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 4.263932702418507,
|
|
"grad_norm": 0.7128507845507394,
|
|
"learning_rate": 1.5912577074566016e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259775459766388,
|
|
"step": 4055,
|
|
"valid_targets_mean": 1545.9,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 4.2691903259726605,
|
|
"grad_norm": 0.8222387636285143,
|
|
"learning_rate": 1.5861259442782548e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394100785255432,
|
|
"step": 4060,
|
|
"valid_targets_mean": 1450.4,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 4.274447949526814,
|
|
"grad_norm": 0.7388188465031793,
|
|
"learning_rate": 1.580997026275464e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.135959655046463,
|
|
"step": 4065,
|
|
"valid_targets_mean": 1381.1,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 4.279705573080967,
|
|
"grad_norm": 0.7268212505021541,
|
|
"learning_rate": 1.5758709887069562e-05,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14015160501003265,
|
|
"step": 4070,
|
|
"valid_targets_mean": 1789.8,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 4.284963196635121,
|
|
"grad_norm": 0.7873039628206715,
|
|
"learning_rate": 1.570747866811658e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13989371061325073,
|
|
"step": 4075,
|
|
"valid_targets_mean": 1478.9,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 4.290220820189274,
|
|
"grad_norm": 0.8078812721593096,
|
|
"learning_rate": 1.5656276958084478e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12958908081054688,
|
|
"step": 4080,
|
|
"valid_targets_mean": 1466.4,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 4.295478443743428,
|
|
"grad_norm": 0.8344185060991246,
|
|
"learning_rate": 1.560510510895923e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13817384839057922,
|
|
"step": 4085,
|
|
"valid_targets_mean": 1375.2,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.300736067297581,
|
|
"grad_norm": 0.7543615952982942,
|
|
"learning_rate": 1.5553963472521506e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319047212600708,
|
|
"step": 4090,
|
|
"valid_targets_mean": 1385.6,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 4.305993690851735,
|
|
"grad_norm": 0.6888589072225046,
|
|
"learning_rate": 1.5502852400344277e-05,
|
|
"loss": 0.1259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10959073901176453,
|
|
"step": 4095,
|
|
"valid_targets_mean": 1475.1,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 4.311251314405888,
|
|
"grad_norm": 0.7921401180973303,
|
|
"learning_rate": 1.545177224379041e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12821754813194275,
|
|
"step": 4100,
|
|
"valid_targets_mean": 1375.1,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 4.316508937960042,
|
|
"grad_norm": 0.7787601011408837,
|
|
"learning_rate": 1.5400723354010244e-05,
|
|
"loss": 0.1381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903885006904602,
|
|
"step": 4105,
|
|
"valid_targets_mean": 1445.8,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 4.321766561514195,
|
|
"grad_norm": 0.8161407948204759,
|
|
"learning_rate": 1.5349706081939158e-05,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13431984186172485,
|
|
"step": 4110,
|
|
"valid_targets_mean": 1296.5,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 4.327024185068349,
|
|
"grad_norm": 0.7940002845904401,
|
|
"learning_rate": 1.5298720778295195e-05,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12968891859054565,
|
|
"step": 4115,
|
|
"valid_targets_mean": 1412.1,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 4.332281808622502,
|
|
"grad_norm": 0.7674943122933603,
|
|
"learning_rate": 1.5247767793576625e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14080610871315002,
|
|
"step": 4120,
|
|
"valid_targets_mean": 1550.4,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 4.337539432176656,
|
|
"grad_norm": 0.7660184152293514,
|
|
"learning_rate": 1.519684747805953e-05,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143622487783432,
|
|
"step": 4125,
|
|
"valid_targets_mean": 1600.9,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 4.342797055730809,
|
|
"grad_norm": 0.7932998666812077,
|
|
"learning_rate": 1.5145960181795421e-05,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12783610820770264,
|
|
"step": 4130,
|
|
"valid_targets_mean": 1450.6,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 4.348054679284963,
|
|
"grad_norm": 0.7858927769703171,
|
|
"learning_rate": 1.509510625460883e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13657426834106445,
|
|
"step": 4135,
|
|
"valid_targets_mean": 1553.0,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 4.353312302839116,
|
|
"grad_norm": 0.7871058542091913,
|
|
"learning_rate": 1.5044286046094851e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11590927839279175,
|
|
"step": 4140,
|
|
"valid_targets_mean": 1248.1,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 4.358569926393271,
|
|
"grad_norm": 0.8154723418333981,
|
|
"learning_rate": 1.4993499905616823e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14865374565124512,
|
|
"step": 4145,
|
|
"valid_targets_mean": 1415.8,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 4.363827549947424,
|
|
"grad_norm": 0.7792253533823932,
|
|
"learning_rate": 1.494274818230387e-05,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12811344861984253,
|
|
"step": 4150,
|
|
"valid_targets_mean": 1598.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 4.369085173501578,
|
|
"grad_norm": 0.821669667594602,
|
|
"learning_rate": 1.4892031225048503e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13663919270038605,
|
|
"step": 4155,
|
|
"valid_targets_mean": 1378.1,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.374342797055731,
|
|
"grad_norm": 0.8412746945998283,
|
|
"learning_rate": 1.4841349382504247e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15202023088932037,
|
|
"step": 4160,
|
|
"valid_targets_mean": 1548.3,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 4.379600420609885,
|
|
"grad_norm": 0.7345150853195603,
|
|
"learning_rate": 1.4790703003083236e-05,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11928276717662811,
|
|
"step": 4165,
|
|
"valid_targets_mean": 1389.6,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 4.384858044164038,
|
|
"grad_norm": 0.77422626293784,
|
|
"learning_rate": 1.4740092434953793e-05,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12621235847473145,
|
|
"step": 4170,
|
|
"valid_targets_mean": 1520.1,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 4.390115667718192,
|
|
"grad_norm": 0.8112255793265316,
|
|
"learning_rate": 1.4689518026038065e-05,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12599149346351624,
|
|
"step": 4175,
|
|
"valid_targets_mean": 1298.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 4.395373291272345,
|
|
"grad_norm": 0.7926395106584089,
|
|
"learning_rate": 1.4638980124009649e-05,
|
|
"loss": 0.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336921900510788,
|
|
"step": 4180,
|
|
"valid_targets_mean": 1391.2,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 4.400630914826499,
|
|
"grad_norm": 0.7275052659304198,
|
|
"learning_rate": 1.458847907629113e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12254877388477325,
|
|
"step": 4185,
|
|
"valid_targets_mean": 1421.6,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 4.405888538380652,
|
|
"grad_norm": 0.8102367575631457,
|
|
"learning_rate": 1.4538015230051761e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1422000229358673,
|
|
"step": 4190,
|
|
"valid_targets_mean": 1567.6,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 4.411146161934806,
|
|
"grad_norm": 0.8033090146453002,
|
|
"learning_rate": 1.4487588932205072e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15437643229961395,
|
|
"step": 4195,
|
|
"valid_targets_mean": 1550.9,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.416403785488959,
|
|
"grad_norm": 0.8268837868112663,
|
|
"learning_rate": 1.4437200529406425e-05,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14400029182434082,
|
|
"step": 4200,
|
|
"valid_targets_mean": 1678.6,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 4.421661409043113,
|
|
"grad_norm": 0.7360355045558858,
|
|
"learning_rate": 1.4386850368050706e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14487197995185852,
|
|
"step": 4205,
|
|
"valid_targets_mean": 1625.6,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 4.426919032597266,
|
|
"grad_norm": 0.7108983738475014,
|
|
"learning_rate": 1.433653879426991e-05,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11359036713838577,
|
|
"step": 4210,
|
|
"valid_targets_mean": 1488.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.4321766561514195,
|
|
"grad_norm": 0.8159538706965361,
|
|
"learning_rate": 1.4286266153930733e-05,
|
|
"loss": 0.1311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13606056571006775,
|
|
"step": 4215,
|
|
"valid_targets_mean": 1418.0,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 4.437434279705573,
|
|
"grad_norm": 0.940569049580046,
|
|
"learning_rate": 1.4236032792632251e-05,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13143770396709442,
|
|
"step": 4220,
|
|
"valid_targets_mean": 1187.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 4.4426919032597265,
|
|
"grad_norm": 0.8112256287758004,
|
|
"learning_rate": 1.4185839055703511e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12770329415798187,
|
|
"step": 4225,
|
|
"valid_targets_mean": 1670.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 4.44794952681388,
|
|
"grad_norm": 0.7295873729938533,
|
|
"learning_rate": 1.4135685288201151e-05,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12795163691043854,
|
|
"step": 4230,
|
|
"valid_targets_mean": 1437.3,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 4.4532071503680335,
|
|
"grad_norm": 0.7438639117925115,
|
|
"learning_rate": 1.4085571834907046e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13000422716140747,
|
|
"step": 4235,
|
|
"valid_targets_mean": 1597.1,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 4.458464773922187,
|
|
"grad_norm": 0.7854347619692846,
|
|
"learning_rate": 1.4035499040325946e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13853585720062256,
|
|
"step": 4240,
|
|
"valid_targets_mean": 1561.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 4.4637223974763405,
|
|
"grad_norm": 0.6810473176157009,
|
|
"learning_rate": 1.3985467248683064e-05,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12546569108963013,
|
|
"step": 4245,
|
|
"valid_targets_mean": 2256.9,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 4.468980021030494,
|
|
"grad_norm": 0.4278573598882043,
|
|
"learning_rate": 1.3935476803921755e-05,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0785178393125534,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3085.9,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 4.4742376445846475,
|
|
"grad_norm": 0.4168789086494719,
|
|
"learning_rate": 1.3885528049701148e-05,
|
|
"loss": 0.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08329863101243973,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3431.4,
|
|
"valid_targets_min": 2790
|
|
},
|
|
{
|
|
"epoch": 4.479495268138801,
|
|
"grad_norm": 0.38746000134994446,
|
|
"learning_rate": 1.3835621329393738e-05,
|
|
"loss": 0.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08167532086372375,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3551.9,
|
|
"valid_targets_min": 2133
|
|
},
|
|
{
|
|
"epoch": 4.484752891692954,
|
|
"grad_norm": 0.411473617129491,
|
|
"learning_rate": 1.3785756986083091e-05,
|
|
"loss": 0.0967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0843782126903534,
|
|
"step": 4265,
|
|
"valid_targets_mean": 2739.8,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 4.490010515247108,
|
|
"grad_norm": 0.431217037259204,
|
|
"learning_rate": 1.3735935362561419e-05,
|
|
"loss": 0.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07926741987466812,
|
|
"step": 4270,
|
|
"valid_targets_mean": 2461.2,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 4.495268138801261,
|
|
"grad_norm": 0.9614097819386166,
|
|
"learning_rate": 1.3686156801327293e-05,
|
|
"loss": 0.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14519354701042175,
|
|
"step": 4275,
|
|
"valid_targets_mean": 1136.5,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 4.500525762355416,
|
|
"grad_norm": 0.3988218392818991,
|
|
"learning_rate": 1.3636421644583231e-05,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07371596992015839,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3162.1,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 4.505783385909568,
|
|
"grad_norm": 0.3780646101186313,
|
|
"learning_rate": 1.3586730234233367e-05,
|
|
"loss": 0.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07808525860309601,
|
|
"step": 4285,
|
|
"valid_targets_mean": 2876.7,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 4.511041009463723,
|
|
"grad_norm": 0.5817849072696845,
|
|
"learning_rate": 1.3537082911881106e-05,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21639221906661987,
|
|
"step": 4290,
|
|
"valid_targets_mean": 2300.2,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.516298633017876,
|
|
"grad_norm": 0.301823494032142,
|
|
"learning_rate": 1.3487480018826772e-05,
|
|
"loss": 0.0742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06836953014135361,
|
|
"step": 4295,
|
|
"valid_targets_mean": 5440.1,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 4.52155625657203,
|
|
"grad_norm": 0.355945615977774,
|
|
"learning_rate": 1.343792189606525e-05,
|
|
"loss": 0.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08197809755802155,
|
|
"step": 4300,
|
|
"valid_targets_mean": 2827.2,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.526813880126183,
|
|
"grad_norm": 0.3197853950404343,
|
|
"learning_rate": 1.338840888428366e-05,
|
|
"loss": 0.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07435546815395355,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3693.8,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 4.532071503680337,
|
|
"grad_norm": 0.40653873135896135,
|
|
"learning_rate": 1.3338941323859023e-05,
|
|
"loss": 0.095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10022848844528198,
|
|
"step": 4310,
|
|
"valid_targets_mean": 2589.2,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 4.53732912723449,
|
|
"grad_norm": 0.37737572640615413,
|
|
"learning_rate": 1.3289519554855858e-05,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07328416407108307,
|
|
"step": 4315,
|
|
"valid_targets_mean": 2860.2,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.542586750788644,
|
|
"grad_norm": 0.40099120673343713,
|
|
"learning_rate": 1.3240143917023938e-05,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07614661753177643,
|
|
"step": 4320,
|
|
"valid_targets_mean": 2444.4,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 4.547844374342797,
|
|
"grad_norm": 0.3647763568957528,
|
|
"learning_rate": 1.3190814749795893e-05,
|
|
"loss": 0.0756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07858487218618393,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3651.4,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 4.553101997896951,
|
|
"grad_norm": 0.5543459716744039,
|
|
"learning_rate": 1.3141532392284873e-05,
|
|
"loss": 0.0798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12775170803070068,
|
|
"step": 4330,
|
|
"valid_targets_mean": 1973.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.558359621451104,
|
|
"grad_norm": 0.5362310705045473,
|
|
"learning_rate": 1.3092297183282261e-05,
|
|
"loss": 0.0903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10443666577339172,
|
|
"step": 4335,
|
|
"valid_targets_mean": 1952.9,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 4.563617245005258,
|
|
"grad_norm": 0.4033115586535664,
|
|
"learning_rate": 1.3043109461255305e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07978132367134094,
|
|
"step": 4340,
|
|
"valid_targets_mean": 3431.8,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 4.568874868559411,
|
|
"grad_norm": 0.4532542937767393,
|
|
"learning_rate": 1.29939695643448e-05,
|
|
"loss": 0.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0826805830001831,
|
|
"step": 4345,
|
|
"valid_targets_mean": 2481.8,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 4.574132492113565,
|
|
"grad_norm": 0.5536552657539693,
|
|
"learning_rate": 1.2944877830362777e-05,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13342218101024628,
|
|
"step": 4350,
|
|
"valid_targets_mean": 2078.3,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 4.579390115667718,
|
|
"grad_norm": 0.3862802794923409,
|
|
"learning_rate": 1.289583459679017e-05,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08603660762310028,
|
|
"step": 4355,
|
|
"valid_targets_mean": 2960.0,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 4.584647739221872,
|
|
"grad_norm": 0.4240050269230897,
|
|
"learning_rate": 1.2846840200774484e-05,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0871136486530304,
|
|
"step": 4360,
|
|
"valid_targets_mean": 2375.6,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 4.589905362776025,
|
|
"grad_norm": 0.4574251143763385,
|
|
"learning_rate": 1.2797894979127503e-05,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12482558935880661,
|
|
"step": 4365,
|
|
"valid_targets_mean": 2584.3,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 4.595162986330179,
|
|
"grad_norm": 0.47247433988660864,
|
|
"learning_rate": 1.2748999268322977e-05,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11209756135940552,
|
|
"step": 4370,
|
|
"valid_targets_mean": 2515.2,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 4.600420609884332,
|
|
"grad_norm": 0.56721988852021,
|
|
"learning_rate": 1.2700153404494247e-05,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12281329184770584,
|
|
"step": 4375,
|
|
"valid_targets_mean": 2212.8,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 4.605678233438486,
|
|
"grad_norm": 0.6444590231201038,
|
|
"learning_rate": 1.2651357723432027e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39276736974716187,
|
|
"step": 4380,
|
|
"valid_targets_mean": 2390.8,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 4.610935856992639,
|
|
"grad_norm": 0.425248494736928,
|
|
"learning_rate": 1.2602612560582044e-05,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07209916412830353,
|
|
"step": 4385,
|
|
"valid_targets_mean": 2108.3,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 4.616193480546793,
|
|
"grad_norm": 0.392468813631525,
|
|
"learning_rate": 1.2553918251042701e-05,
|
|
"loss": 0.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09675078094005585,
|
|
"step": 4390,
|
|
"valid_targets_mean": 3110.5,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.621451104100946,
|
|
"grad_norm": 0.4435373510269777,
|
|
"learning_rate": 1.2505275129562851e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15841440856456757,
|
|
"step": 4395,
|
|
"valid_targets_mean": 2953.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.6267087276550996,
|
|
"grad_norm": 0.3940934433584046,
|
|
"learning_rate": 1.2456683530539446e-05,
|
|
"loss": 0.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08367893099784851,
|
|
"step": 4400,
|
|
"valid_targets_mean": 3337.5,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 4.631966351209253,
|
|
"grad_norm": 0.43397069656142984,
|
|
"learning_rate": 1.2408143788015225e-05,
|
|
"loss": 0.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09857040643692017,
|
|
"step": 4405,
|
|
"valid_targets_mean": 4345.2,
|
|
"valid_targets_min": 2665
|
|
},
|
|
{
|
|
"epoch": 4.6372239747634065,
|
|
"grad_norm": 0.34164095688934526,
|
|
"learning_rate": 1.2359656235676468e-05,
|
|
"loss": 0.0858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07392176985740662,
|
|
"step": 4410,
|
|
"valid_targets_mean": 3302.3,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.642481598317561,
|
|
"grad_norm": 0.3879656343424053,
|
|
"learning_rate": 1.231122120685066e-05,
|
|
"loss": 0.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07721789181232452,
|
|
"step": 4415,
|
|
"valid_targets_mean": 2945.6,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 4.6477392218717135,
|
|
"grad_norm": 0.38811962478696127,
|
|
"learning_rate": 1.2262839034504208e-05,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06672494113445282,
|
|
"step": 4420,
|
|
"valid_targets_mean": 2946.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 4.652996845425868,
|
|
"grad_norm": 0.388998894081642,
|
|
"learning_rate": 1.2214510051240164e-05,
|
|
"loss": 0.0732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07016399502754211,
|
|
"step": 4425,
|
|
"valid_targets_mean": 3049.2,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 4.658254468980021,
|
|
"grad_norm": 0.3619988789598876,
|
|
"learning_rate": 1.2166234589295951e-05,
|
|
"loss": 0.0789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07090885937213898,
|
|
"step": 4430,
|
|
"valid_targets_mean": 3680.9,
|
|
"valid_targets_min": 2540
|
|
},
|
|
{
|
|
"epoch": 4.663512092534175,
|
|
"grad_norm": 0.4254054079613278,
|
|
"learning_rate": 1.2118012980541013e-05,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08978554606437683,
|
|
"step": 4435,
|
|
"valid_targets_mean": 3068.1,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 4.668769716088328,
|
|
"grad_norm": 0.3147850073376493,
|
|
"learning_rate": 1.2069845556474626e-05,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04920667037367821,
|
|
"step": 4440,
|
|
"valid_targets_mean": 3092.3,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 4.674027339642482,
|
|
"grad_norm": 0.33164638804064306,
|
|
"learning_rate": 1.2021732648223553e-05,
|
|
"loss": 0.0665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05687657743692398,
|
|
"step": 4445,
|
|
"valid_targets_mean": 3000.5,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 4.679284963196635,
|
|
"grad_norm": 0.3718431037513027,
|
|
"learning_rate": 1.1973674586539791e-05,
|
|
"loss": 0.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07306220382452011,
|
|
"step": 4450,
|
|
"valid_targets_mean": 3647.8,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 4.684542586750789,
|
|
"grad_norm": 0.4791926891231177,
|
|
"learning_rate": 1.1925671701798292e-05,
|
|
"loss": 0.0688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061685558408498764,
|
|
"step": 4455,
|
|
"valid_targets_mean": 2608.7,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 4.689800210304942,
|
|
"grad_norm": 0.6344681573125026,
|
|
"learning_rate": 1.1877724323994704e-05,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14251452684402466,
|
|
"step": 4460,
|
|
"valid_targets_mean": 1622.6,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 4.695057833859096,
|
|
"grad_norm": 0.4439081129575039,
|
|
"learning_rate": 1.1829832782743074e-05,
|
|
"loss": 0.0798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1002945601940155,
|
|
"step": 4465,
|
|
"valid_targets_mean": 3278.1,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 4.700315457413249,
|
|
"grad_norm": 0.45781425409055415,
|
|
"learning_rate": 1.178199740727362e-05,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08636211603879929,
|
|
"step": 4470,
|
|
"valid_targets_mean": 2136.1,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 4.705573080967403,
|
|
"grad_norm": 0.364943194677604,
|
|
"learning_rate": 1.1734218526430446e-05,
|
|
"loss": 0.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07217290997505188,
|
|
"step": 4475,
|
|
"valid_targets_mean": 3365.9,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 4.710830704521556,
|
|
"grad_norm": 0.9066742961112946,
|
|
"learning_rate": 1.1686496468669269e-05,
|
|
"loss": 0.1061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11579406261444092,
|
|
"step": 4480,
|
|
"valid_targets_mean": 1630.2,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 4.71608832807571,
|
|
"grad_norm": 0.42100013149054094,
|
|
"learning_rate": 1.1638831562055191e-05,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07590813934803009,
|
|
"step": 4485,
|
|
"valid_targets_mean": 2701.2,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.721345951629863,
|
|
"grad_norm": 0.6775479142011301,
|
|
"learning_rate": 1.1591224134260425e-05,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16865578293800354,
|
|
"step": 4490,
|
|
"valid_targets_mean": 1553.4,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.726603575184017,
|
|
"grad_norm": 0.4327586439278677,
|
|
"learning_rate": 1.1543674512562037e-05,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07839912176132202,
|
|
"step": 4495,
|
|
"valid_targets_mean": 2974.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.73186119873817,
|
|
"grad_norm": 0.4034140352173875,
|
|
"learning_rate": 1.1496183023839729e-05,
|
|
"loss": 0.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07873895764350891,
|
|
"step": 4500,
|
|
"valid_targets_mean": 3796.6,
|
|
"valid_targets_min": 2850
|
|
},
|
|
{
|
|
"epoch": 4.737118822292324,
|
|
"grad_norm": 0.48243823219814264,
|
|
"learning_rate": 1.144874999457354e-05,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0818183571100235,
|
|
"step": 4505,
|
|
"valid_targets_mean": 2880.9,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.742376445846477,
|
|
"grad_norm": 0.36978017739421254,
|
|
"learning_rate": 1.1401375750841637e-05,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08065995573997498,
|
|
"step": 4510,
|
|
"valid_targets_mean": 4210.0,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 4.747634069400631,
|
|
"grad_norm": 0.33331605818915316,
|
|
"learning_rate": 1.1354060618318086e-05,
|
|
"loss": 0.0664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0686541348695755,
|
|
"step": 4515,
|
|
"valid_targets_mean": 4181.8,
|
|
"valid_targets_min": 2540
|
|
},
|
|
{
|
|
"epoch": 4.752891692954784,
|
|
"grad_norm": 0.33795353883889473,
|
|
"learning_rate": 1.1306804922270568e-05,
|
|
"loss": 0.0789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06736970692873001,
|
|
"step": 4520,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 4.758149316508938,
|
|
"grad_norm": 0.3586629736315267,
|
|
"learning_rate": 1.1259608987558175e-05,
|
|
"loss": 0.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060045026242733,
|
|
"step": 4525,
|
|
"valid_targets_mean": 2830.4,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 4.763406940063091,
|
|
"grad_norm": 0.4917418712564499,
|
|
"learning_rate": 1.1212473138629187e-05,
|
|
"loss": 0.0905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09968134015798569,
|
|
"step": 4530,
|
|
"valid_targets_mean": 2476.3,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 4.768664563617245,
|
|
"grad_norm": 0.2803048818061113,
|
|
"learning_rate": 1.1165397699518797e-05,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047491952776908875,
|
|
"step": 4535,
|
|
"valid_targets_mean": 4019.0,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 4.773922187171398,
|
|
"grad_norm": 0.3874406407586897,
|
|
"learning_rate": 1.1118382993846933e-05,
|
|
"loss": 0.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08119888603687286,
|
|
"step": 4540,
|
|
"valid_targets_mean": 3585.2,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 4.779179810725552,
|
|
"grad_norm": 0.48850349650856384,
|
|
"learning_rate": 1.1071429344816003e-05,
|
|
"loss": 0.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09316246956586838,
|
|
"step": 4545,
|
|
"valid_targets_mean": 3281.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 4.784437434279706,
|
|
"grad_norm": 0.29441890076300864,
|
|
"learning_rate": 1.102453707520867e-05,
|
|
"loss": 0.0684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050600532442331314,
|
|
"step": 4550,
|
|
"valid_targets_mean": 3693.3,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.789695057833859,
|
|
"grad_norm": 0.6218549206409588,
|
|
"learning_rate": 1.0977706507385673e-05,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09425666928291321,
|
|
"step": 4555,
|
|
"valid_targets_mean": 1394.1,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 4.794952681388013,
|
|
"grad_norm": 0.5405869895858056,
|
|
"learning_rate": 1.0930937963283554e-05,
|
|
"loss": 0.0774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10098089277744293,
|
|
"step": 4560,
|
|
"valid_targets_mean": 3008.5,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 4.8002103049421665,
|
|
"grad_norm": 0.5414687913481461,
|
|
"learning_rate": 1.088423176441248e-05,
|
|
"loss": 0.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09131792187690735,
|
|
"step": 4565,
|
|
"valid_targets_mean": 1961.6,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 4.80546792849632,
|
|
"grad_norm": 0.5143395929290274,
|
|
"learning_rate": 1.0837588231854044e-05,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10974930226802826,
|
|
"step": 4570,
|
|
"valid_targets_mean": 2237.1,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 4.8107255520504735,
|
|
"grad_norm": 0.534398402750308,
|
|
"learning_rate": 1.0791007686259019e-05,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07966640591621399,
|
|
"step": 4575,
|
|
"valid_targets_mean": 1692.9,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.815983175604627,
|
|
"grad_norm": 0.4046720762065736,
|
|
"learning_rate": 1.0744490447845172e-05,
|
|
"loss": 0.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05888023599982262,
|
|
"step": 4580,
|
|
"valid_targets_mean": 2879.0,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.8212407991587805,
|
|
"grad_norm": 0.602601321603702,
|
|
"learning_rate": 1.0698036836395084e-05,
|
|
"loss": 0.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07196278870105743,
|
|
"step": 4585,
|
|
"valid_targets_mean": 3544.4,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 4.826498422712934,
|
|
"grad_norm": 0.4184262869470214,
|
|
"learning_rate": 1.0651647171253936e-05,
|
|
"loss": 0.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07269267737865448,
|
|
"step": 4590,
|
|
"valid_targets_mean": 2639.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 4.831756046267087,
|
|
"grad_norm": 0.5176677296781518,
|
|
"learning_rate": 1.0605321771327267e-05,
|
|
"loss": 0.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09694170951843262,
|
|
"step": 4595,
|
|
"valid_targets_mean": 2873.4,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 4.837013669821241,
|
|
"grad_norm": 0.4778189406353667,
|
|
"learning_rate": 1.0559060955078873e-05,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12576048076152802,
|
|
"step": 4600,
|
|
"valid_targets_mean": 2972.7,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 4.842271293375394,
|
|
"grad_norm": 0.4170832496590639,
|
|
"learning_rate": 1.0512865040528558e-05,
|
|
"loss": 0.0933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07232730090618134,
|
|
"step": 4605,
|
|
"valid_targets_mean": 3139.2,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 4.847528916929548,
|
|
"grad_norm": 0.4524793562623884,
|
|
"learning_rate": 1.0466734345249946e-05,
|
|
"loss": 0.064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07442990690469742,
|
|
"step": 4610,
|
|
"valid_targets_mean": 2407.1,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 4.852786540483701,
|
|
"grad_norm": 0.5232546604492923,
|
|
"learning_rate": 1.0420669186368311e-05,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08989334106445312,
|
|
"step": 4615,
|
|
"valid_targets_mean": 2313.1,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 4.858044164037855,
|
|
"grad_norm": 0.37937194140200015,
|
|
"learning_rate": 1.0374669880558419e-05,
|
|
"loss": 0.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07011362165212631,
|
|
"step": 4620,
|
|
"valid_targets_mean": 3987.6,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 4.863301787592008,
|
|
"grad_norm": 0.35505357590428044,
|
|
"learning_rate": 1.0328736744042311e-05,
|
|
"loss": 0.0616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06438374519348145,
|
|
"step": 4625,
|
|
"valid_targets_mean": 4085.2,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.868559411146162,
|
|
"grad_norm": 0.3765782150134594,
|
|
"learning_rate": 1.0282870092587144e-05,
|
|
"loss": 0.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07843618094921112,
|
|
"step": 4630,
|
|
"valid_targets_mean": 4141.8,
|
|
"valid_targets_min": 3322
|
|
},
|
|
{
|
|
"epoch": 4.873817034700315,
|
|
"grad_norm": 0.4153059992226463,
|
|
"learning_rate": 1.023707024150305e-05,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07007251679897308,
|
|
"step": 4635,
|
|
"valid_targets_mean": 2203.5,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 4.879074658254469,
|
|
"grad_norm": 0.39185270066638633,
|
|
"learning_rate": 1.0191337505640905e-05,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06810898333787918,
|
|
"step": 4640,
|
|
"valid_targets_mean": 2767.1,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 4.884332281808622,
|
|
"grad_norm": 0.2948352603788897,
|
|
"learning_rate": 1.0145672199390226e-05,
|
|
"loss": 0.0899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040803369134664536,
|
|
"step": 4645,
|
|
"valid_targets_mean": 3203.1,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 4.889589905362776,
|
|
"grad_norm": 0.5282288868271211,
|
|
"learning_rate": 1.010007463667699e-05,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312532275915146,
|
|
"step": 4650,
|
|
"valid_targets_mean": 3090.6,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 4.894847528916929,
|
|
"grad_norm": 0.4137506408486238,
|
|
"learning_rate": 1.0054545130961441e-05,
|
|
"loss": 0.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06519424915313721,
|
|
"step": 4655,
|
|
"valid_targets_mean": 3816.9,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 4.900105152471083,
|
|
"grad_norm": 0.53057303622189,
|
|
"learning_rate": 1.0009083995236009e-05,
|
|
"loss": 0.0789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08234363049268723,
|
|
"step": 4660,
|
|
"valid_targets_mean": 1804.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 4.905362776025236,
|
|
"grad_norm": 0.4063632976890675,
|
|
"learning_rate": 9.963691542023079e-06,
|
|
"loss": 0.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07967343181371689,
|
|
"step": 4665,
|
|
"valid_targets_mean": 3685.7,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.91062039957939,
|
|
"grad_norm": 0.7734429760367383,
|
|
"learning_rate": 9.918368083372884e-06,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1001024842262268,
|
|
"step": 4670,
|
|
"valid_targets_mean": 1168.2,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 4.915878023133543,
|
|
"grad_norm": 0.4888579976068423,
|
|
"learning_rate": 9.87311393086138e-06,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09964701533317566,
|
|
"step": 4675,
|
|
"valid_targets_mean": 2344.0,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 4.921135646687697,
|
|
"grad_norm": 0.5202593321678269,
|
|
"learning_rate": 9.827929395588048e-06,
|
|
"loss": 0.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09963840991258621,
|
|
"step": 4680,
|
|
"valid_targets_mean": 2441.2,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 4.926393270241851,
|
|
"grad_norm": 0.38623067468242067,
|
|
"learning_rate": 9.782814788173787e-06,
|
|
"loss": 0.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0651584044098854,
|
|
"step": 4685,
|
|
"valid_targets_mean": 3706.1,
|
|
"valid_targets_min": 2692
|
|
},
|
|
{
|
|
"epoch": 4.931650893796004,
|
|
"grad_norm": 0.43803480968353947,
|
|
"learning_rate": 9.737770418758808e-06,
|
|
"loss": 0.0789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08176561444997787,
|
|
"step": 4690,
|
|
"valid_targets_mean": 3458.6,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 4.936908517350158,
|
|
"grad_norm": 0.3810855503521825,
|
|
"learning_rate": 9.692796597000438e-06,
|
|
"loss": 0.0677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05980216711759567,
|
|
"step": 4695,
|
|
"valid_targets_mean": 2549.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 4.942166140904312,
|
|
"grad_norm": 1.0239897846847723,
|
|
"learning_rate": 9.64789363207103e-06,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12131184339523315,
|
|
"step": 4700,
|
|
"valid_targets_mean": 848.2,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 4.947423764458465,
|
|
"grad_norm": 0.3832887411067774,
|
|
"learning_rate": 9.603061832655847e-06,
|
|
"loss": 0.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0646553784608841,
|
|
"step": 4705,
|
|
"valid_targets_mean": 2999.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 4.952681388012619,
|
|
"grad_norm": 0.47030377743660756,
|
|
"learning_rate": 9.55830150695093e-06,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06938720494508743,
|
|
"step": 4710,
|
|
"valid_targets_mean": 2648.6,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 4.957939011566772,
|
|
"grad_norm": 0.4582636643027542,
|
|
"learning_rate": 9.513612962660935e-06,
|
|
"loss": 0.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0744820311665535,
|
|
"step": 4715,
|
|
"valid_targets_mean": 3591.1,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 4.963196635120926,
|
|
"grad_norm": 0.32015615688546256,
|
|
"learning_rate": 9.468996506997093e-06,
|
|
"loss": 0.08,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05729362741112709,
|
|
"step": 4720,
|
|
"valid_targets_mean": 4131.8,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 4.968454258675079,
|
|
"grad_norm": 0.30273559116878307,
|
|
"learning_rate": 9.424452446675059e-06,
|
|
"loss": 0.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07140018790960312,
|
|
"step": 4725,
|
|
"valid_targets_mean": 4730.3,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.9737118822292326,
|
|
"grad_norm": 0.37197894705917245,
|
|
"learning_rate": 9.379981087912795e-06,
|
|
"loss": 0.0704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0704803466796875,
|
|
"step": 4730,
|
|
"valid_targets_mean": 3955.4,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.978969505783386,
|
|
"grad_norm": 0.4471685236026393,
|
|
"learning_rate": 9.33558273642848e-06,
|
|
"loss": 0.0828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.106879323720932,
|
|
"step": 4735,
|
|
"valid_targets_mean": 2921.7,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 4.9842271293375395,
|
|
"grad_norm": 0.40984484847997404,
|
|
"learning_rate": 9.291257697438393e-06,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07882986217737198,
|
|
"step": 4740,
|
|
"valid_targets_mean": 3169.6,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 4.989484752891693,
|
|
"grad_norm": 0.47209456294328006,
|
|
"learning_rate": 9.247006275654861e-06,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11203093826770782,
|
|
"step": 4745,
|
|
"valid_targets_mean": 2371.4,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 4.9947423764458465,
|
|
"grad_norm": 0.36764238768667556,
|
|
"learning_rate": 9.202828775284101e-06,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06462462246417999,
|
|
"step": 4750,
|
|
"valid_targets_mean": 3504.9,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4445734168860689,
|
|
"learning_rate": 9.158725500024148e-06,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0679134726524353,
|
|
"step": 4755,
|
|
"valid_targets_mean": 2843.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 5.0052576235541535,
|
|
"grad_norm": 0.9005010386115802,
|
|
"learning_rate": 9.114696753062816e-06,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1408640295267105,
|
|
"step": 4760,
|
|
"valid_targets_mean": 1423.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.010515247108307,
|
|
"grad_norm": 0.7837957016055458,
|
|
"learning_rate": 9.07074283707554e-06,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13309352099895477,
|
|
"step": 4765,
|
|
"valid_targets_mean": 1462.5,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 5.0157728706624605,
|
|
"grad_norm": 0.7528858768740857,
|
|
"learning_rate": 9.026864054223337e-06,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365063637495041,
|
|
"step": 4770,
|
|
"valid_targets_mean": 1514.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 5.021030494216614,
|
|
"grad_norm": 0.7788963160976209,
|
|
"learning_rate": 8.98306070615073e-06,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12093943357467651,
|
|
"step": 4775,
|
|
"valid_targets_mean": 1450.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 5.0262881177707674,
|
|
"grad_norm": 0.8483084654151909,
|
|
"learning_rate": 8.93933309398368e-06,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15616527199745178,
|
|
"step": 4780,
|
|
"valid_targets_mean": 1548.3,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 5.031545741324921,
|
|
"grad_norm": 0.7540250321317807,
|
|
"learning_rate": 8.89568151832745e-06,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13268810510635376,
|
|
"step": 4785,
|
|
"valid_targets_mean": 1539.9,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 5.036803364879074,
|
|
"grad_norm": 0.7300297057608534,
|
|
"learning_rate": 8.852106279264643e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12216250598430634,
|
|
"step": 4790,
|
|
"valid_targets_mean": 1543.9,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 5.042060988433228,
|
|
"grad_norm": 0.8339538250265318,
|
|
"learning_rate": 8.808607676353074e-06,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13071037828922272,
|
|
"step": 4795,
|
|
"valid_targets_mean": 1371.5,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.047318611987381,
|
|
"grad_norm": 0.7673761030296782,
|
|
"learning_rate": 8.765186008623706e-06,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13140574097633362,
|
|
"step": 4800,
|
|
"valid_targets_mean": 1560.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 5.052576235541535,
|
|
"grad_norm": 0.7277150857883916,
|
|
"learning_rate": 8.721841574578617e-06,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11650878190994263,
|
|
"step": 4805,
|
|
"valid_targets_mean": 1439.9,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 5.057833859095688,
|
|
"grad_norm": 0.9002869437597668,
|
|
"learning_rate": 8.678574672188963e-06,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15190349519252777,
|
|
"step": 4810,
|
|
"valid_targets_mean": 1455.1,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.063091482649842,
|
|
"grad_norm": 0.7262265694953299,
|
|
"learning_rate": 8.635385598892881e-06,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11696530133485794,
|
|
"step": 4815,
|
|
"valid_targets_mean": 1540.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 5.068349106203995,
|
|
"grad_norm": 0.7401947702855186,
|
|
"learning_rate": 8.592274651593482e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1198652982711792,
|
|
"step": 4820,
|
|
"valid_targets_mean": 1387.2,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 5.07360672975815,
|
|
"grad_norm": 0.7212377234961793,
|
|
"learning_rate": 8.549242126656814e-06,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11112427711486816,
|
|
"step": 4825,
|
|
"valid_targets_mean": 1581.9,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 5.078864353312303,
|
|
"grad_norm": 0.8193893885403605,
|
|
"learning_rate": 8.506288319909793e-06,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13221386075019836,
|
|
"step": 4830,
|
|
"valid_targets_mean": 1635.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 5.084121976866457,
|
|
"grad_norm": 0.8628254394165564,
|
|
"learning_rate": 8.463413526638186e-06,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14875589311122894,
|
|
"step": 4835,
|
|
"valid_targets_mean": 1542.4,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 5.08937960042061,
|
|
"grad_norm": 0.861777417260219,
|
|
"learning_rate": 8.420618041584604e-06,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15293246507644653,
|
|
"step": 4840,
|
|
"valid_targets_mean": 1879.2,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 5.094637223974764,
|
|
"grad_norm": 0.8301782418150703,
|
|
"learning_rate": 8.377902158946427e-06,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13325785100460052,
|
|
"step": 4845,
|
|
"valid_targets_mean": 1409.0,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.099894847528917,
|
|
"grad_norm": 0.7828651354098343,
|
|
"learning_rate": 8.335266172373832e-06,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11418794840574265,
|
|
"step": 4850,
|
|
"valid_targets_mean": 1377.8,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 5.105152471083071,
|
|
"grad_norm": 0.7527964194617114,
|
|
"learning_rate": 8.292710374967737e-06,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13174590468406677,
|
|
"step": 4855,
|
|
"valid_targets_mean": 1811.9,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 5.110410094637224,
|
|
"grad_norm": 0.8640937270300099,
|
|
"learning_rate": 8.250235059277792e-06,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15677103400230408,
|
|
"step": 4860,
|
|
"valid_targets_mean": 1814.3,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.115667718191378,
|
|
"grad_norm": 0.7204529998017919,
|
|
"learning_rate": 8.207840517300398e-06,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10853917896747589,
|
|
"step": 4865,
|
|
"valid_targets_mean": 1493.4,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 5.120925341745531,
|
|
"grad_norm": 0.7863794725114303,
|
|
"learning_rate": 8.165527040476666e-06,
|
|
"loss": 0.1237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11814877390861511,
|
|
"step": 4870,
|
|
"valid_targets_mean": 1456.2,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 5.126182965299685,
|
|
"grad_norm": 0.8210569699214127,
|
|
"learning_rate": 8.123294919690413e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11533160507678986,
|
|
"step": 4875,
|
|
"valid_targets_mean": 1340.2,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 5.131440588853838,
|
|
"grad_norm": 0.9282844049860665,
|
|
"learning_rate": 8.081144445266201e-06,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132160946726799,
|
|
"step": 4880,
|
|
"valid_targets_mean": 1569.0,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 5.136698212407992,
|
|
"grad_norm": 0.8233172786784679,
|
|
"learning_rate": 8.039075906967293e-06,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13213878870010376,
|
|
"step": 4885,
|
|
"valid_targets_mean": 1595.7,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 5.141955835962145,
|
|
"grad_norm": 0.7749844452020539,
|
|
"learning_rate": 7.99708959399368e-06,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12865442037582397,
|
|
"step": 4890,
|
|
"valid_targets_mean": 1801.3,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 5.147213459516299,
|
|
"grad_norm": 0.7508507714627392,
|
|
"learning_rate": 7.955185794980117e-06,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1306474506855011,
|
|
"step": 4895,
|
|
"valid_targets_mean": 1652.4,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 5.152471083070452,
|
|
"grad_norm": 0.7429461379936947,
|
|
"learning_rate": 7.913364797994111e-06,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13677969574928284,
|
|
"step": 4900,
|
|
"valid_targets_mean": 1684.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 5.157728706624606,
|
|
"grad_norm": 0.7965727542916411,
|
|
"learning_rate": 7.871626890533917e-06,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1270841658115387,
|
|
"step": 4905,
|
|
"valid_targets_mean": 1482.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 5.162986330178759,
|
|
"grad_norm": 0.8872711618376462,
|
|
"learning_rate": 7.829972359526626e-06,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12530039250850677,
|
|
"step": 4910,
|
|
"valid_targets_mean": 1710.1,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.168243953732913,
|
|
"grad_norm": 0.8390256007090793,
|
|
"learning_rate": 7.788401491326155e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12561118602752686,
|
|
"step": 4915,
|
|
"valid_targets_mean": 1638.9,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 5.173501577287066,
|
|
"grad_norm": 0.7855405662254581,
|
|
"learning_rate": 7.746914571711264e-06,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12468838691711426,
|
|
"step": 4920,
|
|
"valid_targets_mean": 1525.7,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 5.1787592008412195,
|
|
"grad_norm": 0.7809332920528532,
|
|
"learning_rate": 7.705511885883612e-06,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10117724537849426,
|
|
"step": 4925,
|
|
"valid_targets_mean": 1255.4,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 5.184016824395373,
|
|
"grad_norm": 0.8513594581031201,
|
|
"learning_rate": 7.664193718465814e-06,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12775394320487976,
|
|
"step": 4930,
|
|
"valid_targets_mean": 1618.7,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 5.1892744479495265,
|
|
"grad_norm": 0.7692459940995934,
|
|
"learning_rate": 7.622960353499438e-06,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11013204604387283,
|
|
"step": 4935,
|
|
"valid_targets_mean": 1466.6,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 5.19453207150368,
|
|
"grad_norm": 0.8329152272449081,
|
|
"learning_rate": 7.581812074443084e-06,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12576130032539368,
|
|
"step": 4940,
|
|
"valid_targets_mean": 1455.1,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 5.1997896950578335,
|
|
"grad_norm": 1.0151564464097922,
|
|
"learning_rate": 7.5407491641704464e-06,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216731071472168,
|
|
"step": 4945,
|
|
"valid_targets_mean": 1309.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 5.205047318611987,
|
|
"grad_norm": 0.9179001865879102,
|
|
"learning_rate": 7.499771904968332e-06,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1344885230064392,
|
|
"step": 4950,
|
|
"valid_targets_mean": 1437.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.2103049421661405,
|
|
"grad_norm": 0.862974881249526,
|
|
"learning_rate": 7.45888057853474e-06,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.111325204372406,
|
|
"step": 4955,
|
|
"valid_targets_mean": 1344.7,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.215562565720294,
|
|
"grad_norm": 0.7333557496808919,
|
|
"learning_rate": 7.418075465976944e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11829602718353271,
|
|
"step": 4960,
|
|
"valid_targets_mean": 1535.1,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 5.220820189274448,
|
|
"grad_norm": 0.7830640312550541,
|
|
"learning_rate": 7.3773568478095184e-06,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11570233106613159,
|
|
"step": 4965,
|
|
"valid_targets_mean": 1439.4,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 5.226077812828602,
|
|
"grad_norm": 0.9281018824741001,
|
|
"learning_rate": 7.336725003952456e-06,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.171891987323761,
|
|
"step": 4970,
|
|
"valid_targets_mean": 1967.9,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 5.231335436382755,
|
|
"grad_norm": 0.7602047228259172,
|
|
"learning_rate": 7.296180213729196e-06,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11368528008460999,
|
|
"step": 4975,
|
|
"valid_targets_mean": 1489.4,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 5.236593059936909,
|
|
"grad_norm": 0.8163873914502632,
|
|
"learning_rate": 7.255722755864734e-06,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1246756985783577,
|
|
"step": 4980,
|
|
"valid_targets_mean": 1777.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 5.241850683491062,
|
|
"grad_norm": 0.9037339124607958,
|
|
"learning_rate": 7.21535290848372e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13662774860858917,
|
|
"step": 4985,
|
|
"valid_targets_mean": 1648.7,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 5.247108307045216,
|
|
"grad_norm": 0.8510932605814922,
|
|
"learning_rate": 7.175070949108496e-06,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12033072113990784,
|
|
"step": 4990,
|
|
"valid_targets_mean": 1432.2,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 5.252365930599369,
|
|
"grad_norm": 0.8274094624263375,
|
|
"learning_rate": 7.1348771546572315e-06,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11697205901145935,
|
|
"step": 4995,
|
|
"valid_targets_mean": 1549.0,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 5.257623554153523,
|
|
"grad_norm": 0.809619153579325,
|
|
"learning_rate": 7.09477180144202e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09959600865840912,
|
|
"step": 5000,
|
|
"valid_targets_mean": 1158.9,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 5.262881177707676,
|
|
"grad_norm": 0.8799151043524882,
|
|
"learning_rate": 7.054755165166945e-06,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12933531403541565,
|
|
"step": 5005,
|
|
"valid_targets_mean": 1459.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.26813880126183,
|
|
"grad_norm": 0.776583499205732,
|
|
"learning_rate": 7.014827520926206e-06,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12277175486087799,
|
|
"step": 5010,
|
|
"valid_targets_mean": 1715.9,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 5.273396424815983,
|
|
"grad_norm": 0.8198930875261842,
|
|
"learning_rate": 6.9749891432022505e-06,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11355794966220856,
|
|
"step": 5015,
|
|
"valid_targets_mean": 1492.1,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 5.278654048370137,
|
|
"grad_norm": 0.7682054981686061,
|
|
"learning_rate": 6.935240305863844e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10861600935459137,
|
|
"step": 5020,
|
|
"valid_targets_mean": 1427.2,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 5.28391167192429,
|
|
"grad_norm": 0.7566950494544487,
|
|
"learning_rate": 6.895581282164201e-06,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10474662482738495,
|
|
"step": 5025,
|
|
"valid_targets_mean": 1435.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 5.289169295478444,
|
|
"grad_norm": 0.7390738713121238,
|
|
"learning_rate": 6.856012344739138e-06,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12179842591285706,
|
|
"step": 5030,
|
|
"valid_targets_mean": 1745.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.294426919032597,
|
|
"grad_norm": 0.799283542407322,
|
|
"learning_rate": 6.816533765605144e-06,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12094929069280624,
|
|
"step": 5035,
|
|
"valid_targets_mean": 1522.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.299684542586751,
|
|
"grad_norm": 0.749074534278653,
|
|
"learning_rate": 6.7771458161575685e-06,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11563584953546524,
|
|
"step": 5040,
|
|
"valid_targets_mean": 1510.6,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.304942166140904,
|
|
"grad_norm": 0.8262227489966326,
|
|
"learning_rate": 6.737848767168709e-06,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12176688760519028,
|
|
"step": 5045,
|
|
"valid_targets_mean": 1509.7,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 5.310199789695058,
|
|
"grad_norm": 0.8695341534442352,
|
|
"learning_rate": 6.698642888785965e-06,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11329036951065063,
|
|
"step": 5050,
|
|
"valid_targets_mean": 1489.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 5.315457413249211,
|
|
"grad_norm": 0.8391937804125835,
|
|
"learning_rate": 6.659528450530006e-06,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11793771386146545,
|
|
"step": 5055,
|
|
"valid_targets_mean": 1510.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.320715036803365,
|
|
"grad_norm": 0.8816010968775818,
|
|
"learning_rate": 6.6205057212928755e-06,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263437271118164,
|
|
"step": 5060,
|
|
"valid_targets_mean": 1534.2,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.325972660357518,
|
|
"grad_norm": 0.8237219106131886,
|
|
"learning_rate": 6.5815749693361645e-06,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.121507428586483,
|
|
"step": 5065,
|
|
"valid_targets_mean": 1480.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 5.331230283911672,
|
|
"grad_norm": 0.9004329929490859,
|
|
"learning_rate": 6.542736462289188e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12235601246356964,
|
|
"step": 5070,
|
|
"valid_targets_mean": 1504.4,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 5.336487907465825,
|
|
"grad_norm": 0.7496268161167585,
|
|
"learning_rate": 6.503990467147101e-06,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1159660667181015,
|
|
"step": 5075,
|
|
"valid_targets_mean": 1624.8,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 5.341745531019979,
|
|
"grad_norm": 0.7777571192038039,
|
|
"learning_rate": 6.465337250269086e-06,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10574095696210861,
|
|
"step": 5080,
|
|
"valid_targets_mean": 1320.1,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 5.347003154574132,
|
|
"grad_norm": 0.833015588661154,
|
|
"learning_rate": 6.426777077376538e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12290021777153015,
|
|
"step": 5085,
|
|
"valid_targets_mean": 1348.9,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 5.352260778128286,
|
|
"grad_norm": 0.8521212672387877,
|
|
"learning_rate": 6.388310213551223e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10389544069766998,
|
|
"step": 5090,
|
|
"valid_targets_mean": 1333.6,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 5.357518401682439,
|
|
"grad_norm": 0.9133948974777728,
|
|
"learning_rate": 6.349936923233422e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13117776811122894,
|
|
"step": 5095,
|
|
"valid_targets_mean": 1649.7,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 5.3627760252365935,
|
|
"grad_norm": 0.783763722428151,
|
|
"learning_rate": 6.311657470220178e-06,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11626783758401871,
|
|
"step": 5100,
|
|
"valid_targets_mean": 1819.2,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 5.368033648790747,
|
|
"grad_norm": 0.7799184597124955,
|
|
"learning_rate": 6.273472117663446e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12296243757009506,
|
|
"step": 5105,
|
|
"valid_targets_mean": 1603.2,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 5.3732912723449004,
|
|
"grad_norm": 0.7878676819686895,
|
|
"learning_rate": 6.2353811280682715e-06,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11289148777723312,
|
|
"step": 5110,
|
|
"valid_targets_mean": 1434.4,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 5.378548895899054,
|
|
"grad_norm": 0.9061613572105647,
|
|
"learning_rate": 6.19738476329101e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11136360466480255,
|
|
"step": 5115,
|
|
"valid_targets_mean": 1250.6,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 5.383806519453207,
|
|
"grad_norm": 0.790316147629043,
|
|
"learning_rate": 6.159483284537533e-06,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10790354013442993,
|
|
"step": 5120,
|
|
"valid_targets_mean": 1461.3,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 5.389064143007361,
|
|
"grad_norm": 0.8603836573474936,
|
|
"learning_rate": 6.121676952361395e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11386443674564362,
|
|
"step": 5125,
|
|
"valid_targets_mean": 1299.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 5.394321766561514,
|
|
"grad_norm": 0.8504138415456971,
|
|
"learning_rate": 6.083966026662076e-06,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11406168341636658,
|
|
"step": 5130,
|
|
"valid_targets_mean": 1337.1,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 5.399579390115668,
|
|
"grad_norm": 0.8004192475623578,
|
|
"learning_rate": 6.046350766683194e-06,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11288442462682724,
|
|
"step": 5135,
|
|
"valid_targets_mean": 1472.1,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 5.404837013669821,
|
|
"grad_norm": 0.802643951488826,
|
|
"learning_rate": 6.0088314310107e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11967991292476654,
|
|
"step": 5140,
|
|
"valid_targets_mean": 1464.6,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.410094637223975,
|
|
"grad_norm": 0.7966846618111609,
|
|
"learning_rate": 5.9714082775711115e-06,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10847964137792587,
|
|
"step": 5145,
|
|
"valid_targets_mean": 1378.8,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 5.415352260778128,
|
|
"grad_norm": 0.7232157253256309,
|
|
"learning_rate": 5.934081563629764e-06,
|
|
"loss": 0.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09298036247491837,
|
|
"step": 5150,
|
|
"valid_targets_mean": 1256.2,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.420609884332282,
|
|
"grad_norm": 0.8020934155278029,
|
|
"learning_rate": 5.896851545788987e-06,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13270168006420135,
|
|
"step": 5155,
|
|
"valid_targets_mean": 2054.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 5.425867507886435,
|
|
"grad_norm": 0.7236585753424789,
|
|
"learning_rate": 5.859718479986407e-06,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10811392217874527,
|
|
"step": 5160,
|
|
"valid_targets_mean": 1743.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 5.431125131440589,
|
|
"grad_norm": 0.8626665525950744,
|
|
"learning_rate": 5.822682621493132e-06,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13062599301338196,
|
|
"step": 5165,
|
|
"valid_targets_mean": 1531.3,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 5.436382754994742,
|
|
"grad_norm": 0.7359508503916516,
|
|
"learning_rate": 5.7857442249120155e-06,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11487710475921631,
|
|
"step": 5170,
|
|
"valid_targets_mean": 1780.8,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 5.441640378548896,
|
|
"grad_norm": 0.7896426613147863,
|
|
"learning_rate": 5.748903544175934e-06,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1125001609325409,
|
|
"step": 5175,
|
|
"valid_targets_mean": 1485.3,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 5.446898002103049,
|
|
"grad_norm": 0.8392109408911339,
|
|
"learning_rate": 5.712160832545992e-06,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10780222713947296,
|
|
"step": 5180,
|
|
"valid_targets_mean": 1408.1,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 5.452155625657203,
|
|
"grad_norm": 0.7633564455285415,
|
|
"learning_rate": 5.675516342609811e-06,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11002111434936523,
|
|
"step": 5185,
|
|
"valid_targets_mean": 1382.9,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 5.457413249211356,
|
|
"grad_norm": 0.8817069529885941,
|
|
"learning_rate": 5.638970326279802e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12003359943628311,
|
|
"step": 5190,
|
|
"valid_targets_mean": 1290.4,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 5.46267087276551,
|
|
"grad_norm": 0.888054456172169,
|
|
"learning_rate": 5.602523034791407e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1100107878446579,
|
|
"step": 5195,
|
|
"valid_targets_mean": 1449.4,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 5.467928496319663,
|
|
"grad_norm": 0.6440286107130968,
|
|
"learning_rate": 5.566174718701378e-06,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10465237498283386,
|
|
"step": 5200,
|
|
"valid_targets_mean": 2912.0,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 5.473186119873817,
|
|
"grad_norm": 0.4906964684696538,
|
|
"learning_rate": 5.529925627886079e-06,
|
|
"loss": 0.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07015372812747955,
|
|
"step": 5205,
|
|
"valid_targets_mean": 3155.2,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 5.47844374342797,
|
|
"grad_norm": 0.4411436651011096,
|
|
"learning_rate": 5.493776011539749e-06,
|
|
"loss": 0.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07336966693401337,
|
|
"step": 5210,
|
|
"valid_targets_mean": 3495.2,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 5.483701366982124,
|
|
"grad_norm": 0.38990773237108856,
|
|
"learning_rate": 5.457726118172761e-06,
|
|
"loss": 0.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07696102559566498,
|
|
"step": 5215,
|
|
"valid_targets_mean": 3671.3,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 5.488958990536277,
|
|
"grad_norm": 0.40235911971158483,
|
|
"learning_rate": 5.421776195609982e-06,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07264817506074905,
|
|
"step": 5220,
|
|
"valid_targets_mean": 3131.7,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 5.494216614090431,
|
|
"grad_norm": 0.7366905018196068,
|
|
"learning_rate": 5.385926490989e-06,
|
|
"loss": 0.0872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13003386557102203,
|
|
"step": 5225,
|
|
"valid_targets_mean": 1250.6,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 5.499474237644584,
|
|
"grad_norm": 0.43867962358000606,
|
|
"learning_rate": 5.350177250758479e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08369845151901245,
|
|
"step": 5230,
|
|
"valid_targets_mean": 3237.3,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 5.504731861198739,
|
|
"grad_norm": 0.4840523314937838,
|
|
"learning_rate": 5.314528720676424e-06,
|
|
"loss": 0.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09003119170665741,
|
|
"step": 5235,
|
|
"valid_targets_mean": 3020.2,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.509989484752892,
|
|
"grad_norm": 0.3522203690286094,
|
|
"learning_rate": 5.2789811458085085e-06,
|
|
"loss": 0.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055763859301805496,
|
|
"step": 5240,
|
|
"valid_targets_mean": 3357.7,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 5.515247108307046,
|
|
"grad_norm": 0.34572863197273945,
|
|
"learning_rate": 5.243534770526404e-06,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05645620822906494,
|
|
"step": 5245,
|
|
"valid_targets_mean": 4872.1,
|
|
"valid_targets_min": 2802
|
|
},
|
|
{
|
|
"epoch": 5.520504731861199,
|
|
"grad_norm": 0.4146113190989375,
|
|
"learning_rate": 5.208189838506074e-06,
|
|
"loss": 0.0717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06516373157501221,
|
|
"step": 5250,
|
|
"valid_targets_mean": 2990.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 5.5257623554153525,
|
|
"grad_norm": 0.3409140953289381,
|
|
"learning_rate": 5.172946592726109e-06,
|
|
"loss": 0.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06866136193275452,
|
|
"step": 5255,
|
|
"valid_targets_mean": 3588.3,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 5.531019978969506,
|
|
"grad_norm": 0.5133526501421567,
|
|
"learning_rate": 5.137805275466072e-06,
|
|
"loss": 0.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11353409290313721,
|
|
"step": 5260,
|
|
"valid_targets_mean": 2007.3,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.5362776025236595,
|
|
"grad_norm": 0.39553778922882177,
|
|
"learning_rate": 5.1027661283048036e-06,
|
|
"loss": 0.0988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0642944723367691,
|
|
"step": 5265,
|
|
"valid_targets_mean": 3185.9,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 5.541535226077813,
|
|
"grad_norm": 0.537024794805001,
|
|
"learning_rate": 5.067829392118775e-06,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09173554182052612,
|
|
"step": 5270,
|
|
"valid_targets_mean": 1841.9,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 5.5467928496319665,
|
|
"grad_norm": 0.3928391902108944,
|
|
"learning_rate": 5.03299530708045e-06,
|
|
"loss": 0.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048176899552345276,
|
|
"step": 5275,
|
|
"valid_targets_mean": 2904.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 5.55205047318612,
|
|
"grad_norm": 0.3710063012964714,
|
|
"learning_rate": 4.998264112656617e-06,
|
|
"loss": 0.0636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06277212500572205,
|
|
"step": 5280,
|
|
"valid_targets_mean": 3779.6,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 5.5573080967402735,
|
|
"grad_norm": 0.47273065075899023,
|
|
"learning_rate": 4.963636047606712e-06,
|
|
"loss": 0.0861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0711083710193634,
|
|
"step": 5285,
|
|
"valid_targets_mean": 2611.1,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 5.562565720294427,
|
|
"grad_norm": 0.4563423626132676,
|
|
"learning_rate": 4.929111349981244e-06,
|
|
"loss": 0.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08847686648368835,
|
|
"step": 5290,
|
|
"valid_targets_mean": 2535.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 5.5678233438485805,
|
|
"grad_norm": 0.4951752395502576,
|
|
"learning_rate": 4.894690257120114e-06,
|
|
"loss": 0.0831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08223095536231995,
|
|
"step": 5295,
|
|
"valid_targets_mean": 3190.8,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 5.573080967402734,
|
|
"grad_norm": 0.3781062874552959,
|
|
"learning_rate": 4.860373005650985e-06,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0642784982919693,
|
|
"step": 5300,
|
|
"valid_targets_mean": 3511.1,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 5.578338590956887,
|
|
"grad_norm": 0.3607086035283592,
|
|
"learning_rate": 4.826159831487656e-06,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06361603736877441,
|
|
"step": 5305,
|
|
"valid_targets_mean": 3521.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 5.583596214511041,
|
|
"grad_norm": 0.4047310585797599,
|
|
"learning_rate": 4.792050969828474e-06,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0712086632847786,
|
|
"step": 5310,
|
|
"valid_targets_mean": 3062.9,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 5.588853838065194,
|
|
"grad_norm": 0.7671277660530172,
|
|
"learning_rate": 4.758046655154664e-06,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11731690913438797,
|
|
"step": 5315,
|
|
"valid_targets_mean": 1179.9,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 5.594111461619348,
|
|
"grad_norm": 0.5536578463924902,
|
|
"learning_rate": 4.72414712122875e-06,
|
|
"loss": 0.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10014639794826508,
|
|
"step": 5320,
|
|
"valid_targets_mean": 2067.3,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 5.599369085173501,
|
|
"grad_norm": 0.4516327627947546,
|
|
"learning_rate": 4.690352601092954e-06,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06019534170627594,
|
|
"step": 5325,
|
|
"valid_targets_mean": 2839.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 5.604626708727655,
|
|
"grad_norm": 0.5889218711339791,
|
|
"learning_rate": 4.656663327067563e-06,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736669182777405,
|
|
"step": 5330,
|
|
"valid_targets_mean": 2888.0,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 5.609884332281808,
|
|
"grad_norm": 0.5600330545340294,
|
|
"learning_rate": 4.623079530749355e-06,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08541359007358551,
|
|
"step": 5335,
|
|
"valid_targets_mean": 2609.9,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 5.615141955835962,
|
|
"grad_norm": 0.3511085199552129,
|
|
"learning_rate": 4.589601443010012e-06,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06310705095529556,
|
|
"step": 5340,
|
|
"valid_targets_mean": 3436.7,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 5.620399579390115,
|
|
"grad_norm": 0.6721906973672241,
|
|
"learning_rate": 4.55622929399451e-06,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33643388748168945,
|
|
"step": 5345,
|
|
"valid_targets_mean": 2644.9,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 5.625657202944269,
|
|
"grad_norm": 0.3166222980260612,
|
|
"learning_rate": 4.522963313119564e-06,
|
|
"loss": 0.0894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059898123145103455,
|
|
"step": 5350,
|
|
"valid_targets_mean": 4109.6,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 5.630914826498422,
|
|
"grad_norm": 0.371302765978117,
|
|
"learning_rate": 4.48980372907202e-06,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06634600460529327,
|
|
"step": 5355,
|
|
"valid_targets_mean": 4150.4,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 5.636172450052577,
|
|
"grad_norm": 0.4363528525094378,
|
|
"learning_rate": 4.456750769807303e-06,
|
|
"loss": 0.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09076633304357529,
|
|
"step": 5360,
|
|
"valid_targets_mean": 3907.9,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 5.641430073606729,
|
|
"grad_norm": 0.3881675184846364,
|
|
"learning_rate": 4.4238046625478635e-06,
|
|
"loss": 0.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06289593130350113,
|
|
"step": 5365,
|
|
"valid_targets_mean": 2426.2,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 5.646687697160884,
|
|
"grad_norm": 0.44115229165527364,
|
|
"learning_rate": 4.390965633781579e-06,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08500753343105316,
|
|
"step": 5370,
|
|
"valid_targets_mean": 3060.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.651945320715037,
|
|
"grad_norm": 0.4163457086701572,
|
|
"learning_rate": 4.358233909260215e-06,
|
|
"loss": 0.0653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06481081247329712,
|
|
"step": 5375,
|
|
"valid_targets_mean": 2013.9,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 5.657202944269191,
|
|
"grad_norm": 0.3489182883360053,
|
|
"learning_rate": 4.3256097139978934e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057638153433799744,
|
|
"step": 5380,
|
|
"valid_targets_mean": 3445.8,
|
|
"valid_targets_min": 1770
|
|
},
|
|
{
|
|
"epoch": 5.662460567823344,
|
|
"grad_norm": 0.36595440417044695,
|
|
"learning_rate": 4.293093272269513e-06,
|
|
"loss": 0.0611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060730718076229095,
|
|
"step": 5385,
|
|
"valid_targets_mean": 3535.2,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 5.667718191377498,
|
|
"grad_norm": 0.3444894547671019,
|
|
"learning_rate": 4.260684807609217e-06,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043215736746788025,
|
|
"step": 5390,
|
|
"valid_targets_mean": 3596.6,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 5.672975814931651,
|
|
"grad_norm": 0.4007388592256331,
|
|
"learning_rate": 4.22838454280887e-06,
|
|
"loss": 0.0585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061987798660993576,
|
|
"step": 5395,
|
|
"valid_targets_mean": 3249.3,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 5.678233438485805,
|
|
"grad_norm": 0.3675365529597313,
|
|
"learning_rate": 4.196192699916528e-06,
|
|
"loss": 0.0612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05437369644641876,
|
|
"step": 5400,
|
|
"valid_targets_mean": 3712.8,
|
|
"valid_targets_min": 2535
|
|
},
|
|
{
|
|
"epoch": 5.683491062039958,
|
|
"grad_norm": 0.43640798823250465,
|
|
"learning_rate": 4.164109500234865e-06,
|
|
"loss": 0.0648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06860943138599396,
|
|
"step": 5405,
|
|
"valid_targets_mean": 3262.0,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 5.688748685594112,
|
|
"grad_norm": 0.6561863580374057,
|
|
"learning_rate": 4.1321351643197235e-06,
|
|
"loss": 0.0898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13322700560092926,
|
|
"step": 5410,
|
|
"valid_targets_mean": 1491.4,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 5.694006309148265,
|
|
"grad_norm": 0.4358763223896758,
|
|
"learning_rate": 4.100269911978549e-06,
|
|
"loss": 0.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07011303305625916,
|
|
"step": 5415,
|
|
"valid_targets_mean": 3696.4,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.699263932702419,
|
|
"grad_norm": 0.5739171072047936,
|
|
"learning_rate": 4.068513962268892e-06,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08923690021038055,
|
|
"step": 5420,
|
|
"valid_targets_mean": 1915.5,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 5.704521556256572,
|
|
"grad_norm": 0.42608581865170847,
|
|
"learning_rate": 4.036867533496895e-06,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06260919570922852,
|
|
"step": 5425,
|
|
"valid_targets_mean": 2728.7,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 5.709779179810726,
|
|
"grad_norm": 0.4701969559376083,
|
|
"learning_rate": 4.00533084321582e-06,
|
|
"loss": 0.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07386186718940735,
|
|
"step": 5430,
|
|
"valid_targets_mean": 2827.4,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.715036803364879,
|
|
"grad_norm": 0.43626358271938687,
|
|
"learning_rate": 3.9739041082245114e-06,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07005483657121658,
|
|
"step": 5435,
|
|
"valid_targets_mean": 2377.9,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 5.720294426919033,
|
|
"grad_norm": 0.4464928971357519,
|
|
"learning_rate": 3.942587544565932e-06,
|
|
"loss": 0.0799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08836561441421509,
|
|
"step": 5440,
|
|
"valid_targets_mean": 2733.9,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 5.725552050473186,
|
|
"grad_norm": 0.4369438601678796,
|
|
"learning_rate": 3.9113813675256816e-06,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06848982721567154,
|
|
"step": 5445,
|
|
"valid_targets_mean": 3892.4,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 5.7308096740273395,
|
|
"grad_norm": 0.41273524626035957,
|
|
"learning_rate": 3.8802857916305006e-06,
|
|
"loss": 0.0778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06610330939292908,
|
|
"step": 5450,
|
|
"valid_targets_mean": 3477.7,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 5.736067297581493,
|
|
"grad_norm": 0.3871325032451887,
|
|
"learning_rate": 3.849301030646797e-06,
|
|
"loss": 0.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07895655930042267,
|
|
"step": 5455,
|
|
"valid_targets_mean": 4753.2,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 5.7413249211356465,
|
|
"grad_norm": 0.4217346153198867,
|
|
"learning_rate": 3.818427297579186e-06,
|
|
"loss": 0.0643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06543637812137604,
|
|
"step": 5460,
|
|
"valid_targets_mean": 2988.9,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 5.7465825446898,
|
|
"grad_norm": 0.3136343984963145,
|
|
"learning_rate": 3.787664804669027e-06,
|
|
"loss": 0.0625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051543548703193665,
|
|
"step": 5465,
|
|
"valid_targets_mean": 4565.8,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 5.7518401682439535,
|
|
"grad_norm": 0.3621915752518174,
|
|
"learning_rate": 3.7570137633929647e-06,
|
|
"loss": 0.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06575092673301697,
|
|
"step": 5470,
|
|
"valid_targets_mean": 3763.9,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 5.757097791798107,
|
|
"grad_norm": 0.6122320525546388,
|
|
"learning_rate": 3.7264743844614424e-06,
|
|
"loss": 0.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10286790877580643,
|
|
"step": 5475,
|
|
"valid_targets_mean": 1522.7,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 5.7623554153522605,
|
|
"grad_norm": 0.4098948000241012,
|
|
"learning_rate": 3.6960468778173097e-06,
|
|
"loss": 0.0761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0847419798374176,
|
|
"step": 5480,
|
|
"valid_targets_mean": 3974.7,
|
|
"valid_targets_min": 2443
|
|
},
|
|
{
|
|
"epoch": 5.767613038906414,
|
|
"grad_norm": 0.5008404289507667,
|
|
"learning_rate": 3.665731452634347e-06,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07621408998966217,
|
|
"step": 5485,
|
|
"valid_targets_mean": 1714.2,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 5.7728706624605675,
|
|
"grad_norm": 0.3784535088474085,
|
|
"learning_rate": 3.6355283173158153e-06,
|
|
"loss": 0.0598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06724974513053894,
|
|
"step": 5490,
|
|
"valid_targets_mean": 3683.6,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 5.778128286014722,
|
|
"grad_norm": 0.4780606297581987,
|
|
"learning_rate": 3.6054376794930467e-06,
|
|
"loss": 0.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08645504713058472,
|
|
"step": 5495,
|
|
"valid_targets_mean": 2942.3,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 5.783385909568874,
|
|
"grad_norm": 0.40912613469195874,
|
|
"learning_rate": 3.5754597460240216e-06,
|
|
"loss": 0.0695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06450048089027405,
|
|
"step": 5500,
|
|
"valid_targets_mean": 3584.6,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 5.788643533123029,
|
|
"grad_norm": 0.6190582766331852,
|
|
"learning_rate": 3.5455947229919185e-06,
|
|
"loss": 0.0671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10218099504709244,
|
|
"step": 5505,
|
|
"valid_targets_mean": 2055.4,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 5.793901156677181,
|
|
"grad_norm": 0.4863579439971359,
|
|
"learning_rate": 3.515842815703716e-06,
|
|
"loss": 0.0684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07434908300638199,
|
|
"step": 5510,
|
|
"valid_targets_mean": 2057.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 5.799158780231336,
|
|
"grad_norm": 0.5401129388023387,
|
|
"learning_rate": 3.4862042286887943e-06,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08237025141716003,
|
|
"step": 5515,
|
|
"valid_targets_mean": 2091.9,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 5.804416403785489,
|
|
"grad_norm": 0.39507077111937705,
|
|
"learning_rate": 3.456679165697494e-06,
|
|
"loss": 0.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06957225501537323,
|
|
"step": 5520,
|
|
"valid_targets_mean": 3179.4,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 5.809674027339643,
|
|
"grad_norm": 0.4904748389811793,
|
|
"learning_rate": 3.427267829699741e-06,
|
|
"loss": 0.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06819421052932739,
|
|
"step": 5525,
|
|
"valid_targets_mean": 2118.0,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 5.814931650893796,
|
|
"grad_norm": 0.4705278464386405,
|
|
"learning_rate": 3.3979704228836586e-06,
|
|
"loss": 0.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07215887308120728,
|
|
"step": 5530,
|
|
"valid_targets_mean": 2350.0,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 5.82018927444795,
|
|
"grad_norm": 0.6215471041321221,
|
|
"learning_rate": 3.3687871466541424e-06,
|
|
"loss": 0.088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12827762961387634,
|
|
"step": 5535,
|
|
"valid_targets_mean": 1728.8,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 5.825446898002103,
|
|
"grad_norm": 0.5242690029033589,
|
|
"learning_rate": 3.339718201631521e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08257465064525604,
|
|
"step": 5540,
|
|
"valid_targets_mean": 3473.8,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 5.830704521556257,
|
|
"grad_norm": 0.3725043819909827,
|
|
"learning_rate": 3.3107637876501352e-06,
|
|
"loss": 0.0591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04687100648880005,
|
|
"step": 5545,
|
|
"valid_targets_mean": 3574.1,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 5.83596214511041,
|
|
"grad_norm": 0.4061944510779138,
|
|
"learning_rate": 3.2819241037569838e-06,
|
|
"loss": 0.0713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0636071115732193,
|
|
"step": 5550,
|
|
"valid_targets_mean": 2909.7,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 5.841219768664564,
|
|
"grad_norm": 0.4711251498892174,
|
|
"learning_rate": 3.253199348210372e-06,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07953070849180222,
|
|
"step": 5555,
|
|
"valid_targets_mean": 3022.2,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 5.846477392218717,
|
|
"grad_norm": 0.3797374307174812,
|
|
"learning_rate": 3.2245897184785103e-06,
|
|
"loss": 0.0576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05033276230096817,
|
|
"step": 5560,
|
|
"valid_targets_mean": 2978.2,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 5.851735015772871,
|
|
"grad_norm": 0.3727209934715664,
|
|
"learning_rate": 3.1960954112381825e-06,
|
|
"loss": 0.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06503856182098389,
|
|
"step": 5565,
|
|
"valid_targets_mean": 4808.4,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 5.856992639327024,
|
|
"grad_norm": 0.3672931375398731,
|
|
"learning_rate": 3.1677166223733934e-06,
|
|
"loss": 0.0696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06206974387168884,
|
|
"step": 5570,
|
|
"valid_targets_mean": 4306.9,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 5.862250262881178,
|
|
"grad_norm": 0.3247757190871145,
|
|
"learning_rate": 3.1394535469740273e-06,
|
|
"loss": 0.0571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053987935185432434,
|
|
"step": 5575,
|
|
"valid_targets_mean": 4484.6,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 5.867507886435331,
|
|
"grad_norm": 0.44223634627808067,
|
|
"learning_rate": 3.111306379334462e-06,
|
|
"loss": 0.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09267568588256836,
|
|
"step": 5580,
|
|
"valid_targets_mean": 4246.4,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 5.872765509989485,
|
|
"grad_norm": 0.33839878301938703,
|
|
"learning_rate": 3.083275312952301e-06,
|
|
"loss": 0.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05013374239206314,
|
|
"step": 5585,
|
|
"valid_targets_mean": 2882.8,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 5.878023133543638,
|
|
"grad_norm": 0.41545640646602267,
|
|
"learning_rate": 3.055360540527006e-06,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07960711419582367,
|
|
"step": 5590,
|
|
"valid_targets_mean": 2827.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 5.883280757097792,
|
|
"grad_norm": 0.3289053035698674,
|
|
"learning_rate": 3.0275622539585556e-06,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059524841606616974,
|
|
"step": 5595,
|
|
"valid_targets_mean": 3046.0,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 5.888538380651945,
|
|
"grad_norm": 0.48164133495913375,
|
|
"learning_rate": 2.999880644346165e-06,
|
|
"loss": 0.0674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12587189674377441,
|
|
"step": 5600,
|
|
"valid_targets_mean": 2666.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 5.893796004206099,
|
|
"grad_norm": 0.437680418717453,
|
|
"learning_rate": 2.9723159019869597e-06,
|
|
"loss": 0.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06669393181800842,
|
|
"step": 5605,
|
|
"valid_targets_mean": 3699.2,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 5.899053627760252,
|
|
"grad_norm": 0.4552807263478258,
|
|
"learning_rate": 2.9448682163746413e-06,
|
|
"loss": 0.0693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06853412091732025,
|
|
"step": 5610,
|
|
"valid_targets_mean": 2607.3,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 5.904311251314406,
|
|
"grad_norm": 0.5564581144895816,
|
|
"learning_rate": 2.917537776198216e-06,
|
|
"loss": 0.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06953615695238113,
|
|
"step": 5615,
|
|
"valid_targets_mean": 1881.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 5.909568874868559,
|
|
"grad_norm": 0.7076667181439968,
|
|
"learning_rate": 2.8903247693406932e-06,
|
|
"loss": 0.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12509091198444366,
|
|
"step": 5620,
|
|
"valid_targets_mean": 1679.2,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 5.914826498422713,
|
|
"grad_norm": 0.37113324347173715,
|
|
"learning_rate": 2.863229382877777e-06,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06510509550571442,
|
|
"step": 5625,
|
|
"valid_targets_mean": 3019.8,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.920084121976867,
|
|
"grad_norm": 0.37553389579031443,
|
|
"learning_rate": 2.8362518030765904e-06,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05372827500104904,
|
|
"step": 5630,
|
|
"valid_targets_mean": 3745.3,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 5.9253417455310196,
|
|
"grad_norm": 0.45557279748286655,
|
|
"learning_rate": 2.8093922153944065e-06,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07182173430919647,
|
|
"step": 5635,
|
|
"valid_targets_mean": 3255.9,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 5.930599369085174,
|
|
"grad_norm": 0.5217169672169305,
|
|
"learning_rate": 2.782650804477347e-06,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07140591740608215,
|
|
"step": 5640,
|
|
"valid_targets_mean": 2534.0,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 5.9358569926393265,
|
|
"grad_norm": 0.39912615203410573,
|
|
"learning_rate": 2.7560277541591427e-06,
|
|
"loss": 0.0652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05698162317276001,
|
|
"step": 5645,
|
|
"valid_targets_mean": 2999.9,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 5.941114616193481,
|
|
"grad_norm": 0.8478603884193808,
|
|
"learning_rate": 2.7295232474598445e-06,
|
|
"loss": 0.0655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09507367759943008,
|
|
"step": 5650,
|
|
"valid_targets_mean": 1254.4,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 5.946372239747634,
|
|
"grad_norm": 0.4861537301010663,
|
|
"learning_rate": 2.703137466584571e-06,
|
|
"loss": 0.0798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06851141899824142,
|
|
"step": 5655,
|
|
"valid_targets_mean": 3019.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 5.951629863301788,
|
|
"grad_norm": 0.5360077718524827,
|
|
"learning_rate": 2.6768705929222827e-06,
|
|
"loss": 0.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07459817826747894,
|
|
"step": 5660,
|
|
"valid_targets_mean": 2472.1,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 5.956887486855941,
|
|
"grad_norm": 0.43417037003952114,
|
|
"learning_rate": 2.6507228070444922e-06,
|
|
"loss": 0.0643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06640955805778503,
|
|
"step": 5665,
|
|
"valid_targets_mean": 3184.0,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.962145110410095,
|
|
"grad_norm": 0.4363314543656136,
|
|
"learning_rate": 2.6246942887040416e-06,
|
|
"loss": 0.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07214406877756119,
|
|
"step": 5670,
|
|
"valid_targets_mean": 4097.8,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 5.967402733964248,
|
|
"grad_norm": 0.35176548595035,
|
|
"learning_rate": 2.5987852168338922e-06,
|
|
"loss": 0.0616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052599914371967316,
|
|
"step": 5675,
|
|
"valid_targets_mean": 5379.9,
|
|
"valid_targets_min": 3677
|
|
},
|
|
{
|
|
"epoch": 5.972660357518402,
|
|
"grad_norm": 0.3953005192034419,
|
|
"learning_rate": 2.5729957695458454e-06,
|
|
"loss": 0.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05832815542817116,
|
|
"step": 5680,
|
|
"valid_targets_mean": 3455.8,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.977917981072555,
|
|
"grad_norm": 0.45263421607065923,
|
|
"learning_rate": 2.5473261241293547e-06,
|
|
"loss": 0.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07711224257946014,
|
|
"step": 5685,
|
|
"valid_targets_mean": 3888.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.983175604626709,
|
|
"grad_norm": 0.4832589836132502,
|
|
"learning_rate": 2.521776457050302e-06,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471479833126068,
|
|
"step": 5690,
|
|
"valid_targets_mean": 2937.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.988433228180862,
|
|
"grad_norm": 0.3635672452356186,
|
|
"learning_rate": 2.4963469439497703e-06,
|
|
"loss": 0.0632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059753306210041046,
|
|
"step": 5695,
|
|
"valid_targets_mean": 3050.1,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 5.993690851735016,
|
|
"grad_norm": 0.43990503610989284,
|
|
"learning_rate": 2.4710377596428404e-06,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06187095493078232,
|
|
"step": 5700,
|
|
"valid_targets_mean": 2817.5,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 5.998948475289169,
|
|
"grad_norm": 0.5950208952697916,
|
|
"learning_rate": 2.4458490781174084e-06,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1841873973608017,
|
|
"step": 5705,
|
|
"valid_targets_mean": 2429.1,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 6.004206098843323,
|
|
"grad_norm": 1.056405448573602,
|
|
"learning_rate": 2.4207810725329583e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15957674384117126,
|
|
"step": 5710,
|
|
"valid_targets_mean": 1576.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 6.009463722397476,
|
|
"grad_norm": 0.8832827367460852,
|
|
"learning_rate": 2.395833915219401e-06,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13401654362678528,
|
|
"step": 5715,
|
|
"valid_targets_mean": 1537.1,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 6.01472134595163,
|
|
"grad_norm": 0.8438837993875398,
|
|
"learning_rate": 2.3710077776758713e-06,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13347040116786957,
|
|
"step": 5720,
|
|
"valid_targets_mean": 1576.7,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 6.019978969505783,
|
|
"grad_norm": 0.8243968700714618,
|
|
"learning_rate": 2.3463028305695447e-06,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12472514808177948,
|
|
"step": 5725,
|
|
"valid_targets_mean": 1409.7,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.025236593059937,
|
|
"grad_norm": 0.8443977567975072,
|
|
"learning_rate": 2.3217192437344925e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12544682621955872,
|
|
"step": 5730,
|
|
"valid_targets_mean": 1581.3,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 6.03049421661409,
|
|
"grad_norm": 0.8262835707529608,
|
|
"learning_rate": 2.2972571861704784e-06,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161672905087471,
|
|
"step": 5735,
|
|
"valid_targets_mean": 1765.4,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 6.035751840168244,
|
|
"grad_norm": 0.7493785382076814,
|
|
"learning_rate": 2.2729168260418224e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11074835062026978,
|
|
"step": 5740,
|
|
"valid_targets_mean": 1440.2,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 6.041009463722397,
|
|
"grad_norm": 0.7650150016103158,
|
|
"learning_rate": 2.2486983306762332e-06,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2043965756893158,
|
|
"step": 5745,
|
|
"valid_targets_mean": 1589.9,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 6.046267087276551,
|
|
"grad_norm": 0.8118441009037011,
|
|
"learning_rate": 2.224601866563665e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12593913078308105,
|
|
"step": 5750,
|
|
"valid_targets_mean": 1544.3,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 6.051524710830704,
|
|
"grad_norm": 0.8488874207848223,
|
|
"learning_rate": 2.2006275993551563e-06,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356077790260315,
|
|
"step": 5755,
|
|
"valid_targets_mean": 1915.3,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 6.056782334384858,
|
|
"grad_norm": 0.7943128359084249,
|
|
"learning_rate": 2.176775693861719e-06,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11740730702877045,
|
|
"step": 5760,
|
|
"valid_targets_mean": 1516.1,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 6.062039957939011,
|
|
"grad_norm": 0.8281286315241209,
|
|
"learning_rate": 2.1530463140531886e-06,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12747472524642944,
|
|
"step": 5765,
|
|
"valid_targets_mean": 1713.7,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 6.067297581493165,
|
|
"grad_norm": 0.8099123331780564,
|
|
"learning_rate": 2.129439623057077e-06,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10697382688522339,
|
|
"step": 5770,
|
|
"valid_targets_mean": 1325.3,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 6.072555205047319,
|
|
"grad_norm": 0.8065990975663488,
|
|
"learning_rate": 2.105955783157498e-06,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11023028194904327,
|
|
"step": 5775,
|
|
"valid_targets_mean": 1349.9,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 6.0778128286014725,
|
|
"grad_norm": 0.7851180412673159,
|
|
"learning_rate": 2.0825949557940174e-06,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10124966502189636,
|
|
"step": 5780,
|
|
"valid_targets_mean": 1355.5,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 6.083070452155626,
|
|
"grad_norm": 0.79802762010396,
|
|
"learning_rate": 2.059357301560547e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11919783800840378,
|
|
"step": 5785,
|
|
"valid_targets_mean": 1716.7,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 6.0883280757097795,
|
|
"grad_norm": 0.8707769812997141,
|
|
"learning_rate": 2.036242980204244e-06,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11366402357816696,
|
|
"step": 5790,
|
|
"valid_targets_mean": 1278.3,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.093585699263933,
|
|
"grad_norm": 0.8560519537309498,
|
|
"learning_rate": 2.0132521506244294e-06,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303253173828125,
|
|
"step": 5795,
|
|
"valid_targets_mean": 1576.7,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 6.0988433228180865,
|
|
"grad_norm": 0.8024527979995779,
|
|
"learning_rate": 1.9903849708714664e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1220255121588707,
|
|
"step": 5800,
|
|
"valid_targets_mean": 1733.6,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 6.10410094637224,
|
|
"grad_norm": 0.7302956525360211,
|
|
"learning_rate": 1.967641598145684e-06,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09471730142831802,
|
|
"step": 5805,
|
|
"valid_targets_mean": 1361.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 6.1093585699263935,
|
|
"grad_norm": 0.7809589037777808,
|
|
"learning_rate": 1.9450221887963194e-06,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1004934087395668,
|
|
"step": 5810,
|
|
"valid_targets_mean": 1457.7,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 6.114616193480547,
|
|
"grad_norm": 0.8687769105285652,
|
|
"learning_rate": 1.922526898320407e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11705152690410614,
|
|
"step": 5815,
|
|
"valid_targets_mean": 1281.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 6.1198738170347005,
|
|
"grad_norm": 1.0257711661224076,
|
|
"learning_rate": 1.900155881361727e-06,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10995705425739288,
|
|
"step": 5820,
|
|
"valid_targets_mean": 1466.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 6.125131440588854,
|
|
"grad_norm": 0.8509635243706488,
|
|
"learning_rate": 1.8779092917097564e-06,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11150787770748138,
|
|
"step": 5825,
|
|
"valid_targets_mean": 1515.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 6.130389064143007,
|
|
"grad_norm": 0.7561951051975627,
|
|
"learning_rate": 1.85578728229858e-06,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924959719181061,
|
|
"step": 5830,
|
|
"valid_targets_mean": 1811.3,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 6.135646687697161,
|
|
"grad_norm": 0.8704344303654569,
|
|
"learning_rate": 1.8337900052058732e-06,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1195128783583641,
|
|
"step": 5835,
|
|
"valid_targets_mean": 1495.8,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 6.140904311251314,
|
|
"grad_norm": 0.8398306482112976,
|
|
"learning_rate": 1.811917611651821e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12384511530399323,
|
|
"step": 5840,
|
|
"valid_targets_mean": 1653.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.146161934805468,
|
|
"grad_norm": 0.7651328935133273,
|
|
"learning_rate": 1.7901702519981068e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10116033256053925,
|
|
"step": 5845,
|
|
"valid_targets_mean": 1380.4,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.151419558359621,
|
|
"grad_norm": 0.7954008913290201,
|
|
"learning_rate": 1.7685480757468765e-06,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10791818052530289,
|
|
"step": 5850,
|
|
"valid_targets_mean": 1600.2,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 6.156677181913775,
|
|
"grad_norm": 0.8377229014967783,
|
|
"learning_rate": 1.7470512315396894e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1209423616528511,
|
|
"step": 5855,
|
|
"valid_targets_mean": 1447.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 6.161934805467928,
|
|
"grad_norm": 0.8378541971434684,
|
|
"learning_rate": 1.7256798671565111e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1366468369960785,
|
|
"step": 5860,
|
|
"valid_targets_mean": 1590.8,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 6.167192429022082,
|
|
"grad_norm": 0.8048332759300525,
|
|
"learning_rate": 1.7044341295147116e-06,
|
|
"loss": 0.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10240399837493896,
|
|
"step": 5865,
|
|
"valid_targets_mean": 1486.0,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 6.172450052576235,
|
|
"grad_norm": 0.7908342032482969,
|
|
"learning_rate": 1.683314164668024e-06,
|
|
"loss": 0.1095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09846100956201553,
|
|
"step": 5870,
|
|
"valid_targets_mean": 1233.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 6.177707676130389,
|
|
"grad_norm": 0.7476579438876946,
|
|
"learning_rate": 1.6623201178055603e-06,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09678193926811218,
|
|
"step": 5875,
|
|
"valid_targets_mean": 1408.1,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 6.182965299684542,
|
|
"grad_norm": 0.918072502833983,
|
|
"learning_rate": 1.6414521332508183e-06,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330566555261612,
|
|
"step": 5880,
|
|
"valid_targets_mean": 1747.6,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 6.188222923238696,
|
|
"grad_norm": 0.851594586310522,
|
|
"learning_rate": 1.6207103544606795e-06,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1153300553560257,
|
|
"step": 5885,
|
|
"valid_targets_mean": 1415.1,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 6.193480546792849,
|
|
"grad_norm": 0.8913181180169621,
|
|
"learning_rate": 1.6000949240244047e-06,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1070786714553833,
|
|
"step": 5890,
|
|
"valid_targets_mean": 1241.3,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 6.198738170347003,
|
|
"grad_norm": 0.877886888931176,
|
|
"learning_rate": 1.5796059836626998e-06,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11365164816379547,
|
|
"step": 5895,
|
|
"valid_targets_mean": 1566.1,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.203995793901156,
|
|
"grad_norm": 0.9072964812458807,
|
|
"learning_rate": 1.5592436742267048e-06,
|
|
"loss": 0.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12517592310905457,
|
|
"step": 5900,
|
|
"valid_targets_mean": 1655.8,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 6.20925341745531,
|
|
"grad_norm": 0.8664928853792824,
|
|
"learning_rate": 1.5390081356970331e-06,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10317286849021912,
|
|
"step": 5905,
|
|
"valid_targets_mean": 1325.1,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 6.214511041009464,
|
|
"grad_norm": 0.8472902836032864,
|
|
"learning_rate": 1.5188995071828117e-06,
|
|
"loss": 0.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13048255443572998,
|
|
"step": 5910,
|
|
"valid_targets_mean": 1859.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 6.219768664563618,
|
|
"grad_norm": 0.9817704310215676,
|
|
"learning_rate": 1.498917926920731e-06,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13278211653232574,
|
|
"step": 5915,
|
|
"valid_targets_mean": 1715.2,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.225026288117771,
|
|
"grad_norm": 0.8300762826519462,
|
|
"learning_rate": 1.4790635322740855e-06,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12267100811004639,
|
|
"step": 5920,
|
|
"valid_targets_mean": 1776.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 6.230283911671925,
|
|
"grad_norm": 0.7485334780876247,
|
|
"learning_rate": 1.4593364597318305e-06,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0925360918045044,
|
|
"step": 5925,
|
|
"valid_targets_mean": 1405.6,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.235541535226078,
|
|
"grad_norm": 0.7991821174411781,
|
|
"learning_rate": 1.4397368449076443e-06,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1004900187253952,
|
|
"step": 5930,
|
|
"valid_targets_mean": 1445.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 6.240799158780232,
|
|
"grad_norm": 0.8794127672319397,
|
|
"learning_rate": 1.4202648225390103e-06,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10697475075721741,
|
|
"step": 5935,
|
|
"valid_targets_mean": 1320.9,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.246056782334385,
|
|
"grad_norm": 0.8342636597973683,
|
|
"learning_rate": 1.4009205264862646e-06,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11376285552978516,
|
|
"step": 5940,
|
|
"valid_targets_mean": 1461.5,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 6.251314405888539,
|
|
"grad_norm": 0.8477474608441585,
|
|
"learning_rate": 1.3817040897316903e-06,
|
|
"loss": 0.103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11012783646583557,
|
|
"step": 5945,
|
|
"valid_targets_mean": 1330.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 6.256572029442692,
|
|
"grad_norm": 0.8454187125928052,
|
|
"learning_rate": 1.362615644378611e-06,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10677572339773178,
|
|
"step": 5950,
|
|
"valid_targets_mean": 1413.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 6.261829652996846,
|
|
"grad_norm": 0.8280382374490074,
|
|
"learning_rate": 1.3436553216504721e-06,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10449901968240738,
|
|
"step": 5955,
|
|
"valid_targets_mean": 1483.2,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 6.267087276550999,
|
|
"grad_norm": 0.8761840994592133,
|
|
"learning_rate": 1.324823251889924e-06,
|
|
"loss": 0.1118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11426833271980286,
|
|
"step": 5960,
|
|
"valid_targets_mean": 1516.4,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 6.2723449001051526,
|
|
"grad_norm": 0.7883641561966342,
|
|
"learning_rate": 1.3061195645579661e-06,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09905272722244263,
|
|
"step": 5965,
|
|
"valid_targets_mean": 1541.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 6.277602523659306,
|
|
"grad_norm": 0.8300834931135926,
|
|
"learning_rate": 1.2875443882330218e-06,
|
|
"loss": 0.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11823458224534988,
|
|
"step": 5970,
|
|
"valid_targets_mean": 1610.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 6.2828601472134595,
|
|
"grad_norm": 0.9275697681576301,
|
|
"learning_rate": 1.269097850610066e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12318852543830872,
|
|
"step": 5975,
|
|
"valid_targets_mean": 1571.8,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 6.288117770767613,
|
|
"grad_norm": 0.8032415472343256,
|
|
"learning_rate": 1.250780078499747e-06,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11173338443040848,
|
|
"step": 5980,
|
|
"valid_targets_mean": 1560.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 6.2933753943217665,
|
|
"grad_norm": 0.8263637067303546,
|
|
"learning_rate": 1.2325911978275196e-06,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10697021335363388,
|
|
"step": 5985,
|
|
"valid_targets_mean": 1341.3,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.29863301787592,
|
|
"grad_norm": 0.7947732322040042,
|
|
"learning_rate": 1.214531333632769e-06,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152212738990784,
|
|
"step": 5990,
|
|
"valid_targets_mean": 1448.0,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 6.3038906414300735,
|
|
"grad_norm": 0.861027582543773,
|
|
"learning_rate": 1.1966006100679596e-06,
|
|
"loss": 0.1029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10293729603290558,
|
|
"step": 5995,
|
|
"valid_targets_mean": 1379.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 6.309148264984227,
|
|
"grad_norm": 0.9826437169525866,
|
|
"learning_rate": 1.1787991503977846e-06,
|
|
"loss": 0.1098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13235735893249512,
|
|
"step": 6000,
|
|
"valid_targets_mean": 1699.4,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.3144058885383805,
|
|
"grad_norm": 0.9541692555323759,
|
|
"learning_rate": 1.1611270769983051e-06,
|
|
"loss": 0.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11527444422245026,
|
|
"step": 6005,
|
|
"valid_targets_mean": 1718.9,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 6.319663512092534,
|
|
"grad_norm": 0.8316730507580145,
|
|
"learning_rate": 1.143584511356115e-06,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10918165743350983,
|
|
"step": 6010,
|
|
"valid_targets_mean": 1481.1,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 6.3249211356466875,
|
|
"grad_norm": 0.7859618568757966,
|
|
"learning_rate": 1.1261715740675205e-06,
|
|
"loss": 0.1092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11200223863124847,
|
|
"step": 6015,
|
|
"valid_targets_mean": 1562.9,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 6.330178759200841,
|
|
"grad_norm": 0.8357770703657998,
|
|
"learning_rate": 1.108888384837683e-06,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11746874451637268,
|
|
"step": 6020,
|
|
"valid_targets_mean": 1583.2,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 6.335436382754994,
|
|
"grad_norm": 0.7763446678484582,
|
|
"learning_rate": 1.0917350624798262e-06,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10101410001516342,
|
|
"step": 6025,
|
|
"valid_targets_mean": 1613.6,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 6.340694006309148,
|
|
"grad_norm": 0.9173311179547599,
|
|
"learning_rate": 1.07471172491439e-06,
|
|
"loss": 0.1118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10745055973529816,
|
|
"step": 6030,
|
|
"valid_targets_mean": 1279.4,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.345951629863301,
|
|
"grad_norm": 0.8344114280070869,
|
|
"learning_rate": 1.0578184891682408e-06,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09839329868555069,
|
|
"step": 6035,
|
|
"valid_targets_mean": 1321.2,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 6.351209253417455,
|
|
"grad_norm": 0.9102659735172188,
|
|
"learning_rate": 1.041055471373864e-06,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10918129980564117,
|
|
"step": 6040,
|
|
"valid_targets_mean": 1293.1,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 6.356466876971609,
|
|
"grad_norm": 0.797875234386722,
|
|
"learning_rate": 1.0244227867685597e-06,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10231631249189377,
|
|
"step": 6045,
|
|
"valid_targets_mean": 1459.4,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 6.361724500525763,
|
|
"grad_norm": 0.8982983030084953,
|
|
"learning_rate": 1.0079205496936484e-06,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12712499499320984,
|
|
"step": 6050,
|
|
"valid_targets_mean": 1683.5,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 6.366982124079916,
|
|
"grad_norm": 0.735713484328951,
|
|
"learning_rate": 9.915488735936995e-07,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09809092432260513,
|
|
"step": 6055,
|
|
"valid_targets_mean": 1473.7,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 6.37223974763407,
|
|
"grad_norm": 0.8151161730500948,
|
|
"learning_rate": 9.753078710157316e-07,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09711694717407227,
|
|
"step": 6060,
|
|
"valid_targets_mean": 1436.2,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 6.377497371188223,
|
|
"grad_norm": 0.8749570747293455,
|
|
"learning_rate": 9.59197653608448e-07,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11560262739658356,
|
|
"step": 6065,
|
|
"valid_targets_mean": 1605.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.382754994742377,
|
|
"grad_norm": 0.8715454554432535,
|
|
"learning_rate": 9.432183321214805e-07,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10432256758213043,
|
|
"step": 6070,
|
|
"valid_targets_mean": 1340.1,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 6.38801261829653,
|
|
"grad_norm": 0.8525222943418735,
|
|
"learning_rate": 9.273700164046162e-07,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11443788558244705,
|
|
"step": 6075,
|
|
"valid_targets_mean": 1546.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 6.393270241850684,
|
|
"grad_norm": 0.7308095830875175,
|
|
"learning_rate": 9.11652815407027e-07,
|
|
"loss": 0.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1003701239824295,
|
|
"step": 6080,
|
|
"valid_targets_mean": 1788.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 6.398527865404837,
|
|
"grad_norm": 0.867952223512354,
|
|
"learning_rate": 8.960668371765569e-07,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10790401697158813,
|
|
"step": 6085,
|
|
"valid_targets_mean": 1405.9,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 6.403785488958991,
|
|
"grad_norm": 0.8806644704943195,
|
|
"learning_rate": 8.806121888589492e-07,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11704282462596893,
|
|
"step": 6090,
|
|
"valid_targets_mean": 1576.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 6.409043112513144,
|
|
"grad_norm": 0.8128337498310754,
|
|
"learning_rate": 8.652889766971229e-07,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1078612431883812,
|
|
"step": 6095,
|
|
"valid_targets_mean": 1605.6,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 6.414300736067298,
|
|
"grad_norm": 0.8091111187848541,
|
|
"learning_rate": 8.500973060304374e-07,
|
|
"loss": 0.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10989522933959961,
|
|
"step": 6100,
|
|
"valid_targets_mean": 1734.6,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 6.419558359621451,
|
|
"grad_norm": 0.8677978121525696,
|
|
"learning_rate": 8.350372812939778e-07,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11552289873361588,
|
|
"step": 6105,
|
|
"valid_targets_mean": 1587.2,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 6.424815983175605,
|
|
"grad_norm": 0.8175987568356211,
|
|
"learning_rate": 8.201090060178174e-07,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1079326868057251,
|
|
"step": 6110,
|
|
"valid_targets_mean": 1558.6,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 6.430073606729758,
|
|
"grad_norm": 0.7713354444357884,
|
|
"learning_rate": 8.053125828263297e-07,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10174345970153809,
|
|
"step": 6115,
|
|
"valid_targets_mean": 1574.2,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.435331230283912,
|
|
"grad_norm": 0.8772480874069559,
|
|
"learning_rate": 7.906481134374688e-07,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10616987943649292,
|
|
"step": 6120,
|
|
"valid_targets_mean": 1458.4,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 6.440588853838065,
|
|
"grad_norm": 0.8957916481779873,
|
|
"learning_rate": 7.761156986620677e-07,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12607461214065552,
|
|
"step": 6125,
|
|
"valid_targets_mean": 1713.8,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 6.445846477392219,
|
|
"grad_norm": 0.7721506634303918,
|
|
"learning_rate": 7.617154384031545e-07,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10052677243947983,
|
|
"step": 6130,
|
|
"valid_targets_mean": 1463.6,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 6.451104100946372,
|
|
"grad_norm": 0.7411463137773325,
|
|
"learning_rate": 7.474474316552638e-07,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1051454246044159,
|
|
"step": 6135,
|
|
"valid_targets_mean": 1666.8,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 6.456361724500526,
|
|
"grad_norm": 0.7981849402747365,
|
|
"learning_rate": 7.33311776503749e-07,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1067662388086319,
|
|
"step": 6140,
|
|
"valid_targets_mean": 1611.5,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 6.461619348054679,
|
|
"grad_norm": 0.7987623242866554,
|
|
"learning_rate": 7.193085701241175e-07,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11374513059854507,
|
|
"step": 6145,
|
|
"valid_targets_mean": 1812.6,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 6.466876971608833,
|
|
"grad_norm": 0.5524972757850786,
|
|
"learning_rate": 7.054379087813568e-07,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07066299021244049,
|
|
"step": 6150,
|
|
"valid_targets_mean": 3127.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 6.472134595162986,
|
|
"grad_norm": 0.48501086183905745,
|
|
"learning_rate": 6.916998878292691e-07,
|
|
"loss": 0.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06848639249801636,
|
|
"step": 6155,
|
|
"valid_targets_mean": 3549.7,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 6.4773922187171395,
|
|
"grad_norm": 0.3794394367263926,
|
|
"learning_rate": 6.780946017098289e-07,
|
|
"loss": 0.0683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047510214149951935,
|
|
"step": 6160,
|
|
"valid_targets_mean": 3607.6,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 6.482649842271293,
|
|
"grad_norm": 0.5550466030514543,
|
|
"learning_rate": 6.646221439525225e-07,
|
|
"loss": 0.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08951736986637115,
|
|
"step": 6165,
|
|
"valid_targets_mean": 3433.5,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 6.4879074658254465,
|
|
"grad_norm": 0.5207584835394102,
|
|
"learning_rate": 6.512826071737021e-07,
|
|
"loss": 0.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08131983876228333,
|
|
"step": 6170,
|
|
"valid_targets_mean": 2446.1,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.4931650893796,
|
|
"grad_norm": 0.544380953297463,
|
|
"learning_rate": 6.380760830759669e-07,
|
|
"loss": 0.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08120328187942505,
|
|
"step": 6175,
|
|
"valid_targets_mean": 2739.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.498422712933754,
|
|
"grad_norm": 0.8360085997505144,
|
|
"learning_rate": 6.250026624475092e-07,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252402663230896,
|
|
"step": 6180,
|
|
"valid_targets_mean": 1537.3,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 6.503680336487907,
|
|
"grad_norm": 0.46518429024428354,
|
|
"learning_rate": 6.12062435161509e-07,
|
|
"loss": 0.0732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06999659538269043,
|
|
"step": 6185,
|
|
"valid_targets_mean": 3057.1,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 6.508937960042061,
|
|
"grad_norm": 0.5337598823318066,
|
|
"learning_rate": 5.992554901755121e-07,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10769539326429367,
|
|
"step": 6190,
|
|
"valid_targets_mean": 2232.5,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 6.514195583596215,
|
|
"grad_norm": 0.3480269503418339,
|
|
"learning_rate": 5.865819155308039e-07,
|
|
"loss": 0.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04843660816550255,
|
|
"step": 6195,
|
|
"valid_targets_mean": 5158.4,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 6.519453207150368,
|
|
"grad_norm": 0.39725832400756,
|
|
"learning_rate": 5.740417983518253e-07,
|
|
"loss": 0.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05901825428009033,
|
|
"step": 6200,
|
|
"valid_targets_mean": 3384.3,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 6.524710830704522,
|
|
"grad_norm": 0.46447315886120744,
|
|
"learning_rate": 5.61635224845567e-07,
|
|
"loss": 0.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07760268449783325,
|
|
"step": 6205,
|
|
"valid_targets_mean": 3542.2,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.529968454258675,
|
|
"grad_norm": 0.5801427464493447,
|
|
"learning_rate": 5.493622803009602e-07,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07645406574010849,
|
|
"step": 6210,
|
|
"valid_targets_mean": 1996.5,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 6.535226077812829,
|
|
"grad_norm": 0.38317492793314245,
|
|
"learning_rate": 5.372230490883246e-07,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056134678423404694,
|
|
"step": 6215,
|
|
"valid_targets_mean": 4156.0,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 6.540483701366982,
|
|
"grad_norm": 0.41486826474114835,
|
|
"learning_rate": 5.252176146587484e-07,
|
|
"loss": 0.0636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06410335004329681,
|
|
"step": 6220,
|
|
"valid_targets_mean": 3535.2,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 6.545741324921136,
|
|
"grad_norm": 0.38424545726777193,
|
|
"learning_rate": 5.133460595435447e-07,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05492096766829491,
|
|
"step": 6225,
|
|
"valid_targets_mean": 2985.7,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 6.550998948475289,
|
|
"grad_norm": 0.49335827474356153,
|
|
"learning_rate": 5.016084653536756e-07,
|
|
"loss": 0.0581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061297088861465454,
|
|
"step": 6230,
|
|
"valid_targets_mean": 3023.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.556256572029443,
|
|
"grad_norm": 0.6635861323521509,
|
|
"learning_rate": 4.900049127791851e-07,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08327782154083252,
|
|
"step": 6235,
|
|
"valid_targets_mean": 1272.2,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 6.561514195583596,
|
|
"grad_norm": 0.43734083333484386,
|
|
"learning_rate": 4.785354815886445e-07,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06597829610109329,
|
|
"step": 6240,
|
|
"valid_targets_mean": 2375.1,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 6.56677181913775,
|
|
"grad_norm": 0.49182093385310827,
|
|
"learning_rate": 4.6720025062862106e-07,
|
|
"loss": 0.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08136197924613953,
|
|
"step": 6245,
|
|
"valid_targets_mean": 3174.8,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 6.572029442691903,
|
|
"grad_norm": 0.5150382452934233,
|
|
"learning_rate": 4.559992978231087e-07,
|
|
"loss": 0.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0961027592420578,
|
|
"step": 6250,
|
|
"valid_targets_mean": 2903.8,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 6.577287066246057,
|
|
"grad_norm": 0.47101295595598486,
|
|
"learning_rate": 4.4493270017301305e-07,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07581216841936111,
|
|
"step": 6255,
|
|
"valid_targets_mean": 3491.4,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 6.58254468980021,
|
|
"grad_norm": 0.4648965066149495,
|
|
"learning_rate": 4.340005337556186e-07,
|
|
"loss": 0.0637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06689706444740295,
|
|
"step": 6260,
|
|
"valid_targets_mean": 3222.4,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 6.587802313354364,
|
|
"grad_norm": 0.6564120309065031,
|
|
"learning_rate": 4.232028737240623e-07,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09197407960891724,
|
|
"step": 6265,
|
|
"valid_targets_mean": 1370.8,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.593059936908517,
|
|
"grad_norm": 0.6647659425431381,
|
|
"learning_rate": 4.125397943068099e-07,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09101127833127975,
|
|
"step": 6270,
|
|
"valid_targets_mean": 1732.1,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 6.598317560462671,
|
|
"grad_norm": 0.4655433765952755,
|
|
"learning_rate": 4.0201136880716027e-07,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07048631459474564,
|
|
"step": 6275,
|
|
"valid_targets_mean": 3875.1,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 6.603575184016824,
|
|
"grad_norm": 0.578990966436419,
|
|
"learning_rate": 3.9161766960273517e-07,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23547233641147614,
|
|
"step": 6280,
|
|
"valid_targets_mean": 3032.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 6.608832807570978,
|
|
"grad_norm": 0.44296797004729804,
|
|
"learning_rate": 3.8135876814497927e-07,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0736391618847847,
|
|
"step": 6285,
|
|
"valid_targets_mean": 3728.1,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 6.614090431125131,
|
|
"grad_norm": 0.5345478654954723,
|
|
"learning_rate": 3.7123473495866314e-07,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0921144187450409,
|
|
"step": 6290,
|
|
"valid_targets_mean": 2361.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 6.619348054679285,
|
|
"grad_norm": 0.6712258843084848,
|
|
"learning_rate": 3.61245639641421e-07,
|
|
"loss": 0.0984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1883959174156189,
|
|
"step": 6295,
|
|
"valid_targets_mean": 2741.1,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 6.624605678233438,
|
|
"grad_norm": 0.36617906460561456,
|
|
"learning_rate": 3.513915508632448e-07,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062396831810474396,
|
|
"step": 6300,
|
|
"valid_targets_mean": 4440.7,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 6.629863301787592,
|
|
"grad_norm": 0.5480528569445628,
|
|
"learning_rate": 3.4167253636602893e-07,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1156037449836731,
|
|
"step": 6305,
|
|
"valid_targets_mean": 4360.9,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 6.635120925341745,
|
|
"grad_norm": 0.45482490047304375,
|
|
"learning_rate": 3.3208866296310147e-07,
|
|
"loss": 0.0756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08658730238676071,
|
|
"step": 6310,
|
|
"valid_targets_mean": 4034.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 6.6403785488958995,
|
|
"grad_norm": 0.33861109819356333,
|
|
"learning_rate": 3.2263999653876057e-07,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05370061844587326,
|
|
"step": 6315,
|
|
"valid_targets_mean": 3534.8,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 6.645636172450052,
|
|
"grad_norm": 0.381047002476017,
|
|
"learning_rate": 3.133266020478254e-07,
|
|
"loss": 0.0672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06742705404758453,
|
|
"step": 6320,
|
|
"valid_targets_mean": 3806.2,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 6.6508937960042065,
|
|
"grad_norm": 0.48814012056034195,
|
|
"learning_rate": 3.0414854351519476e-07,
|
|
"loss": 0.0665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07534006237983704,
|
|
"step": 6325,
|
|
"valid_targets_mean": 3536.1,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 6.65615141955836,
|
|
"grad_norm": 0.5135246496208872,
|
|
"learning_rate": 2.951058840353893e-07,
|
|
"loss": 0.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08683723211288452,
|
|
"step": 6330,
|
|
"valid_targets_mean": 2808.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 6.6614090431125135,
|
|
"grad_norm": 0.4131782280152961,
|
|
"learning_rate": 2.861986857721388e-07,
|
|
"loss": 0.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060513995587825775,
|
|
"step": 6335,
|
|
"valid_targets_mean": 2556.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.5623539902771256,
|
|
"learning_rate": 2.7742700995794457e-07,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09023596346378326,
|
|
"step": 6340,
|
|
"valid_targets_mean": 2305.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.6719242902208205,
|
|
"grad_norm": 0.3718507555452981,
|
|
"learning_rate": 2.687909168936509e-07,
|
|
"loss": 0.0524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052152276039123535,
|
|
"step": 6345,
|
|
"valid_targets_mean": 2936.2,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 6.677181913774974,
|
|
"grad_norm": 0.3654923398679437,
|
|
"learning_rate": 2.6029046594805206e-07,
|
|
"loss": 0.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056927390396595,
|
|
"step": 6350,
|
|
"valid_targets_mean": 2775.9,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 6.682439537329127,
|
|
"grad_norm": 0.4102354641164959,
|
|
"learning_rate": 2.519257155574617e-07,
|
|
"loss": 0.0596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06437982618808746,
|
|
"step": 6355,
|
|
"valid_targets_mean": 3847.4,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 6.687697160883281,
|
|
"grad_norm": 0.48384970907690883,
|
|
"learning_rate": 2.436967232253218e-07,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256387084722519,
|
|
"step": 6360,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 6.692954784437434,
|
|
"grad_norm": 0.3651370754933973,
|
|
"learning_rate": 2.3560354552180976e-07,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05824588984251022,
|
|
"step": 6365,
|
|
"valid_targets_mean": 3367.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 6.698212407991588,
|
|
"grad_norm": 0.5849891621462965,
|
|
"learning_rate": 2.27646238083441e-07,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0712180882692337,
|
|
"step": 6370,
|
|
"valid_targets_mean": 1583.1,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 6.703470031545741,
|
|
"grad_norm": 0.6839730797579563,
|
|
"learning_rate": 2.1982485561269805e-07,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09953617304563522,
|
|
"step": 6375,
|
|
"valid_targets_mean": 1594.0,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 6.708727655099895,
|
|
"grad_norm": 0.5295988879787382,
|
|
"learning_rate": 2.1213945187763764e-07,
|
|
"loss": 0.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08865515142679214,
|
|
"step": 6380,
|
|
"valid_targets_mean": 1982.2,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.713985278654048,
|
|
"grad_norm": 0.38690209052475394,
|
|
"learning_rate": 2.0459007971154632e-07,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0660017654299736,
|
|
"step": 6385,
|
|
"valid_targets_mean": 3892.2,
|
|
"valid_targets_min": 2440
|
|
},
|
|
{
|
|
"epoch": 6.719242902208202,
|
|
"grad_norm": 0.45309379568257596,
|
|
"learning_rate": 1.9717679101254549e-07,
|
|
"loss": 0.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06845031678676605,
|
|
"step": 6390,
|
|
"valid_targets_mean": 2770.6,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 6.724500525762355,
|
|
"grad_norm": 0.4955813216821868,
|
|
"learning_rate": 1.898996367432604e-07,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0703897699713707,
|
|
"step": 6395,
|
|
"valid_targets_mean": 3325.8,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 6.729758149316509,
|
|
"grad_norm": 0.4469529556688231,
|
|
"learning_rate": 1.8275866693046263e-07,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06479282677173615,
|
|
"step": 6400,
|
|
"valid_targets_mean": 1951.9,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 6.735015772870662,
|
|
"grad_norm": 0.45838941321833887,
|
|
"learning_rate": 1.7575393066471714e-07,
|
|
"loss": 0.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10326488316059113,
|
|
"step": 6405,
|
|
"valid_targets_mean": 3283.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.740273396424816,
|
|
"grad_norm": 0.34698247212854855,
|
|
"learning_rate": 1.6888547610005802e-07,
|
|
"loss": 0.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05749543383717537,
|
|
"step": 6410,
|
|
"valid_targets_mean": 4175.6,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 6.745531019978969,
|
|
"grad_norm": 0.3779011861557256,
|
|
"learning_rate": 1.6215335045364656e-07,
|
|
"loss": 0.0631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054585136473178864,
|
|
"step": 6415,
|
|
"valid_targets_mean": 3676.2,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 6.750788643533123,
|
|
"grad_norm": 0.41789036578968497,
|
|
"learning_rate": 1.5555760000545595e-07,
|
|
"loss": 0.0673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06992784142494202,
|
|
"step": 6420,
|
|
"valid_targets_mean": 3627.8,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.756046267087276,
|
|
"grad_norm": 0.42404967047065883,
|
|
"learning_rate": 1.4909827009794486e-07,
|
|
"loss": 0.0792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0973619669675827,
|
|
"step": 6425,
|
|
"valid_targets_mean": 2844.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 6.76130389064143,
|
|
"grad_norm": 0.4336047857713409,
|
|
"learning_rate": 1.4277540513575328e-07,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08207525312900543,
|
|
"step": 6430,
|
|
"valid_targets_mean": 3541.2,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 6.766561514195583,
|
|
"grad_norm": 0.5364441964738733,
|
|
"learning_rate": 1.3658904858538936e-07,
|
|
"loss": 0.0848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09877628087997437,
|
|
"step": 6435,
|
|
"valid_targets_mean": 2889.8,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 6.771819137749737,
|
|
"grad_norm": 0.36625564062895793,
|
|
"learning_rate": 1.3053924297493858e-07,
|
|
"loss": 0.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06484304368495941,
|
|
"step": 6440,
|
|
"valid_targets_mean": 3737.8,
|
|
"valid_targets_min": 2695
|
|
},
|
|
{
|
|
"epoch": 6.77707676130389,
|
|
"grad_norm": 0.5412776019933817,
|
|
"learning_rate": 1.2462602989376404e-07,
|
|
"loss": 0.0726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08294028043746948,
|
|
"step": 6445,
|
|
"valid_targets_mean": 1918.2,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 6.782334384858045,
|
|
"grad_norm": 0.3984422160277871,
|
|
"learning_rate": 1.1884944999222658e-07,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05830984562635422,
|
|
"step": 6450,
|
|
"valid_targets_mean": 3263.6,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 6.787592008412197,
|
|
"grad_norm": 0.39046213687264053,
|
|
"learning_rate": 1.1320954298140063e-07,
|
|
"loss": 0.0574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0653320848941803,
|
|
"step": 6455,
|
|
"valid_targets_mean": 3247.5,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 6.792849631966352,
|
|
"grad_norm": 0.3886721526102773,
|
|
"learning_rate": 1.0770634763280552e-07,
|
|
"loss": 0.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06041385605931282,
|
|
"step": 6460,
|
|
"valid_targets_mean": 2887.5,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 6.798107255520505,
|
|
"grad_norm": 0.42114525909840816,
|
|
"learning_rate": 1.023399017781368e-07,
|
|
"loss": 0.0704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06531903892755508,
|
|
"step": 6465,
|
|
"valid_targets_mean": 3249.1,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 6.803364879074659,
|
|
"grad_norm": 0.4703782400079518,
|
|
"learning_rate": 9.711024230900423e-08,
|
|
"loss": 0.0828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06593647599220276,
|
|
"step": 6470,
|
|
"valid_targets_mean": 2108.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.808622502628812,
|
|
"grad_norm": 0.4250471242642898,
|
|
"learning_rate": 9.201740517668089e-08,
|
|
"loss": 0.0693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06148836016654968,
|
|
"step": 6475,
|
|
"valid_targets_mean": 2457.1,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 6.813880126182966,
|
|
"grad_norm": 0.5638516909636172,
|
|
"learning_rate": 8.706142539185447e-08,
|
|
"loss": 0.0767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07306583970785141,
|
|
"step": 6480,
|
|
"valid_targets_mean": 1751.3,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 6.819137749737119,
|
|
"grad_norm": 0.4540823384194377,
|
|
"learning_rate": 8.224233702438966e-08,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07954973727464676,
|
|
"step": 6485,
|
|
"valid_targets_mean": 1958.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.8243953732912725,
|
|
"grad_norm": 0.4910919666792136,
|
|
"learning_rate": 7.756017320309283e-08,
|
|
"loss": 0.0775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07550451904535294,
|
|
"step": 6490,
|
|
"valid_targets_mean": 2192.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 6.829652996845426,
|
|
"grad_norm": 0.4206177656567329,
|
|
"learning_rate": 7.301496611547665e-08,
|
|
"loss": 0.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041690800338983536,
|
|
"step": 6495,
|
|
"valid_targets_mean": 3516.8,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 6.8349106203995795,
|
|
"grad_norm": 0.3773738904949249,
|
|
"learning_rate": 6.86067470075491e-08,
|
|
"loss": 0.0653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04687555879354477,
|
|
"step": 6500,
|
|
"valid_targets_mean": 3454.2,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 6.840168243953733,
|
|
"grad_norm": 0.4563756234236278,
|
|
"learning_rate": 6.433554618359816e-08,
|
|
"loss": 0.0896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07210962474346161,
|
|
"step": 6505,
|
|
"valid_targets_mean": 2119.8,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 6.8454258675078865,
|
|
"grad_norm": 0.5475546212908606,
|
|
"learning_rate": 6.020139300597638e-08,
|
|
"loss": 0.061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060839422047138214,
|
|
"step": 6510,
|
|
"valid_targets_mean": 2944.4,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 6.85068349106204,
|
|
"grad_norm": 0.4804288102156721,
|
|
"learning_rate": 5.620431589490105e-08,
|
|
"loss": 0.0671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0812021791934967,
|
|
"step": 6515,
|
|
"valid_targets_mean": 3749.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.8559411146161935,
|
|
"grad_norm": 0.37197474506671335,
|
|
"learning_rate": 5.234434232826324e-08,
|
|
"loss": 0.0679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05977034568786621,
|
|
"step": 6520,
|
|
"valid_targets_mean": 3300.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 6.861198738170347,
|
|
"grad_norm": 0.34878996125982237,
|
|
"learning_rate": 4.862149884143907e-08,
|
|
"loss": 0.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05171991139650345,
|
|
"step": 6525,
|
|
"valid_targets_mean": 4140.2,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 6.8664563617245005,
|
|
"grad_norm": 0.5125166627753399,
|
|
"learning_rate": 4.503581102709875e-08,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10298755764961243,
|
|
"step": 6530,
|
|
"valid_targets_mean": 3947.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 6.871713985278654,
|
|
"grad_norm": 0.43331284972761114,
|
|
"learning_rate": 4.1587303535040035e-08,
|
|
"loss": 0.0704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0699128806591034,
|
|
"step": 6535,
|
|
"valid_targets_mean": 3724.9,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 6.8769716088328074,
|
|
"grad_norm": 0.4372840859803025,
|
|
"learning_rate": 3.827600007201282e-08,
|
|
"loss": 0.0868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06751592457294464,
|
|
"step": 6540,
|
|
"valid_targets_mean": 3279.9,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.882229232386961,
|
|
"grad_norm": 0.5845756715897907,
|
|
"learning_rate": 3.510192340156149e-08,
|
|
"loss": 0.0898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18710404634475708,
|
|
"step": 6545,
|
|
"valid_targets_mean": 2807.4,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 6.887486855941114,
|
|
"grad_norm": 0.4566154404397145,
|
|
"learning_rate": 3.20650953438606e-08,
|
|
"loss": 0.0517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0638362467288971,
|
|
"step": 6550,
|
|
"valid_targets_mean": 2360.1,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 6.892744479495268,
|
|
"grad_norm": 0.4271135835525021,
|
|
"learning_rate": 2.9165536775574987e-08,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0660814568400383,
|
|
"step": 6555,
|
|
"valid_targets_mean": 3388.2,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 6.898002103049421,
|
|
"grad_norm": 0.4199480926290346,
|
|
"learning_rate": 2.6403267629706575e-08,
|
|
"loss": 0.0665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06848834455013275,
|
|
"step": 6560,
|
|
"valid_targets_mean": 3025.7,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 6.903259726603575,
|
|
"grad_norm": 0.38400761009100814,
|
|
"learning_rate": 2.3778306895467785e-08,
|
|
"loss": 0.067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05867878720164299,
|
|
"step": 6565,
|
|
"valid_targets_mean": 3426.3,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 6.908517350157728,
|
|
"grad_norm": 0.4830151526372453,
|
|
"learning_rate": 2.1290672618135e-08,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09053267538547516,
|
|
"step": 6570,
|
|
"valid_targets_mean": 2339.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.913774973711882,
|
|
"grad_norm": 0.490713299022986,
|
|
"learning_rate": 1.8940381898946424e-08,
|
|
"loss": 0.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1143651232123375,
|
|
"step": 6575,
|
|
"valid_targets_mean": 2480.9,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 6.919032597266035,
|
|
"grad_norm": 0.41154086999628936,
|
|
"learning_rate": 1.6727450894959973e-08,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06120670586824417,
|
|
"step": 6580,
|
|
"valid_targets_mean": 2543.4,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 6.92429022082019,
|
|
"grad_norm": 0.523892654936522,
|
|
"learning_rate": 1.4651894818966671e-08,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07591065764427185,
|
|
"step": 6585,
|
|
"valid_targets_mean": 1684.0,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 6.929547844374342,
|
|
"grad_norm": 0.4250053502226287,
|
|
"learning_rate": 1.2713727939364096e-08,
|
|
"loss": 0.0661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06668030470609665,
|
|
"step": 6590,
|
|
"valid_targets_mean": 2971.1,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.934805467928497,
|
|
"grad_norm": 0.47442923744851695,
|
|
"learning_rate": 1.091296358007643e-08,
|
|
"loss": 0.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06776121258735657,
|
|
"step": 6595,
|
|
"valid_targets_mean": 2640.1,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 6.94006309148265,
|
|
"grad_norm": 0.4650772531933131,
|
|
"learning_rate": 9.249614120450113e-09,
|
|
"loss": 0.0555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061403002589941025,
|
|
"step": 6600,
|
|
"valid_targets_mean": 2104.2,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 6.945320715036804,
|
|
"grad_norm": 0.432011520330159,
|
|
"learning_rate": 7.723690995171673e-09,
|
|
"loss": 0.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058847442269325256,
|
|
"step": 6605,
|
|
"valid_targets_mean": 2595.4,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 6.950578338590957,
|
|
"grad_norm": 0.48972432746720485,
|
|
"learning_rate": 6.335204694196684e-09,
|
|
"loss": 0.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06145904213190079,
|
|
"step": 6610,
|
|
"valid_targets_mean": 2202.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 6.955835962145111,
|
|
"grad_norm": 0.37478262650741523,
|
|
"learning_rate": 5.084164762667598e-09,
|
|
"loss": 0.0632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05222868174314499,
|
|
"step": 6615,
|
|
"valid_targets_mean": 2810.8,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 6.961093585699264,
|
|
"grad_norm": 0.5353009120426401,
|
|
"learning_rate": 3.970579800853802e-09,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08522634208202362,
|
|
"step": 6620,
|
|
"valid_targets_mean": 3550.4,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 6.966351209253418,
|
|
"grad_norm": 0.3423937100860187,
|
|
"learning_rate": 2.9944574640894398e-09,
|
|
"loss": 0.0631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053340837359428406,
|
|
"step": 6625,
|
|
"valid_targets_mean": 4882.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 6.971608832807571,
|
|
"grad_norm": 0.38481907467412085,
|
|
"learning_rate": 2.1558044627267847e-09,
|
|
"loss": 0.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060645852237939835,
|
|
"step": 6630,
|
|
"valid_targets_mean": 3580.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 6.976866456361725,
|
|
"grad_norm": 0.3561829621876444,
|
|
"learning_rate": 1.4546265620785094e-09,
|
|
"loss": 0.0627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05833351984620094,
|
|
"step": 6635,
|
|
"valid_targets_mean": 3815.4,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.982124079915878,
|
|
"grad_norm": 0.7199776089504053,
|
|
"learning_rate": 8.909285823910374e-10,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20033405721187592,
|
|
"step": 6640,
|
|
"valid_targets_mean": 2739.1,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 6.987381703470032,
|
|
"grad_norm": 0.3796322592574655,
|
|
"learning_rate": 4.647143988067981e-10,
|
|
"loss": 0.0778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062201619148254395,
|
|
"step": 6645,
|
|
"valid_targets_mean": 3168.9,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 6.992639327024185,
|
|
"grad_norm": 0.3951104364519597,
|
|
"learning_rate": 1.7598694132869853e-10,
|
|
"loss": 0.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07217049598693848,
|
|
"step": 6650,
|
|
"valid_targets_mean": 2708.9,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.997896950578339,
|
|
"grad_norm": 0.48474520987982883,
|
|
"learning_rate": 2.474819481568247e-11,
|
|
"loss": 0.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11167128384113312,
|
|
"step": 6655,
|
|
"valid_targets_mean": 2495.1,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05594291538000107,
|
|
"step": 6657,
|
|
"total_flos": 1429285307547648.0,
|
|
"train_loss": 0.14208543798229403,
|
|
"train_runtime": 24540.64,
|
|
"train_samples_per_second": 4.338,
|
|
"train_steps_per_second": 0.271,
|
|
"valid_targets_mean": 2843.0,
|
|
"valid_targets_min": 791
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 6657,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1429285307547648.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|