9904 lines
266 KiB
JSON
9904 lines
266 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4480,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0078125,
|
|
"grad_norm": 17.96756904824426,
|
|
"learning_rate": 3.5714285714285716e-07,
|
|
"loss": 0.8939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41230475902557373,
|
|
"step": 5,
|
|
"valid_targets_mean": 5005.6,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 0.015625,
|
|
"grad_norm": 19.549264193552855,
|
|
"learning_rate": 8.035714285714287e-07,
|
|
"loss": 0.8589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34911048412323,
|
|
"step": 10,
|
|
"valid_targets_mean": 2209.5,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 0.0234375,
|
|
"grad_norm": 14.963073374442443,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 0.9059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4477040469646454,
|
|
"step": 15,
|
|
"valid_targets_mean": 4400.9,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 0.03125,
|
|
"grad_norm": 11.94374174666485,
|
|
"learning_rate": 1.6964285714285717e-06,
|
|
"loss": 0.8576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.552347719669342,
|
|
"step": 20,
|
|
"valid_targets_mean": 5440.4,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 0.0390625,
|
|
"grad_norm": 9.210761726533649,
|
|
"learning_rate": 2.1428571428571427e-06,
|
|
"loss": 0.8242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4557860195636749,
|
|
"step": 25,
|
|
"valid_targets_mean": 4849.1,
|
|
"valid_targets_min": 2126
|
|
},
|
|
{
|
|
"epoch": 0.046875,
|
|
"grad_norm": 5.649029712497959,
|
|
"learning_rate": 2.5892857142857148e-06,
|
|
"loss": 0.7701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5368146300315857,
|
|
"step": 30,
|
|
"valid_targets_mean": 3446.0,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 0.0546875,
|
|
"grad_norm": 3.4022715505831047,
|
|
"learning_rate": 3.0357142857142856e-06,
|
|
"loss": 0.7537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3388116657733917,
|
|
"step": 35,
|
|
"valid_targets_mean": 2473.4,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 0.0625,
|
|
"grad_norm": 2.382440940205184,
|
|
"learning_rate": 3.482142857142857e-06,
|
|
"loss": 0.7136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3536578416824341,
|
|
"step": 40,
|
|
"valid_targets_mean": 2207.2,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 0.0703125,
|
|
"grad_norm": 1.7810928183074606,
|
|
"learning_rate": 3.928571428571429e-06,
|
|
"loss": 0.6872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2516762614250183,
|
|
"step": 45,
|
|
"valid_targets_mean": 2933.0,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 0.078125,
|
|
"grad_norm": 1.3440176553537058,
|
|
"learning_rate": 4.3750000000000005e-06,
|
|
"loss": 0.666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30899614095687866,
|
|
"step": 50,
|
|
"valid_targets_mean": 4440.1,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 0.0859375,
|
|
"grad_norm": 1.1491942330927796,
|
|
"learning_rate": 4.821428571428572e-06,
|
|
"loss": 0.6808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.367305189371109,
|
|
"step": 55,
|
|
"valid_targets_mean": 3518.9,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 0.09375,
|
|
"grad_norm": 0.6632664112758828,
|
|
"learning_rate": 5.267857142857144e-06,
|
|
"loss": 0.6359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3249462842941284,
|
|
"step": 60,
|
|
"valid_targets_mean": 7553.0,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 0.1015625,
|
|
"grad_norm": 0.939732060687424,
|
|
"learning_rate": 5.7142857142857145e-06,
|
|
"loss": 0.6653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23909389972686768,
|
|
"step": 65,
|
|
"valid_targets_mean": 2467.4,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.109375,
|
|
"grad_norm": 1.1934554447716919,
|
|
"learning_rate": 6.160714285714286e-06,
|
|
"loss": 0.5873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29758235812187195,
|
|
"step": 70,
|
|
"valid_targets_mean": 5148.9,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 0.1171875,
|
|
"grad_norm": 0.7889736660044546,
|
|
"learning_rate": 6.607142857142858e-06,
|
|
"loss": 0.624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716752588748932,
|
|
"step": 75,
|
|
"valid_targets_mean": 3193.4,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 0.125,
|
|
"grad_norm": 0.795209585514614,
|
|
"learning_rate": 7.053571428571429e-06,
|
|
"loss": 0.6065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3290722370147705,
|
|
"step": 80,
|
|
"valid_targets_mean": 3385.4,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.1328125,
|
|
"grad_norm": 0.7471873933851971,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 0.601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3724328875541687,
|
|
"step": 85,
|
|
"valid_targets_mean": 4007.1,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 0.140625,
|
|
"grad_norm": 0.8993710065257964,
|
|
"learning_rate": 7.946428571428571e-06,
|
|
"loss": 0.5805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3273639380931854,
|
|
"step": 90,
|
|
"valid_targets_mean": 2257.5,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 0.1484375,
|
|
"grad_norm": 0.6085301390610264,
|
|
"learning_rate": 8.392857142857144e-06,
|
|
"loss": 0.574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25863131880760193,
|
|
"step": 95,
|
|
"valid_targets_mean": 4760.6,
|
|
"valid_targets_min": 2056
|
|
},
|
|
{
|
|
"epoch": 0.15625,
|
|
"grad_norm": 0.6633413673717193,
|
|
"learning_rate": 8.839285714285714e-06,
|
|
"loss": 0.5394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17457596957683563,
|
|
"step": 100,
|
|
"valid_targets_mean": 4331.2,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.1640625,
|
|
"grad_norm": 0.6379715689447211,
|
|
"learning_rate": 9.285714285714288e-06,
|
|
"loss": 0.5411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23743145167827606,
|
|
"step": 105,
|
|
"valid_targets_mean": 3688.6,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 0.171875,
|
|
"grad_norm": 0.7058544900092659,
|
|
"learning_rate": 9.732142857142858e-06,
|
|
"loss": 0.5566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953992187976837,
|
|
"step": 110,
|
|
"valid_targets_mean": 4303.8,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 0.1796875,
|
|
"grad_norm": 0.6014957009141635,
|
|
"learning_rate": 1.0178571428571429e-05,
|
|
"loss": 0.5367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16641384363174438,
|
|
"step": 115,
|
|
"valid_targets_mean": 3292.5,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 0.1875,
|
|
"grad_norm": 0.7818673075251512,
|
|
"learning_rate": 1.0625e-05,
|
|
"loss": 0.5607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29473888874053955,
|
|
"step": 120,
|
|
"valid_targets_mean": 3652.6,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.1953125,
|
|
"grad_norm": 0.7049342901237339,
|
|
"learning_rate": 1.1071428571428572e-05,
|
|
"loss": 0.5621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302550345659256,
|
|
"step": 125,
|
|
"valid_targets_mean": 5160.9,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 0.203125,
|
|
"grad_norm": 0.733587341786186,
|
|
"learning_rate": 1.1517857142857142e-05,
|
|
"loss": 0.5355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24653804302215576,
|
|
"step": 130,
|
|
"valid_targets_mean": 3611.8,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 0.2109375,
|
|
"grad_norm": 0.6233505662423392,
|
|
"learning_rate": 1.1964285714285716e-05,
|
|
"loss": 0.4895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19609537720680237,
|
|
"step": 135,
|
|
"valid_targets_mean": 3779.8,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 0.21875,
|
|
"grad_norm": 0.6432261446906483,
|
|
"learning_rate": 1.2410714285714287e-05,
|
|
"loss": 0.526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264431893825531,
|
|
"step": 140,
|
|
"valid_targets_mean": 5093.6,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 0.2265625,
|
|
"grad_norm": 0.711263949283809,
|
|
"learning_rate": 1.2857142857142859e-05,
|
|
"loss": 0.5398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24528563022613525,
|
|
"step": 145,
|
|
"valid_targets_mean": 3377.5,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 0.234375,
|
|
"grad_norm": 0.6146384552610212,
|
|
"learning_rate": 1.3303571428571429e-05,
|
|
"loss": 0.5483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812318801879883,
|
|
"step": 150,
|
|
"valid_targets_mean": 4638.2,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.2421875,
|
|
"grad_norm": 0.6291447812634194,
|
|
"learning_rate": 1.375e-05,
|
|
"loss": 0.5122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23729528486728668,
|
|
"step": 155,
|
|
"valid_targets_mean": 4104.2,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 0.6846859685715388,
|
|
"learning_rate": 1.4196428571428574e-05,
|
|
"loss": 0.5073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27851545810699463,
|
|
"step": 160,
|
|
"valid_targets_mean": 3912.9,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 0.2578125,
|
|
"grad_norm": 0.8525319213558747,
|
|
"learning_rate": 1.4642857142857144e-05,
|
|
"loss": 0.5146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541823983192444,
|
|
"step": 165,
|
|
"valid_targets_mean": 2239.1,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 0.265625,
|
|
"grad_norm": 0.7037300480504363,
|
|
"learning_rate": 1.5089285714285715e-05,
|
|
"loss": 0.5065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14099031686782837,
|
|
"step": 170,
|
|
"valid_targets_mean": 2020.1,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 0.2734375,
|
|
"grad_norm": 0.543966931990601,
|
|
"learning_rate": 1.553571428571429e-05,
|
|
"loss": 0.5036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30978795886039734,
|
|
"step": 175,
|
|
"valid_targets_mean": 7194.6,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 0.28125,
|
|
"grad_norm": 0.7488069350286215,
|
|
"learning_rate": 1.598214285714286e-05,
|
|
"loss": 0.4979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593264788389206,
|
|
"step": 180,
|
|
"valid_targets_mean": 2323.5,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 0.2890625,
|
|
"grad_norm": 0.7556804619768549,
|
|
"learning_rate": 1.642857142857143e-05,
|
|
"loss": 0.4827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25917303562164307,
|
|
"step": 185,
|
|
"valid_targets_mean": 3841.2,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 0.296875,
|
|
"grad_norm": 0.6484295767787353,
|
|
"learning_rate": 1.6875e-05,
|
|
"loss": 0.4991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11816577613353729,
|
|
"step": 190,
|
|
"valid_targets_mean": 2090.5,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 0.3046875,
|
|
"grad_norm": 0.9675521420077724,
|
|
"learning_rate": 1.7321428571428572e-05,
|
|
"loss": 0.4807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871496081352234,
|
|
"step": 195,
|
|
"valid_targets_mean": 3262.0,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 0.8868906874285698,
|
|
"learning_rate": 1.7767857142857143e-05,
|
|
"loss": 0.4817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3107619285583496,
|
|
"step": 200,
|
|
"valid_targets_mean": 4601.6,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 0.3203125,
|
|
"grad_norm": 0.6676153314740336,
|
|
"learning_rate": 1.8214285714285715e-05,
|
|
"loss": 0.4896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26793426275253296,
|
|
"step": 205,
|
|
"valid_targets_mean": 4669.9,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 0.328125,
|
|
"grad_norm": 0.7051206753355692,
|
|
"learning_rate": 1.8660714285714287e-05,
|
|
"loss": 0.4867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2229864001274109,
|
|
"step": 210,
|
|
"valid_targets_mean": 3401.6,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 0.3359375,
|
|
"grad_norm": 0.7015483205055708,
|
|
"learning_rate": 1.910714285714286e-05,
|
|
"loss": 0.4731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18718403577804565,
|
|
"step": 215,
|
|
"valid_targets_mean": 2801.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.34375,
|
|
"grad_norm": 0.808895836105466,
|
|
"learning_rate": 1.955357142857143e-05,
|
|
"loss": 0.4676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.198102667927742,
|
|
"step": 220,
|
|
"valid_targets_mean": 2671.9,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 0.3515625,
|
|
"grad_norm": 0.7875629099119407,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.4866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31548118591308594,
|
|
"step": 225,
|
|
"valid_targets_mean": 4708.9,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 0.359375,
|
|
"grad_norm": 0.7425543349386009,
|
|
"learning_rate": 2.0446428571428573e-05,
|
|
"loss": 0.4732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19166424870491028,
|
|
"step": 230,
|
|
"valid_targets_mean": 2994.0,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 0.3671875,
|
|
"grad_norm": 0.7859185327269593,
|
|
"learning_rate": 2.0892857142857145e-05,
|
|
"loss": 0.476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34309425950050354,
|
|
"step": 235,
|
|
"valid_targets_mean": 4568.6,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.375,
|
|
"grad_norm": 0.6214503259557023,
|
|
"learning_rate": 2.1339285714285717e-05,
|
|
"loss": 0.4711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16691872477531433,
|
|
"step": 240,
|
|
"valid_targets_mean": 3472.2,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 0.3828125,
|
|
"grad_norm": 0.7466520389969196,
|
|
"learning_rate": 2.1785714285714285e-05,
|
|
"loss": 0.4583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22022241353988647,
|
|
"step": 245,
|
|
"valid_targets_mean": 2929.8,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 0.390625,
|
|
"grad_norm": 0.7808001134788266,
|
|
"learning_rate": 2.2232142857142856e-05,
|
|
"loss": 0.4772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855392396450043,
|
|
"step": 250,
|
|
"valid_targets_mean": 4683.8,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 0.3984375,
|
|
"grad_norm": 0.6664889538677775,
|
|
"learning_rate": 2.267857142857143e-05,
|
|
"loss": 0.4678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18999344110488892,
|
|
"step": 255,
|
|
"valid_targets_mean": 3088.6,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 0.40625,
|
|
"grad_norm": 0.685738642684446,
|
|
"learning_rate": 2.3125000000000003e-05,
|
|
"loss": 0.4624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763436734676361,
|
|
"step": 260,
|
|
"valid_targets_mean": 3795.0,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 0.4140625,
|
|
"grad_norm": 0.8839960358985133,
|
|
"learning_rate": 2.3571428571428575e-05,
|
|
"loss": 0.451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3162103295326233,
|
|
"step": 265,
|
|
"valid_targets_mean": 3780.5,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 0.421875,
|
|
"grad_norm": 0.796864784177994,
|
|
"learning_rate": 2.4017857142857146e-05,
|
|
"loss": 0.4634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18539923429489136,
|
|
"step": 270,
|
|
"valid_targets_mean": 3097.8,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 0.4296875,
|
|
"grad_norm": 0.6887553683985818,
|
|
"learning_rate": 2.4464285714285718e-05,
|
|
"loss": 0.4865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20794397592544556,
|
|
"step": 275,
|
|
"valid_targets_mean": 3608.9,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 0.4375,
|
|
"grad_norm": 0.7962626441708563,
|
|
"learning_rate": 2.4910714285714286e-05,
|
|
"loss": 0.4735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610498070716858,
|
|
"step": 280,
|
|
"valid_targets_mean": 3725.6,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 0.4453125,
|
|
"grad_norm": 0.697185907933457,
|
|
"learning_rate": 2.5357142857142858e-05,
|
|
"loss": 0.4731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22635391354560852,
|
|
"step": 285,
|
|
"valid_targets_mean": 4185.6,
|
|
"valid_targets_min": 1970
|
|
},
|
|
{
|
|
"epoch": 0.453125,
|
|
"grad_norm": 0.6677410018360095,
|
|
"learning_rate": 2.580357142857143e-05,
|
|
"loss": 0.4635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26091107726097107,
|
|
"step": 290,
|
|
"valid_targets_mean": 4815.5,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 0.4609375,
|
|
"grad_norm": 0.7107805199263176,
|
|
"learning_rate": 2.625e-05,
|
|
"loss": 0.4665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518097162246704,
|
|
"step": 295,
|
|
"valid_targets_mean": 3553.9,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 0.46875,
|
|
"grad_norm": 0.83107876341815,
|
|
"learning_rate": 2.6696428571428573e-05,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23311705887317657,
|
|
"step": 300,
|
|
"valid_targets_mean": 3395.8,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 0.4765625,
|
|
"grad_norm": 0.8062208342199118,
|
|
"learning_rate": 2.7142857142857148e-05,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21704252064228058,
|
|
"step": 305,
|
|
"valid_targets_mean": 3245.5,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.484375,
|
|
"grad_norm": 0.7027737842230039,
|
|
"learning_rate": 2.758928571428572e-05,
|
|
"loss": 0.4343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1845030039548874,
|
|
"step": 310,
|
|
"valid_targets_mean": 3798.1,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 0.4921875,
|
|
"grad_norm": 0.7657183197869262,
|
|
"learning_rate": 2.8035714285714288e-05,
|
|
"loss": 0.4436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21143358945846558,
|
|
"step": 315,
|
|
"valid_targets_mean": 3946.0,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.5931801718232698,
|
|
"learning_rate": 2.848214285714286e-05,
|
|
"loss": 0.4465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661380469799042,
|
|
"step": 320,
|
|
"valid_targets_mean": 6625.1,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 0.5078125,
|
|
"grad_norm": 0.743931649785124,
|
|
"learning_rate": 2.892857142857143e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2271675020456314,
|
|
"step": 325,
|
|
"valid_targets_mean": 4289.2,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 0.515625,
|
|
"grad_norm": 0.6749559813522829,
|
|
"learning_rate": 2.9375000000000003e-05,
|
|
"loss": 0.4729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10594234615564346,
|
|
"step": 330,
|
|
"valid_targets_mean": 2080.6,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 0.5234375,
|
|
"grad_norm": 0.8402508663688718,
|
|
"learning_rate": 2.9821428571428574e-05,
|
|
"loss": 0.4697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1548396646976471,
|
|
"step": 335,
|
|
"valid_targets_mean": 2019.6,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 0.53125,
|
|
"grad_norm": 0.7297904766690302,
|
|
"learning_rate": 3.0267857142857146e-05,
|
|
"loss": 0.4394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20972487330436707,
|
|
"step": 340,
|
|
"valid_targets_mean": 2635.0,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 0.5390625,
|
|
"grad_norm": 0.8819172140579167,
|
|
"learning_rate": 3.071428571428572e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.170149028301239,
|
|
"step": 345,
|
|
"valid_targets_mean": 1902.0,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 0.546875,
|
|
"grad_norm": 0.6739024637165539,
|
|
"learning_rate": 3.116071428571429e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14502549171447754,
|
|
"step": 350,
|
|
"valid_targets_mean": 2743.8,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 0.5546875,
|
|
"grad_norm": 0.6661634715932028,
|
|
"learning_rate": 3.160714285714286e-05,
|
|
"loss": 0.44,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20062237977981567,
|
|
"step": 355,
|
|
"valid_targets_mean": 4894.2,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 0.5625,
|
|
"grad_norm": 0.647978730900593,
|
|
"learning_rate": 3.205357142857143e-05,
|
|
"loss": 0.4145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20155119895935059,
|
|
"step": 360,
|
|
"valid_targets_mean": 3887.8,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 0.5703125,
|
|
"grad_norm": 0.7015412746404979,
|
|
"learning_rate": 3.2500000000000004e-05,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15700119733810425,
|
|
"step": 365,
|
|
"valid_targets_mean": 2921.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 0.578125,
|
|
"grad_norm": 0.7486578718525809,
|
|
"learning_rate": 3.2946428571428576e-05,
|
|
"loss": 0.428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20678569376468658,
|
|
"step": 370,
|
|
"valid_targets_mean": 4323.1,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 0.5859375,
|
|
"grad_norm": 0.838169154202404,
|
|
"learning_rate": 3.339285714285715e-05,
|
|
"loss": 0.4194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29163122177124023,
|
|
"step": 375,
|
|
"valid_targets_mean": 3624.4,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 0.59375,
|
|
"grad_norm": 0.9387879423155059,
|
|
"learning_rate": 3.383928571428572e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18844100832939148,
|
|
"step": 380,
|
|
"valid_targets_mean": 2875.9,
|
|
"valid_targets_min": 1960
|
|
},
|
|
{
|
|
"epoch": 0.6015625,
|
|
"grad_norm": 1.1173013169968338,
|
|
"learning_rate": 3.4285714285714284e-05,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3231787085533142,
|
|
"step": 385,
|
|
"valid_targets_mean": 4131.2,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 0.609375,
|
|
"grad_norm": 0.7518418854036697,
|
|
"learning_rate": 3.473214285714286e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24728673696517944,
|
|
"step": 390,
|
|
"valid_targets_mean": 4212.0,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 0.6171875,
|
|
"grad_norm": 0.6877153784997025,
|
|
"learning_rate": 3.5178571428571434e-05,
|
|
"loss": 0.437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569195628166199,
|
|
"step": 395,
|
|
"valid_targets_mean": 4949.9,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 0.8030409995296262,
|
|
"learning_rate": 3.5625000000000005e-05,
|
|
"loss": 0.4528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23973846435546875,
|
|
"step": 400,
|
|
"valid_targets_mean": 3094.4,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 0.6328125,
|
|
"grad_norm": 0.698272388936546,
|
|
"learning_rate": 3.607142857142858e-05,
|
|
"loss": 0.4697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3204219937324524,
|
|
"step": 405,
|
|
"valid_targets_mean": 4685.0,
|
|
"valid_targets_min": 1592
|
|
},
|
|
{
|
|
"epoch": 0.640625,
|
|
"grad_norm": 0.8224849866322584,
|
|
"learning_rate": 3.651785714285715e-05,
|
|
"loss": 0.4404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2267025113105774,
|
|
"step": 410,
|
|
"valid_targets_mean": 2413.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 0.6484375,
|
|
"grad_norm": 0.8433450197564629,
|
|
"learning_rate": 3.696428571428572e-05,
|
|
"loss": 0.4484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20454196631908417,
|
|
"step": 415,
|
|
"valid_targets_mean": 2724.8,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 0.65625,
|
|
"grad_norm": 0.8919819279342223,
|
|
"learning_rate": 3.7410714285714285e-05,
|
|
"loss": 0.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260209321975708,
|
|
"step": 420,
|
|
"valid_targets_mean": 3473.6,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.6640625,
|
|
"grad_norm": 0.7616809398711054,
|
|
"learning_rate": 3.785714285714286e-05,
|
|
"loss": 0.4248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16338583827018738,
|
|
"step": 425,
|
|
"valid_targets_mean": 2196.5,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 0.671875,
|
|
"grad_norm": 0.8722023426774858,
|
|
"learning_rate": 3.830357142857143e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14858098328113556,
|
|
"step": 430,
|
|
"valid_targets_mean": 2638.6,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 0.6796875,
|
|
"grad_norm": 0.6600840964820062,
|
|
"learning_rate": 3.875e-05,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440969556570053,
|
|
"step": 435,
|
|
"valid_targets_mean": 3230.9,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 0.6875,
|
|
"grad_norm": 1.4062614972920673,
|
|
"learning_rate": 3.919642857142858e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1737440526485443,
|
|
"step": 440,
|
|
"valid_targets_mean": 2463.6,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 0.6953125,
|
|
"grad_norm": 0.6429151903032931,
|
|
"learning_rate": 3.964285714285715e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20987603068351746,
|
|
"step": 445,
|
|
"valid_targets_mean": 4365.1,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 0.703125,
|
|
"grad_norm": 0.761943663872867,
|
|
"learning_rate": 3.9999993929021756e-05,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21558108925819397,
|
|
"step": 450,
|
|
"valid_targets_mean": 2933.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 0.7109375,
|
|
"grad_norm": 0.7384721961667817,
|
|
"learning_rate": 3.999978144517021e-05,
|
|
"loss": 0.3934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18356360495090485,
|
|
"step": 455,
|
|
"valid_targets_mean": 2846.9,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 0.71875,
|
|
"grad_norm": 0.7662994267458438,
|
|
"learning_rate": 3.999926541609213e-05,
|
|
"loss": 0.4323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26505139470100403,
|
|
"step": 460,
|
|
"valid_targets_mean": 3600.2,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 0.7265625,
|
|
"grad_norm": 0.6784337783120244,
|
|
"learning_rate": 3.99984458496195e-05,
|
|
"loss": 0.4008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14435040950775146,
|
|
"step": 465,
|
|
"valid_targets_mean": 3417.6,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 0.734375,
|
|
"grad_norm": 0.6899798162410619,
|
|
"learning_rate": 3.9997322758191244e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16640058159828186,
|
|
"step": 470,
|
|
"valid_targets_mean": 2475.9,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 0.7421875,
|
|
"grad_norm": 0.7598107876667082,
|
|
"learning_rate": 3.999589615885298e-05,
|
|
"loss": 0.4138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1500675082206726,
|
|
"step": 475,
|
|
"valid_targets_mean": 2404.8,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 0.5991250672122442,
|
|
"learning_rate": 3.999416607325684e-05,
|
|
"loss": 0.3915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19600877165794373,
|
|
"step": 480,
|
|
"valid_targets_mean": 4544.0,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 0.7578125,
|
|
"grad_norm": 0.8350151469563925,
|
|
"learning_rate": 3.9992132527661065e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2139078974723816,
|
|
"step": 485,
|
|
"valid_targets_mean": 2748.1,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 0.765625,
|
|
"grad_norm": 0.742724916364065,
|
|
"learning_rate": 3.998979555292963e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14377310872077942,
|
|
"step": 490,
|
|
"valid_targets_mean": 1976.2,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 0.7734375,
|
|
"grad_norm": 0.6032888980372508,
|
|
"learning_rate": 3.998715518453182e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352177858352661,
|
|
"step": 495,
|
|
"valid_targets_mean": 3793.0,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 0.78125,
|
|
"grad_norm": 0.5312387034154019,
|
|
"learning_rate": 3.998421146254162e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19723546504974365,
|
|
"step": 500,
|
|
"valid_targets_mean": 5989.4,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 0.7890625,
|
|
"grad_norm": 0.6640317905800676,
|
|
"learning_rate": 3.998096443163716e-05,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19736121594905853,
|
|
"step": 505,
|
|
"valid_targets_mean": 4948.1,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 0.796875,
|
|
"grad_norm": 0.7263525856000522,
|
|
"learning_rate": 3.9977414141100015e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16192728281021118,
|
|
"step": 510,
|
|
"valid_targets_mean": 2475.1,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 0.8046875,
|
|
"grad_norm": 0.6542177511338231,
|
|
"learning_rate": 3.997356064481446e-05,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19146600365638733,
|
|
"step": 515,
|
|
"valid_targets_mean": 4269.4,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 0.8125,
|
|
"grad_norm": 0.7792448501349808,
|
|
"learning_rate": 3.996940400126666e-05,
|
|
"loss": 0.412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2324947714805603,
|
|
"step": 520,
|
|
"valid_targets_mean": 3206.6,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 0.8203125,
|
|
"grad_norm": 0.6795491323084827,
|
|
"learning_rate": 3.996494427354376e-05,
|
|
"loss": 0.4455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25609952211380005,
|
|
"step": 525,
|
|
"valid_targets_mean": 4190.6,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 0.828125,
|
|
"grad_norm": 0.6358929941341841,
|
|
"learning_rate": 3.996018152933297e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17612898349761963,
|
|
"step": 530,
|
|
"valid_targets_mean": 3364.1,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.8359375,
|
|
"grad_norm": 0.6478724535554868,
|
|
"learning_rate": 3.9955115840920474e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15491518378257751,
|
|
"step": 535,
|
|
"valid_targets_mean": 3165.9,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 0.84375,
|
|
"grad_norm": 0.6240299900223509,
|
|
"learning_rate": 3.99497472851904e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21161368489265442,
|
|
"step": 540,
|
|
"valid_targets_mean": 4849.1,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 0.8515625,
|
|
"grad_norm": 0.8887151067828934,
|
|
"learning_rate": 3.9944075943623605e-05,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12745846807956696,
|
|
"step": 545,
|
|
"valid_targets_mean": 2297.4,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 0.859375,
|
|
"grad_norm": 0.6968349823968902,
|
|
"learning_rate": 3.9938101902296466e-05,
|
|
"loss": 0.4145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19802552461624146,
|
|
"step": 550,
|
|
"valid_targets_mean": 3379.8,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 0.8671875,
|
|
"grad_norm": 0.6483466284323246,
|
|
"learning_rate": 3.993182525187956e-05,
|
|
"loss": 0.3995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16744013130664825,
|
|
"step": 555,
|
|
"valid_targets_mean": 2976.2,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 0.875,
|
|
"grad_norm": 0.7088204134833905,
|
|
"learning_rate": 3.99252460876363e-05,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16968658566474915,
|
|
"step": 560,
|
|
"valid_targets_mean": 3568.8,
|
|
"valid_targets_min": 1716
|
|
},
|
|
{
|
|
"epoch": 0.8828125,
|
|
"grad_norm": 0.8109534418593113,
|
|
"learning_rate": 3.991836450942146e-05,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757621109485626,
|
|
"step": 565,
|
|
"valid_targets_mean": 3308.6,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 0.890625,
|
|
"grad_norm": 0.7782148542395121,
|
|
"learning_rate": 3.991118062167969e-05,
|
|
"loss": 0.4152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23976993560791016,
|
|
"step": 570,
|
|
"valid_targets_mean": 2853.8,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 0.8984375,
|
|
"grad_norm": 0.8321789676035424,
|
|
"learning_rate": 3.990369453344394e-05,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.172775000333786,
|
|
"step": 575,
|
|
"valid_targets_mean": 2051.9,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 0.90625,
|
|
"grad_norm": 0.8661157813587554,
|
|
"learning_rate": 3.989590635833375e-05,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19614984095096588,
|
|
"step": 580,
|
|
"valid_targets_mean": 2451.1,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 0.9140625,
|
|
"grad_norm": 0.7697819410121262,
|
|
"learning_rate": 3.98878162145536e-05,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16987182199954987,
|
|
"step": 585,
|
|
"valid_targets_mean": 2678.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.921875,
|
|
"grad_norm": 0.6204541118418326,
|
|
"learning_rate": 3.987942422489104e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16581398248672485,
|
|
"step": 590,
|
|
"valid_targets_mean": 3684.2,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 0.9296875,
|
|
"grad_norm": 0.7941834269646919,
|
|
"learning_rate": 3.987073051671489e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19133873283863068,
|
|
"step": 595,
|
|
"valid_targets_mean": 2947.9,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 0.9093803674433801,
|
|
"learning_rate": 3.986173522197327e-05,
|
|
"loss": 0.3817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1790042519569397,
|
|
"step": 600,
|
|
"valid_targets_mean": 3058.9,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 0.9453125,
|
|
"grad_norm": 0.8189986435806678,
|
|
"learning_rate": 3.985243847719162e-05,
|
|
"loss": 0.3923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2238115668296814,
|
|
"step": 605,
|
|
"valid_targets_mean": 2986.2,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 0.953125,
|
|
"grad_norm": 0.6423460496271767,
|
|
"learning_rate": 3.98428404234706e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29294848442077637,
|
|
"step": 610,
|
|
"valid_targets_mean": 5247.4,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 0.9609375,
|
|
"grad_norm": 0.723390888854924,
|
|
"learning_rate": 3.983294120648396e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2191966474056244,
|
|
"step": 615,
|
|
"valid_targets_mean": 3214.4,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 0.96875,
|
|
"grad_norm": 0.6471428325279565,
|
|
"learning_rate": 3.982274097647637e-05,
|
|
"loss": 0.382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22970233857631683,
|
|
"step": 620,
|
|
"valid_targets_mean": 3596.9,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 0.9765625,
|
|
"grad_norm": 0.711535328952705,
|
|
"learning_rate": 3.9812239888261054e-05,
|
|
"loss": 0.4428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21255776286125183,
|
|
"step": 625,
|
|
"valid_targets_mean": 3400.9,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.984375,
|
|
"grad_norm": 0.6717649883959258,
|
|
"learning_rate": 3.980143810121753e-05,
|
|
"loss": 0.4159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12616944313049316,
|
|
"step": 630,
|
|
"valid_targets_mean": 3052.5,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 0.9921875,
|
|
"grad_norm": 0.702899590753551,
|
|
"learning_rate": 3.9790335779289136e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14998459815979004,
|
|
"step": 635,
|
|
"valid_targets_mean": 3167.6,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 1.0536075206924722,
|
|
"learning_rate": 3.977893309098054e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650356888771057,
|
|
"step": 640,
|
|
"valid_targets_mean": 4440.5,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 1.0078125,
|
|
"grad_norm": 0.717480844989384,
|
|
"learning_rate": 3.976723020935522e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2495318055152893,
|
|
"step": 645,
|
|
"valid_targets_mean": 3964.1,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 1.015625,
|
|
"grad_norm": 1.1333948585004614,
|
|
"learning_rate": 3.975522731203281e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20741133391857147,
|
|
"step": 650,
|
|
"valid_targets_mean": 2578.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.0234375,
|
|
"grad_norm": 0.7063936064680646,
|
|
"learning_rate": 3.974292458118641e-05,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18767188489437103,
|
|
"step": 655,
|
|
"valid_targets_mean": 3479.0,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 1.03125,
|
|
"grad_norm": 0.6202932862959388,
|
|
"learning_rate": 3.973032220353982e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24497324228286743,
|
|
"step": 660,
|
|
"valid_targets_mean": 5560.9,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 1.0390625,
|
|
"grad_norm": 0.7854147121344943,
|
|
"learning_rate": 3.971742037036472e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29854217171669006,
|
|
"step": 665,
|
|
"valid_targets_mean": 3701.1,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.046875,
|
|
"grad_norm": 0.6239118499581965,
|
|
"learning_rate": 3.970421927747773e-05,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754225730895996,
|
|
"step": 670,
|
|
"valid_targets_mean": 5022.5,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 1.0546875,
|
|
"grad_norm": 0.6615988807618366,
|
|
"learning_rate": 3.969071912523748e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22817538678646088,
|
|
"step": 675,
|
|
"valid_targets_mean": 4072.8,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 1.0625,
|
|
"grad_norm": 0.6923395899259556,
|
|
"learning_rate": 3.967692011854155e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16829726099967957,
|
|
"step": 680,
|
|
"valid_targets_mean": 3964.4,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 1.0703125,
|
|
"grad_norm": 0.6794545027309182,
|
|
"learning_rate": 3.966282246682336e-05,
|
|
"loss": 0.3726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2231837660074234,
|
|
"step": 685,
|
|
"valid_targets_mean": 4236.6,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 1.078125,
|
|
"grad_norm": 0.6474740723049768,
|
|
"learning_rate": 3.964842638404898e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21102049946784973,
|
|
"step": 690,
|
|
"valid_targets_mean": 4003.4,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.0859375,
|
|
"grad_norm": 0.7043361092133104,
|
|
"learning_rate": 3.9633732088713935e-05,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14129799604415894,
|
|
"step": 695,
|
|
"valid_targets_mean": 3038.0,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 1.09375,
|
|
"grad_norm": 0.8387734695251665,
|
|
"learning_rate": 3.96187398038398e-05,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18181580305099487,
|
|
"step": 700,
|
|
"valid_targets_mean": 3199.2,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 1.1015625,
|
|
"grad_norm": 0.6400773796676754,
|
|
"learning_rate": 3.9603449756970877e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17386242747306824,
|
|
"step": 705,
|
|
"valid_targets_mean": 4438.2,
|
|
"valid_targets_min": 1671
|
|
},
|
|
{
|
|
"epoch": 1.109375,
|
|
"grad_norm": 0.7753059038787088,
|
|
"learning_rate": 3.958786218017077e-05,
|
|
"loss": 0.4142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18268132209777832,
|
|
"step": 710,
|
|
"valid_targets_mean": 2582.9,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 1.1171875,
|
|
"grad_norm": 0.967154953926223,
|
|
"learning_rate": 3.957197731001877e-05,
|
|
"loss": 0.3931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697431743144989,
|
|
"step": 715,
|
|
"valid_targets_mean": 3402.9,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 1.125,
|
|
"grad_norm": 0.7294060811403659,
|
|
"learning_rate": 3.955579538760635e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1693190336227417,
|
|
"step": 720,
|
|
"valid_targets_mean": 2534.0,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 1.1328125,
|
|
"grad_norm": 0.596103810489754,
|
|
"learning_rate": 3.9539316658533435e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17193399369716644,
|
|
"step": 725,
|
|
"valid_targets_mean": 4207.5,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 1.140625,
|
|
"grad_norm": 0.6608800570761942,
|
|
"learning_rate": 3.952254137290476e-05,
|
|
"loss": 0.4064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925645709037781,
|
|
"step": 730,
|
|
"valid_targets_mean": 5449.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 1.1484375,
|
|
"grad_norm": 0.5430624747531989,
|
|
"learning_rate": 3.9505469785326e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.205300971865654,
|
|
"step": 735,
|
|
"valid_targets_mean": 6273.9,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 1.15625,
|
|
"grad_norm": 0.6673093321728433,
|
|
"learning_rate": 3.948810215489992e-05,
|
|
"loss": 0.3785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23226603865623474,
|
|
"step": 740,
|
|
"valid_targets_mean": 4604.9,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 1.1640625,
|
|
"grad_norm": 0.8522836110168983,
|
|
"learning_rate": 3.9470438745222465e-05,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.178795725107193,
|
|
"step": 745,
|
|
"valid_targets_mean": 2705.0,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 1.171875,
|
|
"grad_norm": 0.7156080473020594,
|
|
"learning_rate": 3.9452479824378764e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18847236037254333,
|
|
"step": 750,
|
|
"valid_targets_mean": 3378.4,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.1796875,
|
|
"grad_norm": 0.7777643194532898,
|
|
"learning_rate": 3.943422566493902e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14694583415985107,
|
|
"step": 755,
|
|
"valid_targets_mean": 2872.0,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 1.1875,
|
|
"grad_norm": 0.6088770895286124,
|
|
"learning_rate": 3.941567654395441e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13523423671722412,
|
|
"step": 760,
|
|
"valid_targets_mean": 3037.9,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 1.1953125,
|
|
"grad_norm": 0.7817452076397042,
|
|
"learning_rate": 3.939683274295287e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438584327697754,
|
|
"step": 765,
|
|
"valid_targets_mean": 2892.0,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 1.203125,
|
|
"grad_norm": 0.6751172975791888,
|
|
"learning_rate": 3.937769454793482e-05,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11834487318992615,
|
|
"step": 770,
|
|
"valid_targets_mean": 2260.9,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 1.2109375,
|
|
"grad_norm": 0.7726883048527896,
|
|
"learning_rate": 3.9358262249368805e-05,
|
|
"loss": 0.3899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11126775294542313,
|
|
"step": 775,
|
|
"valid_targets_mean": 1909.8,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.21875,
|
|
"grad_norm": 0.5956614818124912,
|
|
"learning_rate": 3.933853614218713e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13173893094062805,
|
|
"step": 780,
|
|
"valid_targets_mean": 2888.6,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 1.2265625,
|
|
"grad_norm": 0.7955621901527012,
|
|
"learning_rate": 3.931851652578137e-05,
|
|
"loss": 0.3921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21286198496818542,
|
|
"step": 785,
|
|
"valid_targets_mean": 2701.2,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 1.234375,
|
|
"grad_norm": 0.583602848222839,
|
|
"learning_rate": 3.929820370399777e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1771073043346405,
|
|
"step": 790,
|
|
"valid_targets_mean": 5449.1,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 1.2421875,
|
|
"grad_norm": 0.8688199779154842,
|
|
"learning_rate": 3.927759798513271e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13444875180721283,
|
|
"step": 795,
|
|
"valid_targets_mean": 2292.5,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.6079867923707432,
|
|
"learning_rate": 3.9256699681927995e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20655576884746552,
|
|
"step": 800,
|
|
"valid_targets_mean": 4670.4,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 1.2578125,
|
|
"grad_norm": 0.6227531045061623,
|
|
"learning_rate": 3.923550911156609e-05,
|
|
"loss": 0.3924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17854513227939606,
|
|
"step": 805,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 1.265625,
|
|
"grad_norm": 0.7323560417752012,
|
|
"learning_rate": 3.9214026595665334e-05,
|
|
"loss": 0.3696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16765354573726654,
|
|
"step": 810,
|
|
"valid_targets_mean": 2485.0,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 1.2734375,
|
|
"grad_norm": 0.5905137006716488,
|
|
"learning_rate": 3.9192252460275064e-05,
|
|
"loss": 0.3518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19035747647285461,
|
|
"step": 815,
|
|
"valid_targets_mean": 4908.9,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 1.28125,
|
|
"grad_norm": 1.2350599854400148,
|
|
"learning_rate": 3.9170187035870616e-05,
|
|
"loss": 0.3745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18624623119831085,
|
|
"step": 820,
|
|
"valid_targets_mean": 3075.4,
|
|
"valid_targets_min": 1830
|
|
},
|
|
{
|
|
"epoch": 1.2890625,
|
|
"grad_norm": 0.6140849163211575,
|
|
"learning_rate": 3.914783065734836e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564536690711975,
|
|
"step": 825,
|
|
"valid_targets_mean": 3508.4,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.296875,
|
|
"grad_norm": 0.8204374626120744,
|
|
"learning_rate": 3.912518366402062e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14130899310112,
|
|
"step": 830,
|
|
"valid_targets_mean": 2414.4,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 1.3046875,
|
|
"grad_norm": 0.5199907606908566,
|
|
"learning_rate": 3.9102246399610485e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13302895426750183,
|
|
"step": 835,
|
|
"valid_targets_mean": 4480.8,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 1.3125,
|
|
"grad_norm": 0.6042383048305005,
|
|
"learning_rate": 3.9079019212246604e-05,
|
|
"loss": 0.3607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14602449536323547,
|
|
"step": 840,
|
|
"valid_targets_mean": 3447.0,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 1.3203125,
|
|
"grad_norm": 0.5795783849108678,
|
|
"learning_rate": 3.905550245445793e-05,
|
|
"loss": 0.3536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20295895636081696,
|
|
"step": 845,
|
|
"valid_targets_mean": 5652.8,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 1.328125,
|
|
"grad_norm": 0.7532601180974956,
|
|
"learning_rate": 3.903169648316834e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16471198201179504,
|
|
"step": 850,
|
|
"valid_targets_mean": 2042.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 1.3359375,
|
|
"grad_norm": 0.7377874385945884,
|
|
"learning_rate": 3.9007601659691226e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20832914113998413,
|
|
"step": 855,
|
|
"valid_targets_mean": 4012.5,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.34375,
|
|
"grad_norm": 0.6483086949453459,
|
|
"learning_rate": 3.898321834972402e-05,
|
|
"loss": 0.3745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1801690012216568,
|
|
"step": 860,
|
|
"valid_targets_mean": 3688.4,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 1.3515625,
|
|
"grad_norm": 0.6621289211568006,
|
|
"learning_rate": 3.895854692334264e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11350404471158981,
|
|
"step": 865,
|
|
"valid_targets_mean": 2403.1,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 1.359375,
|
|
"grad_norm": 0.7051452503544638,
|
|
"learning_rate": 3.893358775499585e-05,
|
|
"loss": 0.3915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17734670639038086,
|
|
"step": 870,
|
|
"valid_targets_mean": 2632.0,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 1.3671875,
|
|
"grad_norm": 0.5880846650353504,
|
|
"learning_rate": 3.8908341223499625e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24848781526088715,
|
|
"step": 875,
|
|
"valid_targets_mean": 5130.4,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 1.375,
|
|
"grad_norm": 0.641586733358507,
|
|
"learning_rate": 3.8882807712031344e-05,
|
|
"loss": 0.3783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19222408533096313,
|
|
"step": 880,
|
|
"valid_targets_mean": 3762.5,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 1.3828125,
|
|
"grad_norm": 0.7041910277773263,
|
|
"learning_rate": 3.885698760812403e-05,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1821584850549698,
|
|
"step": 885,
|
|
"valid_targets_mean": 3641.6,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 1.390625,
|
|
"grad_norm": 0.7666496746495958,
|
|
"learning_rate": 3.883088130366042e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19667081534862518,
|
|
"step": 890,
|
|
"valid_targets_mean": 3263.5,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 1.3984375,
|
|
"grad_norm": 0.61507661429378,
|
|
"learning_rate": 3.880448919486705e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326030194759369,
|
|
"step": 895,
|
|
"valid_targets_mean": 2744.5,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 1.40625,
|
|
"grad_norm": 0.7097969141953309,
|
|
"learning_rate": 3.877781168230824e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17062215507030487,
|
|
"step": 900,
|
|
"valid_targets_mean": 2368.6,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 1.4140625,
|
|
"grad_norm": 0.6682124601414049,
|
|
"learning_rate": 3.875084917087999e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15661239624023438,
|
|
"step": 905,
|
|
"valid_targets_mean": 2827.1,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 1.421875,
|
|
"grad_norm": 0.5542337843431767,
|
|
"learning_rate": 3.8723602069803845e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14766310155391693,
|
|
"step": 910,
|
|
"valid_targets_mean": 4753.6,
|
|
"valid_targets_min": 2584
|
|
},
|
|
{
|
|
"epoch": 1.4296875,
|
|
"grad_norm": 0.4485249780103347,
|
|
"learning_rate": 3.8696070792620704e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19252242147922516,
|
|
"step": 915,
|
|
"valid_targets_mean": 7060.6,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 1.4375,
|
|
"grad_norm": 0.7060759073419828,
|
|
"learning_rate": 3.8668255757184524e-05,
|
|
"loss": 0.3487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1855391561985016,
|
|
"step": 920,
|
|
"valid_targets_mean": 2534.6,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 1.4453125,
|
|
"grad_norm": 0.6371299720911998,
|
|
"learning_rate": 3.864015738565598e-05,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14003197848796844,
|
|
"step": 925,
|
|
"valid_targets_mean": 3398.9,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 1.453125,
|
|
"grad_norm": 0.7255684250131219,
|
|
"learning_rate": 3.8611776104496045e-05,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16949379444122314,
|
|
"step": 930,
|
|
"valid_targets_mean": 2950.5,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 1.4609375,
|
|
"grad_norm": 0.8059891295490175,
|
|
"learning_rate": 3.858311234445957e-05,
|
|
"loss": 0.3804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18340042233467102,
|
|
"step": 935,
|
|
"valid_targets_mean": 2367.6,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 1.46875,
|
|
"grad_norm": 0.5212046996849041,
|
|
"learning_rate": 3.855416654058867e-05,
|
|
"loss": 0.347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0967634916305542,
|
|
"step": 940,
|
|
"valid_targets_mean": 3118.6,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 1.4765625,
|
|
"grad_norm": 0.5558268682138805,
|
|
"learning_rate": 3.852493913220618e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894714415073395,
|
|
"step": 945,
|
|
"valid_targets_mean": 3866.8,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 1.484375,
|
|
"grad_norm": 0.4156154381584771,
|
|
"learning_rate": 3.8495430562908965e-05,
|
|
"loss": 0.3571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1600133180618286,
|
|
"step": 950,
|
|
"valid_targets_mean": 6566.5,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 1.4921875,
|
|
"grad_norm": 0.6929944969702655,
|
|
"learning_rate": 3.846564128056118e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21267889440059662,
|
|
"step": 955,
|
|
"valid_targets_mean": 2846.0,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 0.6939723793261908,
|
|
"learning_rate": 3.84355717372875e-05,
|
|
"loss": 0.3552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1929103434085846,
|
|
"step": 960,
|
|
"valid_targets_mean": 2762.9,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 1.5078125,
|
|
"grad_norm": 0.6984088178705736,
|
|
"learning_rate": 3.840522238946621e-05,
|
|
"loss": 0.3615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564529985189438,
|
|
"step": 965,
|
|
"valid_targets_mean": 2596.9,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 1.515625,
|
|
"grad_norm": 0.6533584049042327,
|
|
"learning_rate": 3.8374593697722354e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13548174500465393,
|
|
"step": 970,
|
|
"valid_targets_mean": 2516.0,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 1.5234375,
|
|
"grad_norm": 0.61379751156259,
|
|
"learning_rate": 3.834368612692064e-05,
|
|
"loss": 0.3724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19175985455513,
|
|
"step": 975,
|
|
"valid_targets_mean": 3408.5,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 1.53125,
|
|
"grad_norm": 0.6819736347777707,
|
|
"learning_rate": 3.83125001461585e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27571791410446167,
|
|
"step": 980,
|
|
"valid_targets_mean": 4233.0,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 1.5390625,
|
|
"grad_norm": 0.5612984405743221,
|
|
"learning_rate": 3.8281036228758866e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21386763453483582,
|
|
"step": 985,
|
|
"valid_targets_mean": 5036.9,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 1.546875,
|
|
"grad_norm": 0.7087689861146638,
|
|
"learning_rate": 3.824929485226307e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734396070241928,
|
|
"step": 990,
|
|
"valid_targets_mean": 3133.5,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 1.5546875,
|
|
"grad_norm": 0.7047838065374341,
|
|
"learning_rate": 3.821727649842352e-05,
|
|
"loss": 0.351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505778282880783,
|
|
"step": 995,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 1.5625,
|
|
"grad_norm": 0.5782633230420272,
|
|
"learning_rate": 3.818498165319647e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16242533922195435,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3941.5,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 1.5703125,
|
|
"grad_norm": 0.7617447352248516,
|
|
"learning_rate": 3.8152410806734574e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18587122857570648,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3155.9,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 1.578125,
|
|
"grad_norm": 0.8051173791232117,
|
|
"learning_rate": 3.811956445337948e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20389509201049805,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2071.6,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 1.5859375,
|
|
"grad_norm": 0.6149926316590131,
|
|
"learning_rate": 3.8086443091654345e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1493193507194519,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2578.6,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 1.59375,
|
|
"grad_norm": 0.6106467301006042,
|
|
"learning_rate": 3.805304722425619e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16361454129219055,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3387.8,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 1.6015625,
|
|
"grad_norm": 0.6012163153945732,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 0.3468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18078261613845825,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3736.4,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 1.609375,
|
|
"grad_norm": 0.6222397690503602,
|
|
"learning_rate": 3.7985434004052867e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13860692083835602,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2907.6,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.6171875,
|
|
"grad_norm": 0.6311242910410201,
|
|
"learning_rate": 3.795121767744242e-05,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14113786816596985,
|
|
"step": 1035,
|
|
"valid_targets_mean": 2720.9,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 1.625,
|
|
"grad_norm": 0.7183884474371636,
|
|
"learning_rate": 3.791672889753284e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18946769833564758,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3218.9,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 1.6328125,
|
|
"grad_norm": 0.7698498428390836,
|
|
"learning_rate": 3.788196818777508e-05,
|
|
"loss": 0.3699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21778106689453125,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3178.9,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.640625,
|
|
"grad_norm": 0.6339250977118923,
|
|
"learning_rate": 3.7846936075747294e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12941375374794006,
|
|
"step": 1050,
|
|
"valid_targets_mean": 2742.5,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 1.6484375,
|
|
"grad_norm": 0.5605893825129544,
|
|
"learning_rate": 3.7811633093146796e-05,
|
|
"loss": 0.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1594964861869812,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4212.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.65625,
|
|
"grad_norm": 0.6714487665962013,
|
|
"learning_rate": 3.7776059775782034e-05,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21013669669628143,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3160.6,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 1.6640625,
|
|
"grad_norm": 0.6339065933772607,
|
|
"learning_rate": 3.774021666356444e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15483132004737854,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2634.0,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 1.671875,
|
|
"grad_norm": 0.6106560881751476,
|
|
"learning_rate": 3.770410430050025e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1664782166481018,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3533.5,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 1.6796875,
|
|
"grad_norm": 0.690336242651043,
|
|
"learning_rate": 3.7667723234682216e-05,
|
|
"loss": 0.3646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14471693336963654,
|
|
"step": 1075,
|
|
"valid_targets_mean": 2540.2,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 1.6875,
|
|
"grad_norm": 0.5828607695646199,
|
|
"learning_rate": 3.763107401828132e-05,
|
|
"loss": 0.342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12046236544847488,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3159.0,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 1.6953125,
|
|
"grad_norm": 0.6640208616664027,
|
|
"learning_rate": 3.759415720753837e-05,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16401463747024536,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3356.5,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 1.703125,
|
|
"grad_norm": 0.6381677546345116,
|
|
"learning_rate": 3.755697336275558e-05,
|
|
"loss": 0.364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16993117332458496,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3583.1,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.7109375,
|
|
"grad_norm": 0.6138339517369661,
|
|
"learning_rate": 3.751952304828804e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1908346712589264,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4108.8,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 1.71875,
|
|
"grad_norm": 0.6887787371555527,
|
|
"learning_rate": 3.748180683253518e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13385693728923798,
|
|
"step": 1100,
|
|
"valid_targets_mean": 2283.5,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 1.7265625,
|
|
"grad_norm": 0.61476727331446,
|
|
"learning_rate": 3.744382528793211e-05,
|
|
"loss": 0.3642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16393037140369415,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5005.5,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 1.734375,
|
|
"grad_norm": 0.569483575195429,
|
|
"learning_rate": 3.740557899094096e-05,
|
|
"loss": 0.3498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20551279187202454,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5758.0,
|
|
"valid_targets_min": 3096
|
|
},
|
|
{
|
|
"epoch": 1.7421875,
|
|
"grad_norm": 0.5992564089488164,
|
|
"learning_rate": 3.7367068522042115e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20950135588645935,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4741.0,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"grad_norm": 0.8157446974046557,
|
|
"learning_rate": 3.732829446572541e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12662388384342194,
|
|
"step": 1120,
|
|
"valid_targets_mean": 1994.9,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 1.7578125,
|
|
"grad_norm": 0.7318981276010909,
|
|
"learning_rate": 3.728925741048127e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.181562602519989,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2797.0,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 1.765625,
|
|
"grad_norm": 0.6206660141378648,
|
|
"learning_rate": 3.724995794879176e-05,
|
|
"loss": 0.3592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15590308606624603,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3253.0,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 1.7734375,
|
|
"grad_norm": 0.7773023307940932,
|
|
"learning_rate": 3.721039667712158e-05,
|
|
"loss": 0.3653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2208060622215271,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3277.6,
|
|
"valid_targets_min": 1595
|
|
},
|
|
{
|
|
"epoch": 1.78125,
|
|
"grad_norm": 0.678079810172107,
|
|
"learning_rate": 3.717057419590907e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587229907512665,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4060.6,
|
|
"valid_targets_min": 2056
|
|
},
|
|
{
|
|
"epoch": 1.7890625,
|
|
"grad_norm": 0.6119753818402225,
|
|
"learning_rate": 3.713049110955703e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446475088596344,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3203.1,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 1.796875,
|
|
"grad_norm": 0.5169342304856328,
|
|
"learning_rate": 3.709014802642359e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12101506441831589,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3626.5,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.8046875,
|
|
"grad_norm": 0.6833742842788032,
|
|
"learning_rate": 3.704954555881294e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24048751592636108,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3812.4,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 1.8125,
|
|
"grad_norm": 0.5487692848082963,
|
|
"learning_rate": 3.70086843229661e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.124627023935318,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3438.2,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 1.8203125,
|
|
"grad_norm": 0.5970881708649468,
|
|
"learning_rate": 3.696756493905148e-05,
|
|
"loss": 0.3535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16782565414905548,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3563.4,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 1.828125,
|
|
"grad_norm": 0.6059392562830068,
|
|
"learning_rate": 3.6926188031155545e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1678028106689453,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3841.4,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 1.8359375,
|
|
"grad_norm": 0.7331040902441825,
|
|
"learning_rate": 3.688455422727331e-05,
|
|
"loss": 0.3624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2555208206176758,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4151.9,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 1.84375,
|
|
"grad_norm": 0.6213813005894813,
|
|
"learning_rate": 3.684266415929878e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19481641054153442,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3746.9,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 1.8515625,
|
|
"grad_norm": 0.5558018806737768,
|
|
"learning_rate": 3.680051846301543e-05,
|
|
"loss": 0.3487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11920695006847382,
|
|
"step": 1185,
|
|
"valid_targets_mean": 2795.6,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 1.859375,
|
|
"grad_norm": 0.6891925153659144,
|
|
"learning_rate": 3.6758117778086494e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2519769072532654,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3515.2,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 1.8671875,
|
|
"grad_norm": 0.6416195801536151,
|
|
"learning_rate": 3.671546274804527e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1290416717529297,
|
|
"step": 1195,
|
|
"valid_targets_mean": 2739.1,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 1.875,
|
|
"grad_norm": 0.597719454795886,
|
|
"learning_rate": 3.667255402028538e-05,
|
|
"loss": 0.3518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12543544173240662,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2955.0,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 1.8828125,
|
|
"grad_norm": 0.6216728535077944,
|
|
"learning_rate": 3.662939224605091e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15662992000579834,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3229.9,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 1.890625,
|
|
"grad_norm": 1.171352699399867,
|
|
"learning_rate": 3.658597808042655e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1665850728750229,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2962.9,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 1.8984375,
|
|
"grad_norm": 0.6146797810248883,
|
|
"learning_rate": 3.654231218232763e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23089055716991425,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4185.9,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 1.90625,
|
|
"grad_norm": 0.6215041942430861,
|
|
"learning_rate": 3.6498395214490144e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15774253010749817,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2989.4,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 1.9140625,
|
|
"grad_norm": 0.4826675385648382,
|
|
"learning_rate": 3.645422784346067e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20675471425056458,
|
|
"step": 1225,
|
|
"valid_targets_mean": 7118.2,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 1.921875,
|
|
"grad_norm": 0.600735047502117,
|
|
"learning_rate": 3.640981073958627e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13122627139091492,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3128.4,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 1.9296875,
|
|
"grad_norm": 0.5862461972808842,
|
|
"learning_rate": 3.636514457700431e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18167629837989807,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3558.2,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 1.9375,
|
|
"grad_norm": 0.6495345966389012,
|
|
"learning_rate": 3.63202300336322e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14999762177467346,
|
|
"step": 1240,
|
|
"valid_targets_mean": 2730.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.9453125,
|
|
"grad_norm": 0.6420736141718513,
|
|
"learning_rate": 3.627506779115717e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1486133337020874,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2826.9,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 1.953125,
|
|
"grad_norm": 1.1147409487272022,
|
|
"learning_rate": 3.622965853502586e-05,
|
|
"loss": 0.3686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23948267102241516,
|
|
"step": 1250,
|
|
"valid_targets_mean": 6169.1,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 1.9609375,
|
|
"grad_norm": 0.740206516175118,
|
|
"learning_rate": 3.618400295443395e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15904131531715393,
|
|
"step": 1255,
|
|
"valid_targets_mean": 2390.9,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 1.96875,
|
|
"grad_norm": 0.6484542120745403,
|
|
"learning_rate": 3.613810174231568e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15228737890720367,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2265.9,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 1.9765625,
|
|
"grad_norm": 0.7323228620689355,
|
|
"learning_rate": 3.609195559533337e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17078956961631775,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3812.9,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 1.984375,
|
|
"grad_norm": 0.600622523094574,
|
|
"learning_rate": 3.60455652138668e-05,
|
|
"loss": 0.3562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2196221500635147,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4434.2,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 1.9921875,
|
|
"grad_norm": 0.6390041051074408,
|
|
"learning_rate": 3.5998931302002594e-05,
|
|
"loss": 0.3628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17158284783363342,
|
|
"step": 1275,
|
|
"valid_targets_mean": 2725.8,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.6123558409120109,
|
|
"learning_rate": 3.595205456752357e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13605687022209167,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3398.2,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 2.0078125,
|
|
"grad_norm": 0.776953952387867,
|
|
"learning_rate": 3.590493572189795e-05,
|
|
"loss": 0.3518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12498514354228973,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2230.1,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.015625,
|
|
"grad_norm": 0.7562568028297588,
|
|
"learning_rate": 3.585757548026858e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15171687304973602,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2719.1,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 2.0234375,
|
|
"grad_norm": 0.6505296106312963,
|
|
"learning_rate": 3.5809974561442074e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16986115276813507,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4864.4,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 2.03125,
|
|
"grad_norm": 0.6979755088869309,
|
|
"learning_rate": 3.5762133687877914e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17145171761512756,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3216.5,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 2.0390625,
|
|
"grad_norm": 0.6750500404419466,
|
|
"learning_rate": 3.571405358567748e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13886791467666626,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2691.4,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 2.046875,
|
|
"grad_norm": 0.6473464683516768,
|
|
"learning_rate": 3.566573498457301e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21840128302574158,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3912.4,
|
|
"valid_targets_min": 1778
|
|
},
|
|
{
|
|
"epoch": 2.0546875,
|
|
"grad_norm": 0.5742481963439168,
|
|
"learning_rate": 3.561717861791657e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13447214663028717,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3597.6,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 2.0625,
|
|
"grad_norm": 0.67168886352824,
|
|
"learning_rate": 3.556838522266886e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1397567242383957,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2635.5,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 2.0703125,
|
|
"grad_norm": 0.8561957174143824,
|
|
"learning_rate": 3.55193555393881e-05,
|
|
"loss": 0.3571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2027505338191986,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2760.4,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 2.078125,
|
|
"grad_norm": 0.7369821693546623,
|
|
"learning_rate": 3.5470090312218733e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22253325581550598,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4016.2,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 2.0859375,
|
|
"grad_norm": 0.6800227582321624,
|
|
"learning_rate": 3.542059028888016e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15180522203445435,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3475.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 2.09375,
|
|
"grad_norm": 0.7809404959124135,
|
|
"learning_rate": 3.5370856220655366e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15260334312915802,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2113.2,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 2.1015625,
|
|
"grad_norm": 0.5953677909436215,
|
|
"learning_rate": 3.532088886237956e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17524832487106323,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4110.6,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 2.109375,
|
|
"grad_norm": 0.619749119683035,
|
|
"learning_rate": 3.5270688972428696e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14120814204216003,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3056.2,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 2.1171875,
|
|
"grad_norm": 0.6256418100698415,
|
|
"learning_rate": 3.522025731270792e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14663541316986084,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4030.4,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 2.125,
|
|
"grad_norm": 0.6683507739868575,
|
|
"learning_rate": 3.5169594648640104e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12918737530708313,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3775.8,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 2.1328125,
|
|
"grad_norm": 0.7815530839077914,
|
|
"learning_rate": 3.511870174915413e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389225721359253,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2968.6,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 2.140625,
|
|
"grad_norm": 0.6649323066273028,
|
|
"learning_rate": 3.506757938667327e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1816273033618927,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3332.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 2.1484375,
|
|
"grad_norm": 0.5877398437042285,
|
|
"learning_rate": 3.501622833710346e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20212605595588684,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5349.1,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 2.15625,
|
|
"grad_norm": 0.7242150083059982,
|
|
"learning_rate": 3.496464937982152e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17189019918441772,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2673.2,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 2.1640625,
|
|
"grad_norm": 0.6040493471852524,
|
|
"learning_rate": 3.4912843297663315e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16079488396644592,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3757.4,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 2.171875,
|
|
"grad_norm": 0.5450508738338139,
|
|
"learning_rate": 3.486081087691188e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11166802048683167,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2068.1,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 2.1796875,
|
|
"grad_norm": 0.7718999738792057,
|
|
"learning_rate": 3.480855290728551e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1735982596874237,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3047.6,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 2.1875,
|
|
"grad_norm": 0.5785703682193379,
|
|
"learning_rate": 3.475607018192571e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18063984811306,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4543.1,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 2.1953125,
|
|
"grad_norm": 0.5666929840293492,
|
|
"learning_rate": 3.4703363497385244e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13888630270957947,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3694.4,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 2.203125,
|
|
"grad_norm": 0.5556113943264658,
|
|
"learning_rate": 3.465043365361596e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17539942264556885,
|
|
"step": 1410,
|
|
"valid_targets_mean": 5610.4,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 2.2109375,
|
|
"grad_norm": 0.5967922205311086,
|
|
"learning_rate": 3.459728145395671e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10626139491796494,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2808.9,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 2.21875,
|
|
"grad_norm": 0.5712775429455358,
|
|
"learning_rate": 3.4543907705121155e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12819114327430725,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3401.1,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 2.2265625,
|
|
"grad_norm": 0.6858382051828804,
|
|
"learning_rate": 3.4490313217185454e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1498335301876068,
|
|
"step": 1425,
|
|
"valid_targets_mean": 2467.1,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 2.234375,
|
|
"grad_norm": 0.9338354795579554,
|
|
"learning_rate": 3.443649880357607e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1729341447353363,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3072.4,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 2.2421875,
|
|
"grad_norm": 0.5072526655685483,
|
|
"learning_rate": 3.438246528105732e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10873201489448547,
|
|
"step": 1435,
|
|
"valid_targets_mean": 2813.4,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"grad_norm": 0.6210287375190788,
|
|
"learning_rate": 3.4328213469719075e-05,
|
|
"loss": 0.3572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142804354429245,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3202.1,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 2.2578125,
|
|
"grad_norm": 0.6581618708858813,
|
|
"learning_rate": 3.4273744192964247e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18490169942378998,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3757.5,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 2.265625,
|
|
"grad_norm": 0.5905604826586393,
|
|
"learning_rate": 3.421905827749631e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184097409248352,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3684.0,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 2.2734375,
|
|
"grad_norm": 0.6515249499999463,
|
|
"learning_rate": 3.4164156553306784e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18977703154087067,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3409.4,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 2.28125,
|
|
"grad_norm": 0.6229537649698605,
|
|
"learning_rate": 3.410903985366258e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17458570003509521,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4612.0,
|
|
"valid_targets_min": 1600
|
|
},
|
|
{
|
|
"epoch": 2.2890625,
|
|
"grad_norm": 0.6264247015768099,
|
|
"learning_rate": 3.405370901509339e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14873284101486206,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3562.2,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 2.296875,
|
|
"grad_norm": 0.632203588446576,
|
|
"learning_rate": 3.399816487737898e-05,
|
|
"loss": 0.3637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20787769556045532,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3916.5,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.3046875,
|
|
"grad_norm": 0.6535683029000344,
|
|
"learning_rate": 3.394240828353647e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1715097874403,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4486.9,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 2.3125,
|
|
"grad_norm": 0.7049046001078905,
|
|
"learning_rate": 3.388644007980749e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14430192112922668,
|
|
"step": 1480,
|
|
"valid_targets_mean": 2409.2,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 2.3203125,
|
|
"grad_norm": 0.576657742022855,
|
|
"learning_rate": 3.3830261115645395e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15123814344406128,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3417.6,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 2.328125,
|
|
"grad_norm": 0.6896783621383658,
|
|
"learning_rate": 3.37738722437023e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15201659500598907,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2912.6,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 2.3359375,
|
|
"grad_norm": 0.48626413377589367,
|
|
"learning_rate": 3.371727431981622e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445101946592331,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4322.5,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.34375,
|
|
"grad_norm": 0.6051702235788214,
|
|
"learning_rate": 3.366046820299802e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15196427702903748,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3176.1,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 2.3515625,
|
|
"grad_norm": 0.5129801421809865,
|
|
"learning_rate": 3.360345475541839e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16199371218681335,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4716.2,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 2.359375,
|
|
"grad_norm": 0.7515083551415259,
|
|
"learning_rate": 3.354623484239479e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.243810772895813,
|
|
"step": 1510,
|
|
"valid_targets_mean": 5831.4,
|
|
"valid_targets_min": 1928
|
|
},
|
|
{
|
|
"epoch": 2.3671875,
|
|
"grad_norm": 0.6669801993574458,
|
|
"learning_rate": 3.348880933237829e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13401362299919128,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2652.4,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 2.375,
|
|
"grad_norm": 0.635454208028576,
|
|
"learning_rate": 3.3431179096940375e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16088241338729858,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3273.1,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 2.3828125,
|
|
"grad_norm": 0.5794638113950737,
|
|
"learning_rate": 3.337334501075974e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18412697315216064,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3939.6,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 2.390625,
|
|
"grad_norm": 0.6708765301167766,
|
|
"learning_rate": 3.331530795160903e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16259382665157318,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3469.0,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 2.3984375,
|
|
"grad_norm": 0.6249318807906089,
|
|
"learning_rate": 3.325706880034149e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17496773600578308,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3588.4,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 2.40625,
|
|
"grad_norm": 0.5829130932307192,
|
|
"learning_rate": 3.319862844087759e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22975051403045654,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5450.5,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 2.4140625,
|
|
"grad_norm": 0.546395310764196,
|
|
"learning_rate": 3.3139987760191615e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11468281596899033,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3389.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 2.421875,
|
|
"grad_norm": 0.5706679465129194,
|
|
"learning_rate": 3.308114764829824e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1333240568637848,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3944.0,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 2.4296875,
|
|
"grad_norm": 0.6696622976493322,
|
|
"learning_rate": 3.3022108998238986e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19734542071819305,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3280.6,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 2.4375,
|
|
"grad_norm": 0.7138645373557458,
|
|
"learning_rate": 3.296287270606865e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14296910166740417,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3646.8,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 2.4453125,
|
|
"grad_norm": 0.5592791839939918,
|
|
"learning_rate": 3.290343967084176e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1571827232837677,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3904.2,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 2.453125,
|
|
"grad_norm": 0.6260302670180717,
|
|
"learning_rate": 3.2843810794598856e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1411897838115692,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3063.1,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 2.4609375,
|
|
"grad_norm": 0.6732264810921186,
|
|
"learning_rate": 3.278398698235289e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17424802482128143,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2902.1,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 2.46875,
|
|
"grad_norm": 0.5796182991871822,
|
|
"learning_rate": 3.27239691420754e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19173116981983185,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4411.2,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 2.4765625,
|
|
"grad_norm": 0.6101048368759894,
|
|
"learning_rate": 3.2663758184682804e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.183045893907547,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3207.9,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 2.484375,
|
|
"grad_norm": 0.6382933451270865,
|
|
"learning_rate": 3.2603355024022495e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11083351075649261,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2813.4,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 2.4921875,
|
|
"grad_norm": 0.646527887331307,
|
|
"learning_rate": 3.254276057685907e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16375058889389038,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3297.6,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.6056627444528203,
|
|
"learning_rate": 3.2481975762860325e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10950455069541931,
|
|
"step": 1600,
|
|
"valid_targets_mean": 2965.1,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 2.5078125,
|
|
"grad_norm": 0.9676664619329729,
|
|
"learning_rate": 3.2421001504583364e-05,
|
|
"loss": 0.3401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17635026574134827,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3991.2,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 2.515625,
|
|
"grad_norm": 0.6089794932203908,
|
|
"learning_rate": 3.235983872746054e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17221324145793915,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3864.9,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 2.5234375,
|
|
"grad_norm": 0.6641916562034885,
|
|
"learning_rate": 3.229848835978544e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14394773542881012,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2614.6,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 2.53125,
|
|
"grad_norm": 0.8104375343085687,
|
|
"learning_rate": 3.2236951332698816e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21126845479011536,
|
|
"step": 1620,
|
|
"valid_targets_mean": 2125.9,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 2.5390625,
|
|
"grad_norm": 0.5368721147253929,
|
|
"learning_rate": 3.217522858017442e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1449304223060608,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4838.2,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 2.546875,
|
|
"grad_norm": 0.7467577715707205,
|
|
"learning_rate": 3.211332103900482e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1720157116651535,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2964.9,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 2.5546875,
|
|
"grad_norm": 0.5056530302997803,
|
|
"learning_rate": 3.205122964878721e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1767379343509674,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5176.6,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 2.5625,
|
|
"grad_norm": 0.5807950880650562,
|
|
"learning_rate": 3.198895535190917e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19092446565628052,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4433.1,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 2.5703125,
|
|
"grad_norm": 0.683626077647659,
|
|
"learning_rate": 3.192649909353429e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18231673538684845,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3225.9,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 2.578125,
|
|
"grad_norm": 0.5789348261115961,
|
|
"learning_rate": 3.186386182158788e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19874432682991028,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4092.2,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 2.5859375,
|
|
"grad_norm": 0.642047629377951,
|
|
"learning_rate": 3.1801044486742567e-05,
|
|
"loss": 0.3103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16537031531333923,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3350.0,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 2.59375,
|
|
"grad_norm": 0.673948323528257,
|
|
"learning_rate": 3.17380480424039e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16258078813552856,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3557.4,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 2.6015625,
|
|
"grad_norm": 0.49249741878474795,
|
|
"learning_rate": 3.1674873444695804e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17182646691799164,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5618.5,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 2.609375,
|
|
"grad_norm": 0.7038918408271834,
|
|
"learning_rate": 3.161152165244614e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1885913610458374,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2763.4,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 2.6171875,
|
|
"grad_norm": 0.600261968867076,
|
|
"learning_rate": 3.154799362717213e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16389790177345276,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3678.6,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 2.625,
|
|
"grad_norm": 0.6739612798519752,
|
|
"learning_rate": 3.1484290333065754e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13849405944347382,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2895.6,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 2.6328125,
|
|
"grad_norm": 0.49069182279499296,
|
|
"learning_rate": 3.142041273697911e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08869343250989914,
|
|
"step": 1685,
|
|
"valid_targets_mean": 2226.0,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 2.640625,
|
|
"grad_norm": 0.6029532377152429,
|
|
"learning_rate": 3.135636180840976e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12168297916650772,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2824.8,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 2.6484375,
|
|
"grad_norm": 0.9012002558321225,
|
|
"learning_rate": 3.1292138519486013e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271367073059082,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2932.1,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 2.65625,
|
|
"grad_norm": 0.5035490757970967,
|
|
"learning_rate": 3.1227743844952164e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10457470268011093,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4541.6,
|
|
"valid_targets_min": 1740
|
|
},
|
|
{
|
|
"epoch": 2.6640625,
|
|
"grad_norm": 0.5166678134732465,
|
|
"learning_rate": 3.1163178762153686e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17898811399936676,
|
|
"step": 1705,
|
|
"valid_targets_mean": 5124.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 2.671875,
|
|
"grad_norm": 0.5608623048043715,
|
|
"learning_rate": 3.109844425102242e-05,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24330657720565796,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5777.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 2.6796875,
|
|
"grad_norm": 0.6502462538448983,
|
|
"learning_rate": 3.103354129406172e-05,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11797823756933212,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2194.9,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 2.6875,
|
|
"grad_norm": 0.7192792543546414,
|
|
"learning_rate": 3.0968470876331456e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15223903954029083,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3459.0,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 2.6953125,
|
|
"grad_norm": 0.6344985348958683,
|
|
"learning_rate": 3.090323398543318e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20012539625167847,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4333.8,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 2.703125,
|
|
"grad_norm": 0.5406292506639756,
|
|
"learning_rate": 3.0837831611495036e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20269277691841125,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5003.2,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 2.7109375,
|
|
"grad_norm": 0.6643741215213833,
|
|
"learning_rate": 3.077226474715681e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1961633861064911,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3351.9,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 2.71875,
|
|
"grad_norm": 0.597800703911311,
|
|
"learning_rate": 3.070653438755479e-05,
|
|
"loss": 0.3451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09221368283033371,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2762.1,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 2.7265625,
|
|
"grad_norm": 0.5151838263108344,
|
|
"learning_rate": 3.064064153030673e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11551568657159805,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3646.9,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 2.734375,
|
|
"grad_norm": 0.6362877953823195,
|
|
"learning_rate": 3.057458717549668e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358792930841446,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 2.7421875,
|
|
"grad_norm": 0.5994495941842987,
|
|
"learning_rate": 3.0508372325659805e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17006945610046387,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3354.9,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"grad_norm": 0.5804396111853006,
|
|
"learning_rate": 3.0441997985767145e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1631045639514923,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4417.5,
|
|
"valid_targets_min": 2325
|
|
},
|
|
{
|
|
"epoch": 2.7578125,
|
|
"grad_norm": 0.647949453619468,
|
|
"learning_rate": 3.0375465163210433e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22176723182201385,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4536.2,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 2.765625,
|
|
"grad_norm": 0.5967295034601429,
|
|
"learning_rate": 3.030877486778672e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16908469796180725,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3885.0,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 2.7734375,
|
|
"grad_norm": 0.6897965642702147,
|
|
"learning_rate": 3.0241928111683126e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709640622138977,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2475.2,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.78125,
|
|
"grad_norm": 0.5244448871749552,
|
|
"learning_rate": 3.0174925909461406e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08593578636646271,
|
|
"step": 1780,
|
|
"valid_targets_mean": 2804.8,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 2.7890625,
|
|
"grad_norm": 0.716597713294748,
|
|
"learning_rate": 3.010776927804262e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17839771509170532,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2759.2,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 2.796875,
|
|
"grad_norm": 0.5738966089571728,
|
|
"learning_rate": 3.004045923669164e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15756843984127045,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3767.9,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 2.8046875,
|
|
"grad_norm": 0.6589835827333673,
|
|
"learning_rate": 2.9972996807001728e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216583177447319,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2165.0,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 2.8125,
|
|
"grad_norm": 0.4717934220482112,
|
|
"learning_rate": 2.9905383012878994e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17367221415042877,
|
|
"step": 1800,
|
|
"valid_targets_mean": 5805.6,
|
|
"valid_targets_min": 2229
|
|
},
|
|
{
|
|
"epoch": 2.8203125,
|
|
"grad_norm": 0.5553107555745015,
|
|
"learning_rate": 2.983761888052687e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14174191653728485,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4022.2,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 2.828125,
|
|
"grad_norm": 1.5327533173659647,
|
|
"learning_rate": 2.976970543843054e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21615874767303467,
|
|
"step": 1810,
|
|
"valid_targets_mean": 2713.8,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 2.8359375,
|
|
"grad_norm": 0.5579466570502067,
|
|
"learning_rate": 2.9701643717341335e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09078259021043777,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2259.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 2.84375,
|
|
"grad_norm": 0.5465848901294869,
|
|
"learning_rate": 2.963343475026107e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1586659550666809,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3825.5,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 2.8515625,
|
|
"grad_norm": 0.6439916459247743,
|
|
"learning_rate": 2.956507957242637e-05,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1448800265789032,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3523.6,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 2.859375,
|
|
"grad_norm": 0.6447381061485634,
|
|
"learning_rate": 2.9496579221292966e-05,
|
|
"loss": 0.3321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16045837104320526,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3337.9,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 2.8671875,
|
|
"grad_norm": 0.6740262409841432,
|
|
"learning_rate": 2.9427934736519962e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12142470479011536,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2021.1,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.875,
|
|
"grad_norm": 0.58169142722094,
|
|
"learning_rate": 2.935914715995401e-05,
|
|
"loss": 0.3408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19773763418197632,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4013.5,
|
|
"valid_targets_min": 1942
|
|
},
|
|
{
|
|
"epoch": 2.8828125,
|
|
"grad_norm": 0.5547979457571613,
|
|
"learning_rate": 2.9290217535613555e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15622423589229584,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3848.1,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 2.890625,
|
|
"grad_norm": 0.7161665150127227,
|
|
"learning_rate": 2.9221146909672954e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17908824980258942,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2929.6,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 2.8984375,
|
|
"grad_norm": 0.6316980679914165,
|
|
"learning_rate": 2.9151936330446594e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1929895579814911,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3928.6,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 2.90625,
|
|
"grad_norm": 0.5729807172811694,
|
|
"learning_rate": 2.9082586848373008e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2174421101808548,
|
|
"step": 1860,
|
|
"valid_targets_mean": 5100.0,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 2.9140625,
|
|
"grad_norm": 0.5989757638683435,
|
|
"learning_rate": 2.901309951599891e-05,
|
|
"loss": 0.3365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15374526381492615,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3899.8,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 2.921875,
|
|
"grad_norm": 0.6795678217469556,
|
|
"learning_rate": 2.894347538796322e-05,
|
|
"loss": 0.3507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20630629360675812,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3545.6,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 2.9296875,
|
|
"grad_norm": 0.5475571896273654,
|
|
"learning_rate": 2.8873715520981077e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16184917092323303,
|
|
"step": 1875,
|
|
"valid_targets_mean": 5756.2,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 2.9375,
|
|
"grad_norm": 0.6921680410026269,
|
|
"learning_rate": 2.8803820973827784e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15202295780181885,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2703.4,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 2.9453125,
|
|
"grad_norm": 0.5528089338481788,
|
|
"learning_rate": 2.873379280732274e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657953262329102,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3178.9,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 2.953125,
|
|
"grad_norm": 0.5526274542586725,
|
|
"learning_rate": 2.8663632084313343e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23053047060966492,
|
|
"step": 1890,
|
|
"valid_targets_mean": 5161.9,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 2.9609375,
|
|
"grad_norm": 0.6381471378582848,
|
|
"learning_rate": 2.859333986965885e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16156978905200958,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3425.2,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 2.96875,
|
|
"grad_norm": 0.4908844933974295,
|
|
"learning_rate": 2.852291723021424e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12322995811700821,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4508.8,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 2.9765625,
|
|
"grad_norm": 0.4907823995529347,
|
|
"learning_rate": 2.8452365234813992e-05,
|
|
"loss": 0.3309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12578988075256348,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4282.9,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 2.984375,
|
|
"grad_norm": 0.6582904576469492,
|
|
"learning_rate": 2.838168495425588e-05,
|
|
"loss": 0.3362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143172949552536,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3072.1,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 2.9921875,
|
|
"grad_norm": 0.630002392911482,
|
|
"learning_rate": 2.8310877461284708e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12922264635562897,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2682.5,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.5694448235390295,
|
|
"learning_rate": 2.8239943830576054e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22056590020656586,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4795.1,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 3.0078125,
|
|
"grad_norm": 0.6160389856102886,
|
|
"learning_rate": 2.8168885138719927e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10456937551498413,
|
|
"step": 1925,
|
|
"valid_targets_mean": 2244.5,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 3.015625,
|
|
"grad_norm": 0.5928032623259631,
|
|
"learning_rate": 2.8097702464204446e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21220722794532776,
|
|
"step": 1930,
|
|
"valid_targets_mean": 5180.4,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 3.0234375,
|
|
"grad_norm": 0.6591017617248139,
|
|
"learning_rate": 2.802639688739948e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20998281240463257,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4255.0,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.03125,
|
|
"grad_norm": 0.6936553021967344,
|
|
"learning_rate": 2.7954969490540223e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13693615794181824,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3575.9,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 3.0390625,
|
|
"grad_norm": 0.675924189325635,
|
|
"learning_rate": 2.788342135771079e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398326605558395,
|
|
"step": 1945,
|
|
"valid_targets_mean": 2261.4,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 3.046875,
|
|
"grad_norm": 0.5639031861582785,
|
|
"learning_rate": 2.7811753574827754e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13011200726032257,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3783.2,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 3.0546875,
|
|
"grad_norm": 0.7113340649341242,
|
|
"learning_rate": 2.7739967229623675e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14893494546413422,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3085.9,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 3.0625,
|
|
"grad_norm": 0.6480070304386037,
|
|
"learning_rate": 2.7668063411630574e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10932555049657822,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2932.0,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.0703125,
|
|
"grad_norm": 0.47454217821502054,
|
|
"learning_rate": 2.7596043212163426e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11920058727264404,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4721.1,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 3.078125,
|
|
"grad_norm": 0.767747287689,
|
|
"learning_rate": 2.7523907724303547e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14564688503742218,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3557.8,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 3.0859375,
|
|
"grad_norm": 0.5195833209779028,
|
|
"learning_rate": 2.745165804288206e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10540787875652313,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3687.5,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 3.09375,
|
|
"grad_norm": 0.6645504798358579,
|
|
"learning_rate": 2.737929526446325e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13857142627239227,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3110.9,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 3.1015625,
|
|
"grad_norm": 0.7259155107906596,
|
|
"learning_rate": 2.7306820487327906e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11406374722719193,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3601.0,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 3.109375,
|
|
"grad_norm": 0.5739073711092522,
|
|
"learning_rate": 2.7234234811456683e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12927541136741638,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3216.9,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 3.1171875,
|
|
"grad_norm": 0.5993495037814198,
|
|
"learning_rate": 2.716153933851339e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14130699634552002,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3873.1,
|
|
"valid_targets_min": 2448
|
|
},
|
|
{
|
|
"epoch": 3.125,
|
|
"grad_norm": 0.5421461984371212,
|
|
"learning_rate": 2.7088735171828283e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13294240832328796,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4413.6,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 3.1328125,
|
|
"grad_norm": 0.759250994474141,
|
|
"learning_rate": 2.701582341638129e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10335798561573029,
|
|
"step": 2005,
|
|
"valid_targets_mean": 1814.2,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 3.140625,
|
|
"grad_norm": 0.6656690714806082,
|
|
"learning_rate": 2.694280517878528e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14705723524093628,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2965.1,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 3.1484375,
|
|
"grad_norm": 0.589758799901099,
|
|
"learning_rate": 2.6869681567269223e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17932362854480743,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4419.1,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 3.15625,
|
|
"grad_norm": 0.7481235452908217,
|
|
"learning_rate": 2.679645369166142e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21365806460380554,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3001.9,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 3.1640625,
|
|
"grad_norm": 0.7053922167095342,
|
|
"learning_rate": 2.672312266337262e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15979883074760437,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3375.0,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 3.171875,
|
|
"grad_norm": 0.6445935246330247,
|
|
"learning_rate": 2.664968959537916e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09808365255594254,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2087.6,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 3.1796875,
|
|
"grad_norm": 0.6172193112768782,
|
|
"learning_rate": 2.6576155602206082e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09721830487251282,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2242.9,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 3.1875,
|
|
"grad_norm": 0.8976057669847863,
|
|
"learning_rate": 2.650252179991022e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410972774028778,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2635.4,
|
|
"valid_targets_min": 1089
|
|
},
|
|
{
|
|
"epoch": 3.1953125,
|
|
"grad_norm": 0.6272165809514652,
|
|
"learning_rate": 2.6428789306063233e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263716071844101,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3487.1,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 3.203125,
|
|
"grad_norm": 0.6306514175683192,
|
|
"learning_rate": 2.6354959239734694e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10337403416633606,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2560.8,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 3.2109375,
|
|
"grad_norm": 0.7375465045058122,
|
|
"learning_rate": 2.6281032721475047e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1513032764196396,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2934.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 3.21875,
|
|
"grad_norm": 3.052263668174963,
|
|
"learning_rate": 2.620701087329864e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458505094051361,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3097.1,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 3.2265625,
|
|
"grad_norm": 0.5883310431366956,
|
|
"learning_rate": 2.613289481866669e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.118716299533844,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2794.1,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 3.234375,
|
|
"grad_norm": 0.5507898481285975,
|
|
"learning_rate": 2.605868568247021e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15194180607795715,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4891.9,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 3.2421875,
|
|
"grad_norm": 0.6648504285441111,
|
|
"learning_rate": 2.5984384591012978e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15788394212722778,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2854.6,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"grad_norm": 0.6262748256031877,
|
|
"learning_rate": 2.590999267199438e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3013624846935272,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5331.0,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 3.2578125,
|
|
"grad_norm": 0.652956991120199,
|
|
"learning_rate": 2.5835511054492357e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18528978526592255,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4064.2,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 3.265625,
|
|
"grad_norm": 0.5491589891812001,
|
|
"learning_rate": 2.5760940868946237e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12732404470443726,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4300.2,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 3.2734375,
|
|
"grad_norm": 0.7232429552309644,
|
|
"learning_rate": 2.5686283247139584e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19467595219612122,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3053.5,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 3.28125,
|
|
"grad_norm": 0.6393820775525721,
|
|
"learning_rate": 2.561153932218301e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13762244582176208,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3247.1,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 3.2890625,
|
|
"grad_norm": 0.5673477885653916,
|
|
"learning_rate": 2.5536710228496986e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17992708086967468,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5358.4,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 3.296875,
|
|
"grad_norm": 0.5981796019070807,
|
|
"learning_rate": 2.5461797101794654e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18709968030452728,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4243.1,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 3.3046875,
|
|
"grad_norm": 0.6590902843870795,
|
|
"learning_rate": 2.5386801079064527e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13530650734901428,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2852.2,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 3.3125,
|
|
"grad_norm": 0.4850334051271829,
|
|
"learning_rate": 2.531172329855327e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1200372651219368,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5093.2,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 3.3203125,
|
|
"grad_norm": 0.6058569376597182,
|
|
"learning_rate": 2.5236564899748442e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19731497764587402,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5651.0,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.328125,
|
|
"grad_norm": 0.5739142781117275,
|
|
"learning_rate": 2.5161327023361172e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14834779500961304,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4900.9,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 3.3359375,
|
|
"grad_norm": 0.528502714216959,
|
|
"learning_rate": 2.5086010811308835e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12762004137039185,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3611.9,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.34375,
|
|
"grad_norm": 0.5531885503177704,
|
|
"learning_rate": 2.5010617406697768e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1568545401096344,
|
|
"step": 2140,
|
|
"valid_targets_mean": 5449.2,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 3.3515625,
|
|
"grad_norm": 0.6243590525481849,
|
|
"learning_rate": 2.493514795380587e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18997368216514587,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4093.1,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 3.359375,
|
|
"grad_norm": 0.5120626703616974,
|
|
"learning_rate": 2.485960359806528e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21305415034294128,
|
|
"step": 2150,
|
|
"valid_targets_mean": 6523.2,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.3671875,
|
|
"grad_norm": 0.5701088417614197,
|
|
"learning_rate": 2.4783985486044945e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17381185293197632,
|
|
"step": 2155,
|
|
"valid_targets_mean": 5227.0,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 3.375,
|
|
"grad_norm": 0.6285976111108152,
|
|
"learning_rate": 2.470829476543325e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389385163784027,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3117.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.3828125,
|
|
"grad_norm": 0.6149192922656284,
|
|
"learning_rate": 2.463253258502061e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09437514841556549,
|
|
"step": 2165,
|
|
"valid_targets_mean": 2412.6,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 3.390625,
|
|
"grad_norm": 0.7517637846733674,
|
|
"learning_rate": 2.4556700094681988e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10935580730438232,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2975.1,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 3.3984375,
|
|
"grad_norm": 0.9523939185084906,
|
|
"learning_rate": 2.4480798445359494e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17592230439186096,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3557.8,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 3.40625,
|
|
"grad_norm": 0.6778601536533837,
|
|
"learning_rate": 2.4404828789044876e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16050031781196594,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3780.8,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 3.4140625,
|
|
"grad_norm": 0.654837246525572,
|
|
"learning_rate": 2.4328792278762058e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16276603937149048,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4171.8,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 3.421875,
|
|
"grad_norm": 0.6269752016595972,
|
|
"learning_rate": 2.425269006854965e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389651894569397,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3272.4,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 3.4296875,
|
|
"grad_norm": 0.77918218266119,
|
|
"learning_rate": 2.41765233134434e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734730303287506,
|
|
"step": 2195,
|
|
"valid_targets_mean": 2401.0,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 3.4375,
|
|
"grad_norm": 0.5532270772347552,
|
|
"learning_rate": 2.4100293169458687e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18668201565742493,
|
|
"step": 2200,
|
|
"valid_targets_mean": 5710.4,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 3.4453125,
|
|
"grad_norm": 0.5572170862419372,
|
|
"learning_rate": 2.402400079357297e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718933880329132,
|
|
"step": 2205,
|
|
"valid_targets_mean": 5388.5,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.453125,
|
|
"grad_norm": 0.6172989838410633,
|
|
"learning_rate": 2.3947647343708226e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1435641646385193,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3299.0,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 3.4609375,
|
|
"grad_norm": 0.6340702282176685,
|
|
"learning_rate": 2.3871233978713387e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1372242420911789,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2773.2,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 3.46875,
|
|
"grad_norm": 0.6507409735566828,
|
|
"learning_rate": 2.379476185834673e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17228344082832336,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3965.1,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 3.4765625,
|
|
"grad_norm": 0.6303609207547352,
|
|
"learning_rate": 2.3718232143258296e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12796024978160858,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2529.6,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 3.484375,
|
|
"grad_norm": 0.5934886986170207,
|
|
"learning_rate": 2.364164599497226e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17317438125610352,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4482.5,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 3.4921875,
|
|
"grad_norm": 0.5563019352754568,
|
|
"learning_rate": 2.3565004575869318e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1666184514760971,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3879.6,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"grad_norm": 0.6794936713037374,
|
|
"learning_rate": 2.348830904916902e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.188237264752388,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4566.5,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 3.5078125,
|
|
"grad_norm": 0.4597489180613699,
|
|
"learning_rate": 2.3411560578912137e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13482022285461426,
|
|
"step": 2245,
|
|
"valid_targets_mean": 5636.0,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 3.515625,
|
|
"grad_norm": 0.5357468878702384,
|
|
"learning_rate": 2.3334760329942992e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09022724628448486,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2963.6,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 3.5234375,
|
|
"grad_norm": 0.7404351726371096,
|
|
"learning_rate": 2.325790946789178e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15003375709056854,
|
|
"step": 2255,
|
|
"valid_targets_mean": 2312.1,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 3.53125,
|
|
"grad_norm": 0.5074848491782636,
|
|
"learning_rate": 2.3181009159156845e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13939183950424194,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4683.2,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 3.5390625,
|
|
"grad_norm": 0.42656977165008775,
|
|
"learning_rate": 2.3104060570887032e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15056177973747253,
|
|
"step": 2265,
|
|
"valid_targets_mean": 7040.1,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 3.546875,
|
|
"grad_norm": 0.7725386564640964,
|
|
"learning_rate": 2.302706487096394e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16732686758041382,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2709.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 3.5546875,
|
|
"grad_norm": 0.6861527226321527,
|
|
"learning_rate": 2.2950023227984195e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15176402032375336,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3130.6,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 3.5625,
|
|
"grad_norm": 0.6869860958715793,
|
|
"learning_rate": 2.287293681124172e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377551555633545,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2595.4,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 3.5703125,
|
|
"grad_norm": 0.620841594759895,
|
|
"learning_rate": 2.2795806790709992e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13764545321464539,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3837.2,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 3.578125,
|
|
"grad_norm": 0.6225302432006105,
|
|
"learning_rate": 2.2718634337024285e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17910321056842804,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3730.6,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 3.5859375,
|
|
"grad_norm": 0.6087823045765182,
|
|
"learning_rate": 2.26414206214639e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14104045927524567,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3819.4,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 3.59375,
|
|
"grad_norm": 0.5350469743901212,
|
|
"learning_rate": 2.2564166815934367e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10280948877334595,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3424.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 3.6015625,
|
|
"grad_norm": 0.5859292161216156,
|
|
"learning_rate": 2.2486874092949708e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1024356558918953,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3320.0,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 3.609375,
|
|
"grad_norm": 0.6050887966025194,
|
|
"learning_rate": 2.240954362561459e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13312239944934845,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2872.4,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 3.6171875,
|
|
"grad_norm": 0.5168349409407551,
|
|
"learning_rate": 2.2332176587606553e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1483728587627411,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3496.9,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 3.625,
|
|
"grad_norm": 0.6209016725552876,
|
|
"learning_rate": 2.2254774153158185e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1664799153804779,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3507.4,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 3.6328125,
|
|
"grad_norm": 0.7192558326410591,
|
|
"learning_rate": 2.2177337497039292e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20758378505706787,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3642.1,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 3.640625,
|
|
"grad_norm": 0.7116559754155051,
|
|
"learning_rate": 2.2099867794539095e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1880105435848236,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3737.8,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 3.6484375,
|
|
"grad_norm": 0.513835017815762,
|
|
"learning_rate": 2.2022366221448346e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16050559282302856,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5300.4,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 3.65625,
|
|
"grad_norm": 0.4995853566802839,
|
|
"learning_rate": 2.194483395404154e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12402613461017609,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5121.2,
|
|
"valid_targets_min": 2410
|
|
},
|
|
{
|
|
"epoch": 3.6640625,
|
|
"grad_norm": 0.7280160406360658,
|
|
"learning_rate": 2.1867272169059014e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15356336534023285,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2316.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 3.671875,
|
|
"grad_norm": 0.5960487480908755,
|
|
"learning_rate": 2.178968204368912e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18685145676136017,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4414.5,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 3.6796875,
|
|
"grad_norm": 0.5447052505132214,
|
|
"learning_rate": 2.1712064755550323e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14566168189048767,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4888.2,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 3.6875,
|
|
"grad_norm": 0.5931531247938723,
|
|
"learning_rate": 2.1634421482673368e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13443520665168762,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3690.9,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 3.6953125,
|
|
"grad_norm": 0.7011379173550384,
|
|
"learning_rate": 2.1556753403483377e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10157973319292068,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2014.0,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 3.703125,
|
|
"grad_norm": 0.5276815864020572,
|
|
"learning_rate": 2.1479061696781963e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09303733706474304,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2695.5,
|
|
"valid_targets_min": 866
|
|
},
|
|
{
|
|
"epoch": 3.7109375,
|
|
"grad_norm": 0.5994673195699125,
|
|
"learning_rate": 2.1401347541729347e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538551092147827,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5315.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 3.71875,
|
|
"grad_norm": 0.6254139115635049,
|
|
"learning_rate": 2.1323612117826464e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437162160873413,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3563.0,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 3.7265625,
|
|
"grad_norm": 0.519352978583004,
|
|
"learning_rate": 2.1245856604897045e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08513516187667847,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3105.5,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 3.734375,
|
|
"grad_norm": 0.6278781332954578,
|
|
"learning_rate": 2.1168082183069724e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13861608505249023,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2601.1,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.7421875,
|
|
"grad_norm": 0.6771844009747952,
|
|
"learning_rate": 2.109029003276013e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16527557373046875,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3646.5,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.5338702877802065,
|
|
"learning_rate": 2.1012481334652953e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1592840552330017,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4925.5,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 3.7578125,
|
|
"grad_norm": 0.5094587375829984,
|
|
"learning_rate": 2.093465726968405e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07806219160556793,
|
|
"step": 2405,
|
|
"valid_targets_mean": 2821.8,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 3.765625,
|
|
"grad_norm": 0.5739654414299689,
|
|
"learning_rate": 2.0856819019022495e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11556532233953476,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3086.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 3.7734375,
|
|
"grad_norm": 0.6275058540013283,
|
|
"learning_rate": 2.077896776405267e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903493106365204,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3065.2,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 3.78125,
|
|
"grad_norm": 0.5289007365160224,
|
|
"learning_rate": 2.0701104686356328e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20308583974838257,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5873.1,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 3.7890625,
|
|
"grad_norm": 0.702260360610347,
|
|
"learning_rate": 2.0623230967694666e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433137059211731,
|
|
"step": 2425,
|
|
"valid_targets_mean": 2640.8,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 3.796875,
|
|
"grad_norm": 0.6903658054045206,
|
|
"learning_rate": 2.0545347789990374e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11275959014892578,
|
|
"step": 2430,
|
|
"valid_targets_mean": 2422.4,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 3.8046875,
|
|
"grad_norm": 0.7836986662956738,
|
|
"learning_rate": 2.04674563353097e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15782076120376587,
|
|
"step": 2435,
|
|
"valid_targets_mean": 2578.6,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 3.8125,
|
|
"grad_norm": 0.738141158346513,
|
|
"learning_rate": 2.038955778584455e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17139139771461487,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2817.5,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 3.8203125,
|
|
"grad_norm": 0.6191132280754744,
|
|
"learning_rate": 2.0311653323894464e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13116103410720825,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3348.5,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 3.828125,
|
|
"grad_norm": 0.6220526865867526,
|
|
"learning_rate": 2.0233744131848755e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09697461873292923,
|
|
"step": 2450,
|
|
"valid_targets_mean": 2536.2,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 3.8359375,
|
|
"grad_norm": 0.6742842617127349,
|
|
"learning_rate": 2.01558313921685e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1804228127002716,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4179.4,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 3.84375,
|
|
"grad_norm": 0.6132827738023419,
|
|
"learning_rate": 2.0077916287368643e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12814073264598846,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2862.2,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 3.8515625,
|
|
"grad_norm": 0.5417290807132001,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16362407803535461,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5084.9,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 3.859375,
|
|
"grad_norm": 0.608912557042839,
|
|
"learning_rate": 1.992208371263136e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1743316948413849,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3989.6,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 3.8671875,
|
|
"grad_norm": 0.49770667425501985,
|
|
"learning_rate": 1.9844168607831505e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15715748071670532,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5533.4,
|
|
"valid_targets_min": 2056
|
|
},
|
|
{
|
|
"epoch": 3.875,
|
|
"grad_norm": 0.6072344394767604,
|
|
"learning_rate": 1.9766255868151248e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09351038932800293,
|
|
"step": 2480,
|
|
"valid_targets_mean": 2787.5,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 3.8828125,
|
|
"grad_norm": 0.781941197465663,
|
|
"learning_rate": 1.9688346676105542e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1596384048461914,
|
|
"step": 2485,
|
|
"valid_targets_mean": 1944.2,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 3.890625,
|
|
"grad_norm": 0.5604919120465528,
|
|
"learning_rate": 1.961044221415545e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11914297938346863,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4012.6,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 3.8984375,
|
|
"grad_norm": 0.7052612297577078,
|
|
"learning_rate": 1.9532543664690305e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16409805417060852,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2879.2,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 3.90625,
|
|
"grad_norm": 0.590487562142605,
|
|
"learning_rate": 1.9454652210009636e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16870886087417603,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4582.4,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 3.9140625,
|
|
"grad_norm": 0.716078917391518,
|
|
"learning_rate": 1.937676903230534e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16251221299171448,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3038.5,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 3.921875,
|
|
"grad_norm": 0.6023463965240262,
|
|
"learning_rate": 1.929889531364368e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08113180845975876,
|
|
"step": 2510,
|
|
"valid_targets_mean": 2001.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 3.9296875,
|
|
"grad_norm": 0.5641528888938031,
|
|
"learning_rate": 1.922103223594734e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293378323316574,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3722.8,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 3.9375,
|
|
"grad_norm": 0.6411224059860134,
|
|
"learning_rate": 1.914318098097751e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13295301795005798,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2803.1,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 3.9453125,
|
|
"grad_norm": 0.7266333792799692,
|
|
"learning_rate": 1.9065342730315958e-05,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15825489163398743,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2512.4,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 3.953125,
|
|
"grad_norm": 0.5343658040266234,
|
|
"learning_rate": 1.8987518665347053e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19471552968025208,
|
|
"step": 2530,
|
|
"valid_targets_mean": 5338.5,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 3.9609375,
|
|
"grad_norm": 0.7348968131820346,
|
|
"learning_rate": 1.8909709967239873e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14440813660621643,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2701.2,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 3.96875,
|
|
"grad_norm": 0.5915068299588706,
|
|
"learning_rate": 1.8831917816930282e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11208482086658478,
|
|
"step": 2540,
|
|
"valid_targets_mean": 2807.8,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 3.9765625,
|
|
"grad_norm": 0.6200802532818883,
|
|
"learning_rate": 1.8754143395102958e-05,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12460586428642273,
|
|
"step": 2545,
|
|
"valid_targets_mean": 2606.9,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 3.984375,
|
|
"grad_norm": 0.6472747215117117,
|
|
"learning_rate": 1.8676387882173543e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.188481867313385,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3759.9,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 3.9921875,
|
|
"grad_norm": 0.6540730114921622,
|
|
"learning_rate": 1.8598652458270653e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14098089933395386,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3168.1,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.6256819401871079,
|
|
"learning_rate": 1.8520938303218044e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16406318545341492,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3881.8,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 4.0078125,
|
|
"grad_norm": 0.625488084953199,
|
|
"learning_rate": 1.844324659651663e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1765892505645752,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4040.5,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 4.015625,
|
|
"grad_norm": 0.8103987048187203,
|
|
"learning_rate": 1.8365578517326642e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157423198223114,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2310.6,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 4.0234375,
|
|
"grad_norm": 0.6510887727356819,
|
|
"learning_rate": 1.8287935244449684e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14246268570423126,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2592.9,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 4.03125,
|
|
"grad_norm": 0.6867512125633302,
|
|
"learning_rate": 1.821031795631089e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1686811000108719,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3730.4,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 4.0390625,
|
|
"grad_norm": 0.5731359299323292,
|
|
"learning_rate": 1.813272783094099e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12381470948457718,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3561.4,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 4.046875,
|
|
"grad_norm": 0.5261218975726141,
|
|
"learning_rate": 1.805516604595847e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12387430667877197,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4740.5,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 4.0546875,
|
|
"grad_norm": 0.6584547964804731,
|
|
"learning_rate": 1.7977633778551657e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2099209725856781,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4234.0,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 4.0625,
|
|
"grad_norm": 0.5726223044045348,
|
|
"learning_rate": 1.7900132205460912e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07594846189022064,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2429.8,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 4.0703125,
|
|
"grad_norm": 0.7256306365645276,
|
|
"learning_rate": 1.7822662502960714e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16545625030994415,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3151.6,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 4.078125,
|
|
"grad_norm": 0.546778675148899,
|
|
"learning_rate": 1.774522584684182e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13061267137527466,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4004.1,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 4.0859375,
|
|
"grad_norm": 0.6355186334671213,
|
|
"learning_rate": 1.7667823412393454e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12892846763134003,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4036.1,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 4.09375,
|
|
"grad_norm": 0.6132474118294579,
|
|
"learning_rate": 1.7590456374385415e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.154718279838562,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4307.0,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 4.1015625,
|
|
"grad_norm": 0.6325397845649458,
|
|
"learning_rate": 1.7513125907050302e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14153432846069336,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3951.0,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 4.109375,
|
|
"grad_norm": 0.6941054951288518,
|
|
"learning_rate": 1.7435833184065637e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12256962805986404,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2919.1,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 4.1171875,
|
|
"grad_norm": 0.8413998500272192,
|
|
"learning_rate": 1.735857937853611e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1562446653842926,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2435.0,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 4.125,
|
|
"grad_norm": 0.6918140243255283,
|
|
"learning_rate": 1.728136566297572e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14862212538719177,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3096.4,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 4.1328125,
|
|
"grad_norm": 0.5508476825236356,
|
|
"learning_rate": 1.7204193209290018e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1897600293159485,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5309.6,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 4.140625,
|
|
"grad_norm": 0.7809359413892581,
|
|
"learning_rate": 1.7127063188758287e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14083723723888397,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3088.2,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 4.1484375,
|
|
"grad_norm": 0.6392710920336176,
|
|
"learning_rate": 1.7049976772015812e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1474234163761139,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3845.0,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 4.15625,
|
|
"grad_norm": 0.8506556229435398,
|
|
"learning_rate": 1.6972935129036068e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20970328152179718,
|
|
"step": 2660,
|
|
"valid_targets_mean": 6317.6,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 4.1640625,
|
|
"grad_norm": 0.6195784682204512,
|
|
"learning_rate": 1.6895939429112968e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10827621817588806,
|
|
"step": 2665,
|
|
"valid_targets_mean": 2628.0,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 4.171875,
|
|
"grad_norm": 1.6416298962751616,
|
|
"learning_rate": 1.681899084084316e-05,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13153234124183655,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4098.8,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 4.1796875,
|
|
"grad_norm": 0.5540705315733201,
|
|
"learning_rate": 1.6742090532108228e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1368524134159088,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4585.5,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 4.1875,
|
|
"grad_norm": 0.8658221106120761,
|
|
"learning_rate": 1.666523967005701e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2410935014486313,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2975.5,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 4.1953125,
|
|
"grad_norm": 0.504867884742332,
|
|
"learning_rate": 1.6588439421087863e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18471260368824005,
|
|
"step": 2685,
|
|
"valid_targets_mean": 6701.2,
|
|
"valid_targets_min": 2194
|
|
},
|
|
{
|
|
"epoch": 4.203125,
|
|
"grad_norm": 0.6030602728543439,
|
|
"learning_rate": 1.6511690950830986e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17376114428043365,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3931.1,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 4.2109375,
|
|
"grad_norm": 0.7292308019952624,
|
|
"learning_rate": 1.6434995424130692e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12503142654895782,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2918.0,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 4.21875,
|
|
"grad_norm": 0.631006012163087,
|
|
"learning_rate": 1.6358354005027747e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09282973408699036,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2390.1,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.2265625,
|
|
"grad_norm": 0.7491650769172855,
|
|
"learning_rate": 1.628176785674171e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293015033006668,
|
|
"step": 2705,
|
|
"valid_targets_mean": 2350.8,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 4.234375,
|
|
"grad_norm": 0.6483163551190007,
|
|
"learning_rate": 1.620523814165328e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11039422452449799,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3023.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.2421875,
|
|
"grad_norm": 0.6112615129293612,
|
|
"learning_rate": 1.6128766021286623e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20007625222206116,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5474.0,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"grad_norm": 0.5680458227650763,
|
|
"learning_rate": 1.6052352656291774e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1790829300880432,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3707.4,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 4.2578125,
|
|
"grad_norm": 0.6299219898732393,
|
|
"learning_rate": 1.5975999206427037e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12491565942764282,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3663.6,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 4.265625,
|
|
"grad_norm": 0.7915606706524485,
|
|
"learning_rate": 1.5899706830541317e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14550164341926575,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2373.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 4.2734375,
|
|
"grad_norm": 0.538989424593775,
|
|
"learning_rate": 1.5823476686556605e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11618025600910187,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4237.9,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 4.28125,
|
|
"grad_norm": 0.6082423599209387,
|
|
"learning_rate": 1.574730993145035e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09508912265300751,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2607.9,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 4.2890625,
|
|
"grad_norm": 0.5219488611817891,
|
|
"learning_rate": 1.5671207721237945e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11458978056907654,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4343.4,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 4.296875,
|
|
"grad_norm": 0.6767833429726451,
|
|
"learning_rate": 1.5595171210955134e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19635698199272156,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3290.2,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 4.3046875,
|
|
"grad_norm": 0.5177121685925234,
|
|
"learning_rate": 1.5519201554640516e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14509481191635132,
|
|
"step": 2755,
|
|
"valid_targets_mean": 5406.9,
|
|
"valid_targets_min": 2104
|
|
},
|
|
{
|
|
"epoch": 4.3125,
|
|
"grad_norm": 0.8353108381530278,
|
|
"learning_rate": 1.5443299905318015e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16213908791542053,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3914.6,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 4.3203125,
|
|
"grad_norm": 0.5987125691388511,
|
|
"learning_rate": 1.53674674149794e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269667088985443,
|
|
"step": 2765,
|
|
"valid_targets_mean": 5759.9,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 4.328125,
|
|
"grad_norm": 0.6002404242052487,
|
|
"learning_rate": 1.5291705234566755e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08715994656085968,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2283.5,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 4.3359375,
|
|
"grad_norm": 0.5135842133349133,
|
|
"learning_rate": 1.5216014513955067e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11198130249977112,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5090.1,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 4.34375,
|
|
"grad_norm": 0.5888789115663621,
|
|
"learning_rate": 1.5140396401934725e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14496682584285736,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4257.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 4.3515625,
|
|
"grad_norm": 0.584867203822269,
|
|
"learning_rate": 1.5064852046194127e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142946258187294,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4458.8,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 4.359375,
|
|
"grad_norm": 0.6354861134305071,
|
|
"learning_rate": 1.4989382593302237e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15726788341999054,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3627.9,
|
|
"valid_targets_min": 1671
|
|
},
|
|
{
|
|
"epoch": 4.3671875,
|
|
"grad_norm": 0.8778633230584901,
|
|
"learning_rate": 1.4913989188691163e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19539764523506165,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3975.8,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 4.375,
|
|
"grad_norm": 0.6090154634110567,
|
|
"learning_rate": 1.4838672976638835e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1676468700170517,
|
|
"step": 2800,
|
|
"valid_targets_mean": 5028.4,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 4.3828125,
|
|
"grad_norm": 0.6229785245680947,
|
|
"learning_rate": 1.476343510025156e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15102171897888184,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4059.4,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 4.390625,
|
|
"grad_norm": 0.7604386046580136,
|
|
"learning_rate": 1.4688276701446738e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11287308484315872,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2416.6,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 4.3984375,
|
|
"grad_norm": 0.7036954023192811,
|
|
"learning_rate": 1.4613198920935481e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13656935095787048,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3198.6,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 4.40625,
|
|
"grad_norm": 0.6104743550753127,
|
|
"learning_rate": 1.4538202898205351e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08928655087947845,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2436.2,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 4.4140625,
|
|
"grad_norm": 0.5534206796491535,
|
|
"learning_rate": 1.4463289771503015e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10380382090806961,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3584.6,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 4.421875,
|
|
"grad_norm": 0.6239797978353032,
|
|
"learning_rate": 1.4388460677817001e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16579653322696686,
|
|
"step": 2830,
|
|
"valid_targets_mean": 4272.9,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 4.4296875,
|
|
"grad_norm": 0.8747018731497135,
|
|
"learning_rate": 1.431371675286042e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14084258675575256,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3364.0,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 4.4375,
|
|
"grad_norm": 0.5746336436126764,
|
|
"learning_rate": 1.4239059131053768e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11762410402297974,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3510.5,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 4.4453125,
|
|
"grad_norm": 0.661253542650945,
|
|
"learning_rate": 1.4164488945507646e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11503124237060547,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2365.8,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 4.453125,
|
|
"grad_norm": 0.6299257032858594,
|
|
"learning_rate": 1.409000732800562e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13229981064796448,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4243.9,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 4.4609375,
|
|
"grad_norm": 0.5686617711986884,
|
|
"learning_rate": 1.4015615408987029e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1767759621143341,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4370.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 4.46875,
|
|
"grad_norm": 0.6273434538253783,
|
|
"learning_rate": 1.3941314317529789e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09249315410852432,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2508.2,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 4.4765625,
|
|
"grad_norm": 0.5816297765366594,
|
|
"learning_rate": 1.3867105181333318e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15088599920272827,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4641.9,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 4.484375,
|
|
"grad_norm": 0.6847955778388221,
|
|
"learning_rate": 1.3792989126701362e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14340859651565552,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3462.2,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 4.4921875,
|
|
"grad_norm": 0.7474200332102245,
|
|
"learning_rate": 1.3718967278524962e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14468644559383392,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2459.9,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"grad_norm": 0.5996772921932495,
|
|
"learning_rate": 1.3645040760265311e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10466516762971878,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2555.1,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 4.5078125,
|
|
"grad_norm": 0.6634201968683433,
|
|
"learning_rate": 1.3571210693936774e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13855378329753876,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3416.6,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 4.515625,
|
|
"grad_norm": 0.6077301518907945,
|
|
"learning_rate": 1.3497478200089786e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15323516726493835,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3764.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 4.5234375,
|
|
"grad_norm": 0.5670167264356067,
|
|
"learning_rate": 1.3423844397793927e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12673699855804443,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3613.1,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 4.53125,
|
|
"grad_norm": 0.5148933602173208,
|
|
"learning_rate": 1.3350310404620848e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10225557535886765,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4232.1,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 4.5390625,
|
|
"grad_norm": 0.4642731001190078,
|
|
"learning_rate": 1.327687733662739e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17087167501449585,
|
|
"step": 2905,
|
|
"valid_targets_mean": 8690.2,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 4.546875,
|
|
"grad_norm": 0.756807958630529,
|
|
"learning_rate": 1.3203546308338583e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15724797546863556,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2678.0,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 4.5546875,
|
|
"grad_norm": 0.5378228097466479,
|
|
"learning_rate": 1.3130318432730777e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1119692325592041,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4652.4,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 4.5625,
|
|
"grad_norm": 0.6419000814048547,
|
|
"learning_rate": 1.3057194821214729e-05,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15838055312633514,
|
|
"step": 2920,
|
|
"valid_targets_mean": 4336.8,
|
|
"valid_targets_min": 2019
|
|
},
|
|
{
|
|
"epoch": 4.5703125,
|
|
"grad_norm": 0.7300686550006091,
|
|
"learning_rate": 1.298417658361871e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14732694625854492,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3498.2,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 4.578125,
|
|
"grad_norm": 0.730953347549573,
|
|
"learning_rate": 1.2911264828171723e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15795598924160004,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2932.1,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 4.5859375,
|
|
"grad_norm": 0.6179170262011963,
|
|
"learning_rate": 1.283846066148661e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08885601162910461,
|
|
"step": 2935,
|
|
"valid_targets_mean": 2599.8,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 4.59375,
|
|
"grad_norm": 0.6155843887003176,
|
|
"learning_rate": 1.2765765188543327e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11113843321800232,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2967.5,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 4.6015625,
|
|
"grad_norm": 0.721889331935039,
|
|
"learning_rate": 1.26931795126721e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18430714309215546,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3553.1,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 4.609375,
|
|
"grad_norm": 0.6692593454724904,
|
|
"learning_rate": 1.2620704735536759e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14001671969890594,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3285.4,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 4.6171875,
|
|
"grad_norm": 0.5644361784576368,
|
|
"learning_rate": 1.2548341957117942e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10042232275009155,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3439.9,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 4.625,
|
|
"grad_norm": 0.7026533342927573,
|
|
"learning_rate": 1.247609227569646e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10756687074899673,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2037.4,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 4.6328125,
|
|
"grad_norm": 0.5405811297078977,
|
|
"learning_rate": 1.2403956787836582e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13475222885608673,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4762.9,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 4.640625,
|
|
"grad_norm": 0.7054289521269776,
|
|
"learning_rate": 1.2331936588369426e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14897608757019043,
|
|
"step": 2970,
|
|
"valid_targets_mean": 3910.4,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 4.6484375,
|
|
"grad_norm": 0.6413437204619514,
|
|
"learning_rate": 1.2260032770376334e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15918327867984772,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4144.6,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 4.65625,
|
|
"grad_norm": 0.6208976520032975,
|
|
"learning_rate": 1.2188246425172247e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18356776237487793,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4059.5,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 4.6640625,
|
|
"grad_norm": 0.6409790086041228,
|
|
"learning_rate": 1.2116578642289216e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13731835782527924,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3537.6,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 4.671875,
|
|
"grad_norm": 0.5320375675830128,
|
|
"learning_rate": 1.204503050945978e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1147543266415596,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4308.1,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 4.6796875,
|
|
"grad_norm": 0.6059060488481588,
|
|
"learning_rate": 1.1973603112600525e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2182338833808899,
|
|
"step": 2995,
|
|
"valid_targets_mean": 6059.8,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 4.6875,
|
|
"grad_norm": 0.6603465295966748,
|
|
"learning_rate": 1.1902297535795552e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1071544736623764,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2786.4,
|
|
"valid_targets_min": 1703
|
|
},
|
|
{
|
|
"epoch": 4.6953125,
|
|
"grad_norm": 0.6407784753345941,
|
|
"learning_rate": 1.183111486128008e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14530377089977264,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3858.8,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 4.703125,
|
|
"grad_norm": 0.6107371077724639,
|
|
"learning_rate": 1.1760056169423953e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13617102801799774,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4346.8,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 4.7109375,
|
|
"grad_norm": 0.6273218381167215,
|
|
"learning_rate": 1.16891225387153e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1538863480091095,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4161.8,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 4.71875,
|
|
"grad_norm": 0.6112660399549492,
|
|
"learning_rate": 1.161831504574413e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10622471570968628,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3282.9,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.7265625,
|
|
"grad_norm": 0.6790337579612867,
|
|
"learning_rate": 1.1547634765186016e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11274809390306473,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2551.9,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 4.734375,
|
|
"grad_norm": 0.6589100641422403,
|
|
"learning_rate": 1.1477082769785763e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16488507390022278,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4018.1,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 4.7421875,
|
|
"grad_norm": 0.5948605585783905,
|
|
"learning_rate": 1.1406660130341153e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2061789333820343,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5080.9,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"grad_norm": 0.6735999999092203,
|
|
"learning_rate": 1.1336367915686664e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391688883304596,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3042.9,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 4.7578125,
|
|
"grad_norm": 0.707191743448738,
|
|
"learning_rate": 1.1266207192677263e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13711535930633545,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2988.1,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 4.765625,
|
|
"grad_norm": 0.6331949567081411,
|
|
"learning_rate": 1.1196179026172216e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17094683647155762,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4750.4,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 4.7734375,
|
|
"grad_norm": 0.5363501042893994,
|
|
"learning_rate": 1.1126284479018923e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10897098481655121,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4260.5,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 4.78125,
|
|
"grad_norm": 0.5295365707417661,
|
|
"learning_rate": 1.1056524612036789e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10250885784626007,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4327.2,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 4.7890625,
|
|
"grad_norm": 0.5804325529108302,
|
|
"learning_rate": 1.0986900484001092e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14565108716487885,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4440.6,
|
|
"valid_targets_min": 1838
|
|
},
|
|
{
|
|
"epoch": 4.796875,
|
|
"grad_norm": 0.6169320739983436,
|
|
"learning_rate": 1.0917413151626999e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11483198404312134,
|
|
"step": 3070,
|
|
"valid_targets_mean": 2840.9,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 4.8046875,
|
|
"grad_norm": 0.6791801582659563,
|
|
"learning_rate": 1.0848063669553413e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1801331490278244,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3635.4,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 4.8125,
|
|
"grad_norm": 0.679691134184932,
|
|
"learning_rate": 1.0778853090327056e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18285803496837616,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4095.4,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 4.8203125,
|
|
"grad_norm": 0.7730202228119102,
|
|
"learning_rate": 1.070978246438645e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11215900629758835,
|
|
"step": 3085,
|
|
"valid_targets_mean": 1758.5,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 4.828125,
|
|
"grad_norm": 0.6766175827785786,
|
|
"learning_rate": 1.0640852840046003e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12460511922836304,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3058.9,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 4.8359375,
|
|
"grad_norm": 0.49333808931187656,
|
|
"learning_rate": 1.0572065263480046e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09470351040363312,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3929.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 4.84375,
|
|
"grad_norm": 0.7951526542211214,
|
|
"learning_rate": 1.0503420778707038e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16283205151557922,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2876.2,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 4.8515625,
|
|
"grad_norm": 0.7155624692745239,
|
|
"learning_rate": 1.0434920427573643e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11920382082462311,
|
|
"step": 3105,
|
|
"valid_targets_mean": 2299.0,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 4.859375,
|
|
"grad_norm": 0.7698362002815019,
|
|
"learning_rate": 1.036656524973893e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20647820830345154,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3316.9,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 4.8671875,
|
|
"grad_norm": 0.5971617740387133,
|
|
"learning_rate": 1.0298356282658668e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15046551823616028,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4391.8,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 4.875,
|
|
"grad_norm": 0.5322768520252721,
|
|
"learning_rate": 1.0230294561569454e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563924252986908,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5952.8,
|
|
"valid_targets_min": 3503
|
|
},
|
|
{
|
|
"epoch": 4.8828125,
|
|
"grad_norm": 0.5674332596869363,
|
|
"learning_rate": 1.0162381119473137e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15309269726276398,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4994.4,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 4.890625,
|
|
"grad_norm": 0.535783741798832,
|
|
"learning_rate": 1.0094616987121013e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20612335205078125,
|
|
"step": 3130,
|
|
"valid_targets_mean": 6419.2,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 4.8984375,
|
|
"grad_norm": 0.460933839081061,
|
|
"learning_rate": 1.0027003192998275e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13312917947769165,
|
|
"step": 3135,
|
|
"valid_targets_mean": 7083.4,
|
|
"valid_targets_min": 2532
|
|
},
|
|
{
|
|
"epoch": 4.90625,
|
|
"grad_norm": 0.8354124553251605,
|
|
"learning_rate": 9.959540763308361e-06,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1601850688457489,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3243.1,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 4.9140625,
|
|
"grad_norm": 0.6454841135764238,
|
|
"learning_rate": 9.892230721957393e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13274839520454407,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3104.5,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 4.921875,
|
|
"grad_norm": 0.7778879501323761,
|
|
"learning_rate": 9.825074090538595e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10812464356422424,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2349.5,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 4.9296875,
|
|
"grad_norm": 0.6307880710697913,
|
|
"learning_rate": 9.758071888316887e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1041250005364418,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2727.9,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 4.9375,
|
|
"grad_norm": 0.7314931160076291,
|
|
"learning_rate": 9.691225132213286e-06,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1607392430305481,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2891.8,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 4.9453125,
|
|
"grad_norm": 0.7287143579407839,
|
|
"learning_rate": 9.624534836789568e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17852626740932465,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3437.1,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 4.953125,
|
|
"grad_norm": 0.5992272311522904,
|
|
"learning_rate": 9.558002014232858e-06,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18514734506607056,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4898.4,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 4.9609375,
|
|
"grad_norm": 0.653243891985835,
|
|
"learning_rate": 9.491627674340203e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216670572757721,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3181.6,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 4.96875,
|
|
"grad_norm": 0.5730840856097837,
|
|
"learning_rate": 9.42541282450332e-06,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14794345200061798,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4030.6,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 4.9765625,
|
|
"grad_norm": 0.6114339634843177,
|
|
"learning_rate": 9.359358469693272e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20590324699878693,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4999.8,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 4.984375,
|
|
"grad_norm": 0.6132436219196769,
|
|
"learning_rate": 9.293465612445217e-06,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15772338211536407,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4787.2,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 4.9921875,
|
|
"grad_norm": 0.7090899123951322,
|
|
"learning_rate": 9.2277352528432e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13576951622962952,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2964.8,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5343431674097054,
|
|
"learning_rate": 9.162168388504972e-06,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11201247572898865,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3906.0,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 5.0078125,
|
|
"grad_norm": 0.5046936670946438,
|
|
"learning_rate": 9.096766014566825e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11499462276697159,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4330.1,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 5.015625,
|
|
"grad_norm": 0.6974054088929094,
|
|
"learning_rate": 9.031529123668553e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11898432672023773,
|
|
"step": 3210,
|
|
"valid_targets_mean": 2859.2,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 5.0234375,
|
|
"grad_norm": 0.7314122360056066,
|
|
"learning_rate": 8.966458705938293e-06,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10447195172309875,
|
|
"step": 3215,
|
|
"valid_targets_mean": 2606.8,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 5.03125,
|
|
"grad_norm": 0.7199724835318668,
|
|
"learning_rate": 8.901555748977584e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11118433624505997,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2759.1,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 5.0390625,
|
|
"grad_norm": 0.60133967475612,
|
|
"learning_rate": 8.836821237846325e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1524762213230133,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4326.4,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 5.046875,
|
|
"grad_norm": 0.6843729026609424,
|
|
"learning_rate": 8.772256155047847e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08896399289369583,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2607.4,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 5.0546875,
|
|
"grad_norm": 0.501952965596312,
|
|
"learning_rate": 8.707861480513993e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09482182562351227,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3602.9,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 5.0625,
|
|
"grad_norm": 0.5887841545107604,
|
|
"learning_rate": 8.643638191590247e-06,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16887958347797394,
|
|
"step": 3240,
|
|
"valid_targets_mean": 6411.0,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 5.0703125,
|
|
"grad_norm": 0.6297523134914647,
|
|
"learning_rate": 8.579587263020897e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08280028402805328,
|
|
"step": 3245,
|
|
"valid_targets_mean": 1986.1,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 5.078125,
|
|
"grad_norm": 0.7252810404283574,
|
|
"learning_rate": 8.515709666934249e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12945008277893066,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3456.0,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 5.0859375,
|
|
"grad_norm": 0.5713533792080013,
|
|
"learning_rate": 8.452006372827875e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11849690973758698,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4617.4,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 5.09375,
|
|
"grad_norm": 0.5194225998532879,
|
|
"learning_rate": 8.388478347553858e-06,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24853534996509552,
|
|
"step": 3260,
|
|
"valid_targets_mean": 7894.9,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 5.1015625,
|
|
"grad_norm": 0.5199670343111807,
|
|
"learning_rate": 8.325126555304208e-06,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14205271005630493,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5046.1,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 5.109375,
|
|
"grad_norm": 0.5763829337007623,
|
|
"learning_rate": 8.261951957596113e-06,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09178589284420013,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3527.0,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 5.1171875,
|
|
"grad_norm": 0.5468683990326231,
|
|
"learning_rate": 8.19895551325744e-06,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17381441593170166,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5975.1,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 5.125,
|
|
"grad_norm": 0.5648736995196442,
|
|
"learning_rate": 8.136138178412134e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09574223309755325,
|
|
"step": 3280,
|
|
"valid_targets_mean": 2663.4,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 5.1328125,
|
|
"grad_norm": 0.602550651148445,
|
|
"learning_rate": 8.07350090646572e-06,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17916516959667206,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5375.9,
|
|
"valid_targets_min": 2012
|
|
},
|
|
{
|
|
"epoch": 5.140625,
|
|
"grad_norm": 0.5741563953131104,
|
|
"learning_rate": 8.011044648090833e-06,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12947413325309753,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3765.4,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 5.1484375,
|
|
"grad_norm": 0.806041501002495,
|
|
"learning_rate": 7.948770351212786e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16101224720478058,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2742.0,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 5.15625,
|
|
"grad_norm": 0.7714803654156688,
|
|
"learning_rate": 7.886678960995186e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10475930571556091,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2265.4,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 5.1640625,
|
|
"grad_norm": 0.5843158942987076,
|
|
"learning_rate": 7.824771419825588e-06,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11120818555355072,
|
|
"step": 3305,
|
|
"valid_targets_mean": 5228.5,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 5.171875,
|
|
"grad_norm": 0.6186921555873753,
|
|
"learning_rate": 7.763048667301192e-06,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1085093542933464,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3891.2,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 5.1796875,
|
|
"grad_norm": 0.5866240434126099,
|
|
"learning_rate": 7.701511640214563e-06,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13206243515014648,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4389.2,
|
|
"valid_targets_min": 2325
|
|
},
|
|
{
|
|
"epoch": 5.1875,
|
|
"grad_norm": 0.6672098581468722,
|
|
"learning_rate": 7.640161272539475e-06,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0993528962135315,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3078.4,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 5.1953125,
|
|
"grad_norm": 0.6213052085383842,
|
|
"learning_rate": 7.578998495416645e-06,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11047904193401337,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3344.1,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 5.203125,
|
|
"grad_norm": 0.654095913546447,
|
|
"learning_rate": 7.518024237139676e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18792012333869934,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5080.4,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 5.2109375,
|
|
"grad_norm": 0.642848874258468,
|
|
"learning_rate": 7.457239423140936e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10866480320692062,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2871.8,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 5.21875,
|
|
"grad_norm": 0.6703218548935982,
|
|
"learning_rate": 7.39664497597751e-06,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11649449169635773,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3097.0,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 5.2265625,
|
|
"grad_norm": 0.7316269852795628,
|
|
"learning_rate": 7.336241815317207e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14323726296424866,
|
|
"step": 3345,
|
|
"valid_targets_mean": 2687.6,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 5.234375,
|
|
"grad_norm": 0.4749485490074528,
|
|
"learning_rate": 7.276030857924601e-06,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1774798333644867,
|
|
"step": 3350,
|
|
"valid_targets_mean": 7780.2,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 5.2421875,
|
|
"grad_norm": 0.6398509921620527,
|
|
"learning_rate": 7.216013017647112e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1395539939403534,
|
|
"step": 3355,
|
|
"valid_targets_mean": 4590.6,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 5.25,
|
|
"grad_norm": 1.304478900964069,
|
|
"learning_rate": 7.156189205401143e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22404244542121887,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3531.6,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 5.2578125,
|
|
"grad_norm": 0.5500606020604805,
|
|
"learning_rate": 7.096560329158253e-06,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17683762311935425,
|
|
"step": 3365,
|
|
"valid_targets_mean": 6371.0,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 5.265625,
|
|
"grad_norm": 0.512963845541382,
|
|
"learning_rate": 7.03712729393135e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06865803897380829,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3034.0,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 5.2734375,
|
|
"grad_norm": 0.7809358088524075,
|
|
"learning_rate": 6.977891001761021e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14702051877975464,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3084.8,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 5.28125,
|
|
"grad_norm": 0.8174072403181407,
|
|
"learning_rate": 6.918852351701764e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11364416033029556,
|
|
"step": 3380,
|
|
"valid_targets_mean": 1742.2,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 5.2890625,
|
|
"grad_norm": 0.6246302859329512,
|
|
"learning_rate": 6.860012239808393e-06,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14385686814785004,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4335.5,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 5.296875,
|
|
"grad_norm": 0.6701244119308003,
|
|
"learning_rate": 6.801371559122423e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10745108127593994,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2752.9,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 5.3046875,
|
|
"grad_norm": 0.6687161219141166,
|
|
"learning_rate": 6.7429311996585135e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16643503308296204,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4615.4,
|
|
"valid_targets_min": 2269
|
|
},
|
|
{
|
|
"epoch": 5.3125,
|
|
"grad_norm": 0.6213629565910009,
|
|
"learning_rate": 6.684692048390966e-06,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18504589796066284,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4564.6,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 5.3203125,
|
|
"grad_norm": 0.6183382645236792,
|
|
"learning_rate": 6.6266549892402665e-06,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13984109461307526,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4463.6,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 5.328125,
|
|
"grad_norm": 0.6473478725967595,
|
|
"learning_rate": 6.568820903059632e-06,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12108758091926575,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3439.9,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 5.3359375,
|
|
"grad_norm": 0.6746577375457637,
|
|
"learning_rate": 6.511190667621714e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14549319446086884,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2935.0,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 5.34375,
|
|
"grad_norm": 0.8212952771764874,
|
|
"learning_rate": 6.453765157605214e-06,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12886053323745728,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2285.8,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 5.3515625,
|
|
"grad_norm": 0.5512002467883099,
|
|
"learning_rate": 6.396545244581609e-06,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15963593125343323,
|
|
"step": 3425,
|
|
"valid_targets_mean": 6103.5,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 5.359375,
|
|
"grad_norm": 0.6465562341020791,
|
|
"learning_rate": 6.33953179700199e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1082988828420639,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2946.9,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 5.3671875,
|
|
"grad_norm": 0.6614474110375007,
|
|
"learning_rate": 6.282725680183786e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17406649887561798,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4502.5,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.375,
|
|
"grad_norm": 0.6956988430133145,
|
|
"learning_rate": 6.226127756297704e-06,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1733933687210083,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4204.8,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 5.3828125,
|
|
"grad_norm": 0.6439121117847131,
|
|
"learning_rate": 6.169738884354615e-06,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14098671078681946,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4244.8,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 5.390625,
|
|
"grad_norm": 0.5610488679683071,
|
|
"learning_rate": 6.113559920192511e-06,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2253178060054779,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5764.4,
|
|
"valid_targets_min": 2010
|
|
},
|
|
{
|
|
"epoch": 5.3984375,
|
|
"grad_norm": 0.6116861538742633,
|
|
"learning_rate": 6.057591716463536e-06,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2208654284477234,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5984.9,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 5.40625,
|
|
"grad_norm": 0.6216778377136561,
|
|
"learning_rate": 6.001835122621029e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15918587148189545,
|
|
"step": 3460,
|
|
"valid_targets_mean": 4408.8,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 5.4140625,
|
|
"grad_norm": 0.704626342875288,
|
|
"learning_rate": 5.946290984906617e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13279172778129578,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3185.2,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 5.421875,
|
|
"grad_norm": 0.631204014934318,
|
|
"learning_rate": 5.8909601463374275e-06,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2013317346572876,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4176.6,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 5.4296875,
|
|
"grad_norm": 0.6175115230428686,
|
|
"learning_rate": 5.835843446693219e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14624445140361786,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4566.5,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 5.4375,
|
|
"grad_norm": 0.6172042564159322,
|
|
"learning_rate": 5.780941722503681e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14069512486457825,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4201.0,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.4453125,
|
|
"grad_norm": 0.5792132305080712,
|
|
"learning_rate": 5.726255807035759e-06,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17955777049064636,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5900.9,
|
|
"valid_targets_min": 1770
|
|
},
|
|
{
|
|
"epoch": 5.453125,
|
|
"grad_norm": 0.8072724936458875,
|
|
"learning_rate": 5.671786530280932e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11328478157520294,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3699.5,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 5.4609375,
|
|
"grad_norm": 0.6818974867591465,
|
|
"learning_rate": 5.6175347189426875e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18506446480751038,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4451.2,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.46875,
|
|
"grad_norm": 0.597504611729965,
|
|
"learning_rate": 5.5635011964239415e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18918883800506592,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4544.9,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 5.4765625,
|
|
"grad_norm": 0.5533903571308518,
|
|
"learning_rate": 5.509686782814547e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1189752146601677,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4011.8,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 5.484375,
|
|
"grad_norm": 0.6189591104676915,
|
|
"learning_rate": 5.4560922948788496e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14066731929779053,
|
|
"step": 3510,
|
|
"valid_targets_mean": 4555.4,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 5.4921875,
|
|
"grad_norm": 0.7264571010453796,
|
|
"learning_rate": 5.402718546043293e-06,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13631436228752136,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2626.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.5,
|
|
"grad_norm": 0.6060772528756809,
|
|
"learning_rate": 5.349566346384043e-06,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10958144068717957,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3026.4,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 5.5078125,
|
|
"grad_norm": 0.7153197618947812,
|
|
"learning_rate": 5.296636502614767e-06,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10425803065299988,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2692.2,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 5.515625,
|
|
"grad_norm": 0.5498179651464947,
|
|
"learning_rate": 5.2439298180742935e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15543559193611145,
|
|
"step": 3530,
|
|
"valid_targets_mean": 6302.5,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 5.5234375,
|
|
"grad_norm": 0.6756550557340587,
|
|
"learning_rate": 5.191447092714497e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13293832540512085,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3315.9,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 5.53125,
|
|
"grad_norm": 0.5893238849797229,
|
|
"learning_rate": 5.13918912308812e-06,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11908221989870071,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4078.2,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 5.5390625,
|
|
"grad_norm": 0.5366783611723772,
|
|
"learning_rate": 5.087156702336689e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514512002468109,
|
|
"step": 3545,
|
|
"valid_targets_mean": 5389.8,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 5.546875,
|
|
"grad_norm": 0.6490161636319042,
|
|
"learning_rate": 5.035350620178485e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10361853241920471,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2990.0,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 5.5546875,
|
|
"grad_norm": 0.7589436115106437,
|
|
"learning_rate": 4.983771662896544e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0990154892206192,
|
|
"step": 3555,
|
|
"valid_targets_mean": 1757.0,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 5.5625,
|
|
"grad_norm": 0.6258807621280661,
|
|
"learning_rate": 4.932420613326736e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10476483404636383,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3208.1,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 5.5703125,
|
|
"grad_norm": 0.6278868742297677,
|
|
"learning_rate": 4.881298250845874e-06,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371040940284729,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4043.8,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 5.578125,
|
|
"grad_norm": 0.6684521428280122,
|
|
"learning_rate": 4.830405351359902e-06,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08206835389137268,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2517.5,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 5.5859375,
|
|
"grad_norm": 0.6403066035900613,
|
|
"learning_rate": 4.779742687292075e-06,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10635063052177429,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2761.9,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 5.59375,
|
|
"grad_norm": 0.6750954374563015,
|
|
"learning_rate": 4.729311027571315e-06,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21257925033569336,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4519.4,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 5.6015625,
|
|
"grad_norm": 0.5679948008184622,
|
|
"learning_rate": 4.679111137620442e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12635569274425507,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4806.5,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 5.609375,
|
|
"grad_norm": 0.6759392897936433,
|
|
"learning_rate": 4.629143779344641e-06,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19631728529930115,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5165.6,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 5.6171875,
|
|
"grad_norm": 0.7313875801124594,
|
|
"learning_rate": 4.579409711119851e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10421043634414673,
|
|
"step": 3595,
|
|
"valid_targets_mean": 2234.9,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 5.625,
|
|
"grad_norm": 0.6737954676137935,
|
|
"learning_rate": 4.529909687781271e-06,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12556084990501404,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3480.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 5.6328125,
|
|
"grad_norm": 0.6901309783919696,
|
|
"learning_rate": 4.480644460611902e-06,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15941768884658813,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4748.9,
|
|
"valid_targets_min": 1903
|
|
},
|
|
{
|
|
"epoch": 5.640625,
|
|
"grad_norm": 0.7217096944537199,
|
|
"learning_rate": 4.431614777331141e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630745232105255,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3485.0,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.6484375,
|
|
"grad_norm": 0.7927222109430115,
|
|
"learning_rate": 4.3828213820834375e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13383173942565918,
|
|
"step": 3615,
|
|
"valid_targets_mean": 2760.4,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 5.65625,
|
|
"grad_norm": 0.7427010641634977,
|
|
"learning_rate": 4.334265015426993e-06,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1423024833202362,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3534.2,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 5.6640625,
|
|
"grad_norm": 0.723550705838431,
|
|
"learning_rate": 4.285946414322531e-06,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12356115877628326,
|
|
"step": 3625,
|
|
"valid_targets_mean": 2353.4,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 5.671875,
|
|
"grad_norm": 0.6173482128611767,
|
|
"learning_rate": 4.237866312122087e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1897842437028885,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4568.1,
|
|
"valid_targets_min": 2119
|
|
},
|
|
{
|
|
"epoch": 5.6796875,
|
|
"grad_norm": 0.5341471102067765,
|
|
"learning_rate": 4.190025438557932e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08332079648971558,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4347.9,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 5.6875,
|
|
"grad_norm": 0.6027015635895797,
|
|
"learning_rate": 4.142424519731427e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19937267899513245,
|
|
"step": 3640,
|
|
"valid_targets_mean": 5918.8,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 5.6953125,
|
|
"grad_norm": 0.5853132782920005,
|
|
"learning_rate": 4.0950642781020524e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10349492728710175,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4844.0,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 5.703125,
|
|
"grad_norm": 0.6289196433792089,
|
|
"learning_rate": 4.04794543247643e-06,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14592915773391724,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4422.4,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 5.7109375,
|
|
"grad_norm": 0.673354865208065,
|
|
"learning_rate": 4.0010686979974075e-06,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684604436159134,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4423.0,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 5.71875,
|
|
"grad_norm": 0.5241702621907239,
|
|
"learning_rate": 3.954434786133207e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12942439317703247,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5505.0,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 5.7265625,
|
|
"grad_norm": 0.640280093435949,
|
|
"learning_rate": 3.908044404666633e-06,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10417072474956512,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3491.4,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 5.734375,
|
|
"grad_norm": 0.6659176132161894,
|
|
"learning_rate": 3.86189825768432e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16642507910728455,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3926.4,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 5.7421875,
|
|
"grad_norm": 0.48589402758912137,
|
|
"learning_rate": 3.815997045566056e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10184939205646515,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4378.9,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 5.75,
|
|
"grad_norm": 2.124579579082943,
|
|
"learning_rate": 3.770341464974148e-06,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18715925514698029,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2744.9,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 5.7578125,
|
|
"grad_norm": 0.6690464776104572,
|
|
"learning_rate": 3.724932208842831e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1855975091457367,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4963.0,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 5.765625,
|
|
"grad_norm": 0.6942346895596813,
|
|
"learning_rate": 3.6797699663678033e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15811976790428162,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3490.8,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 5.7734375,
|
|
"grad_norm": 0.617994614257123,
|
|
"learning_rate": 3.6348554229957e-06,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24881428480148315,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5033.6,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 5.78125,
|
|
"grad_norm": 0.6162181289274892,
|
|
"learning_rate": 3.5901892604137323e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0951337218284607,
|
|
"step": 3700,
|
|
"valid_targets_mean": 2884.6,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 5.7890625,
|
|
"grad_norm": 0.7798809525187682,
|
|
"learning_rate": 3.545772156539333e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1131541058421135,
|
|
"step": 3705,
|
|
"valid_targets_mean": 2227.9,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 5.796875,
|
|
"grad_norm": 0.6130517673582406,
|
|
"learning_rate": 3.5016047855098668e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1031871885061264,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3718.9,
|
|
"valid_targets_min": 1963
|
|
},
|
|
{
|
|
"epoch": 5.8046875,
|
|
"grad_norm": 0.6516919942576133,
|
|
"learning_rate": 3.457687817672377e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2004844844341278,
|
|
"step": 3715,
|
|
"valid_targets_mean": 5261.9,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 5.8125,
|
|
"grad_norm": 0.6947853873163778,
|
|
"learning_rate": 3.4140219195734623e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0996290072798729,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2788.5,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 5.8203125,
|
|
"grad_norm": 0.5724603723300458,
|
|
"learning_rate": 3.3706077539490933e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09630700200796127,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3485.0,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.828125,
|
|
"grad_norm": 0.7907119333960065,
|
|
"learning_rate": 3.327445979714623e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11837601661682129,
|
|
"step": 3730,
|
|
"valid_targets_mean": 2560.1,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 5.8359375,
|
|
"grad_norm": 0.6924586435095025,
|
|
"learning_rate": 3.284537251954736e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1616465300321579,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4626.4,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 5.84375,
|
|
"grad_norm": 0.6864772622743396,
|
|
"learning_rate": 3.241882221913508e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10835295915603638,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3301.4,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 5.8515625,
|
|
"grad_norm": 0.7766942188040188,
|
|
"learning_rate": 3.199481536984572e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13444839417934418,
|
|
"step": 3745,
|
|
"valid_targets_mean": 2224.2,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 5.859375,
|
|
"grad_norm": 0.6924768240375514,
|
|
"learning_rate": 3.157335840701221e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14776311814785004,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3477.0,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 5.8671875,
|
|
"grad_norm": 0.591543231852692,
|
|
"learning_rate": 3.1154457727266974e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11279892921447754,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3673.8,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 5.875,
|
|
"grad_norm": 0.8346470977048491,
|
|
"learning_rate": 3.0738119688444558e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1544916182756424,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3066.4,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 5.8828125,
|
|
"grad_norm": 0.6294803424158907,
|
|
"learning_rate": 3.0324350609485266e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10168902575969696,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2995.8,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 5.890625,
|
|
"grad_norm": 0.6992592593073286,
|
|
"learning_rate": 2.991315677033906e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1786080002784729,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3918.1,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 5.8984375,
|
|
"grad_norm": 0.5994728501389917,
|
|
"learning_rate": 2.9504544411870627e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16310274600982666,
|
|
"step": 3775,
|
|
"valid_targets_mean": 4906.2,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 5.90625,
|
|
"grad_norm": 0.4245518204568104,
|
|
"learning_rate": 2.9098519735764187e-06,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05600607767701149,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3641.9,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 5.9140625,
|
|
"grad_norm": 0.6253377146349989,
|
|
"learning_rate": 2.8695088904429737e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11982671171426773,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3291.9,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 5.921875,
|
|
"grad_norm": 0.553632199840292,
|
|
"learning_rate": 2.8294258040909328e-06,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1020302027463913,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4149.2,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 5.9296875,
|
|
"grad_norm": 0.6375752838317469,
|
|
"learning_rate": 2.7896033228784226e-06,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1510707139968872,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3712.6,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 5.9375,
|
|
"grad_norm": 0.7657166632230655,
|
|
"learning_rate": 2.7500420512082482e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13170671463012695,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3169.1,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 5.9453125,
|
|
"grad_norm": 0.6110284505124861,
|
|
"learning_rate": 2.710742589518731e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08706323057413101,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3203.9,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 5.953125,
|
|
"grad_norm": 0.5934196711960971,
|
|
"learning_rate": 2.6717055342745913e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0940280482172966,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3271.9,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 5.9609375,
|
|
"grad_norm": 0.5584237049083842,
|
|
"learning_rate": 2.6329314779578917e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09052562713623047,
|
|
"step": 3815,
|
|
"valid_targets_mean": 2976.8,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 5.96875,
|
|
"grad_norm": 0.6585025909647281,
|
|
"learning_rate": 2.594421009059049e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12110359221696854,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3367.2,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 5.9765625,
|
|
"grad_norm": 0.5673096133291792,
|
|
"learning_rate": 2.556174712067894e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15002688765525818,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4772.1,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 5.984375,
|
|
"grad_norm": 0.5553649291617744,
|
|
"learning_rate": 2.5181931674648265e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10599038004875183,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3451.1,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 5.9921875,
|
|
"grad_norm": 0.7022055501333647,
|
|
"learning_rate": 2.480476951711963e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13294464349746704,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3447.9,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.7923994232480275,
|
|
"learning_rate": 2.4430266372444254e-06,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11869074404239655,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2419.5,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 6.0078125,
|
|
"grad_norm": 0.7505885580931568,
|
|
"learning_rate": 2.4058427924616344e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13158443570137024,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4794.8,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 6.015625,
|
|
"grad_norm": 0.6640981793549243,
|
|
"learning_rate": 2.3689259817186884e-06,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1224999725818634,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3338.9,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 6.0234375,
|
|
"grad_norm": 0.6385146796940634,
|
|
"learning_rate": 2.3322767653177915e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09019853919744492,
|
|
"step": 3855,
|
|
"valid_targets_mean": 2667.0,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 6.03125,
|
|
"grad_norm": 0.7688080752903361,
|
|
"learning_rate": 2.2958956994997593e-06,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15413054823875427,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3191.6,
|
|
"valid_targets_min": 1912
|
|
},
|
|
{
|
|
"epoch": 6.0390625,
|
|
"grad_norm": 0.6132947546578028,
|
|
"learning_rate": 2.259783336435566e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10858497023582458,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3362.6,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 6.046875,
|
|
"grad_norm": 0.5265692734573072,
|
|
"learning_rate": 2.2239402242179753e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480739414691925,
|
|
"step": 3870,
|
|
"valid_targets_mean": 6193.4,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 6.0546875,
|
|
"grad_norm": 0.8187466089013116,
|
|
"learning_rate": 2.1883669068532164e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09416438639163971,
|
|
"step": 3875,
|
|
"valid_targets_mean": 1689.9,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 6.0625,
|
|
"grad_norm": 0.6357225312705009,
|
|
"learning_rate": 2.1530639242527095e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1128154918551445,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3278.6,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 6.0703125,
|
|
"grad_norm": 0.6365721040321523,
|
|
"learning_rate": 2.118031812224921e-06,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06967419385910034,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2418.6,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.078125,
|
|
"grad_norm": 0.6791949244314719,
|
|
"learning_rate": 2.083271102467166e-06,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12802845239639282,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3900.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.0859375,
|
|
"grad_norm": 0.6104740320409568,
|
|
"learning_rate": 2.0487823225575897e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07714955508708954,
|
|
"step": 3895,
|
|
"valid_targets_mean": 2375.8,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 6.09375,
|
|
"grad_norm": 0.7645535416125689,
|
|
"learning_rate": 2.0145659959471397e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16248643398284912,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3234.2,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 6.1015625,
|
|
"grad_norm": 0.608051365902165,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11406989395618439,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4229.5,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 6.109375,
|
|
"grad_norm": 0.5111270905750861,
|
|
"learning_rate": 1.946952775743813e-06,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10390783101320267,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4862.8,
|
|
"valid_targets_min": 1928
|
|
},
|
|
{
|
|
"epoch": 6.1171875,
|
|
"grad_norm": 0.6645911620911651,
|
|
"learning_rate": 1.913556908345664e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13434872031211853,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3793.9,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 6.125,
|
|
"grad_norm": 0.7075095487952572,
|
|
"learning_rate": 1.880435546620516e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12585048377513885,
|
|
"step": 3920,
|
|
"valid_targets_mean": 2976.5,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 6.1328125,
|
|
"grad_norm": 0.7312269653713259,
|
|
"learning_rate": 1.8475891932654266e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13066476583480835,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2782.0,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 6.140625,
|
|
"grad_norm": 0.5271736718667194,
|
|
"learning_rate": 1.8150183468035366e-06,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13340997695922852,
|
|
"step": 3930,
|
|
"valid_targets_mean": 5487.9,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 6.1484375,
|
|
"grad_norm": 0.6337990208690824,
|
|
"learning_rate": 1.782723501576482e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21919742226600647,
|
|
"step": 3935,
|
|
"valid_targets_mean": 6038.2,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 6.15625,
|
|
"grad_norm": 0.7166350006939413,
|
|
"learning_rate": 1.750705147736942e-06,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10220938920974731,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2907.6,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 6.1640625,
|
|
"grad_norm": 0.5269240918548754,
|
|
"learning_rate": 1.71896377124114e-06,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12403245270252228,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4278.4,
|
|
"valid_targets_min": 2603
|
|
},
|
|
{
|
|
"epoch": 6.171875,
|
|
"grad_norm": 0.7528331033180756,
|
|
"learning_rate": 1.6874998538415077e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16533511877059937,
|
|
"step": 3950,
|
|
"valid_targets_mean": 2971.9,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 6.1796875,
|
|
"grad_norm": 0.6265419737874007,
|
|
"learning_rate": 1.6563138730793627e-06,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12288747727870941,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4662.2,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.1875,
|
|
"grad_norm": 0.7196311388951724,
|
|
"learning_rate": 1.6254063022776546e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10614748299121857,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2184.8,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 6.1953125,
|
|
"grad_norm": 0.7524698722351988,
|
|
"learning_rate": 1.5947776105337886e-06,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17295202612876892,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3574.9,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 6.203125,
|
|
"grad_norm": 0.6918320642338893,
|
|
"learning_rate": 1.5644282627125095e-06,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10769148170948029,
|
|
"step": 3970,
|
|
"valid_targets_mean": 2970.0,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 6.2109375,
|
|
"grad_norm": 0.6544073866568701,
|
|
"learning_rate": 1.534358719438822e-06,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09773297607898712,
|
|
"step": 3975,
|
|
"valid_targets_mean": 2812.4,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 6.21875,
|
|
"grad_norm": 0.6424196821020789,
|
|
"learning_rate": 1.504569437091039e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11783915758132935,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3395.9,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 6.2265625,
|
|
"grad_norm": 0.6071755001781759,
|
|
"learning_rate": 1.475060867793827e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17076680064201355,
|
|
"step": 3985,
|
|
"valid_targets_mean": 6116.4,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 6.234375,
|
|
"grad_norm": 0.6653376202316331,
|
|
"learning_rate": 1.4458334594113344e-06,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14667293429374695,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4233.9,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 6.2421875,
|
|
"grad_norm": 0.6859597893673824,
|
|
"learning_rate": 1.4168876555404377e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07992937415838242,
|
|
"step": 3995,
|
|
"valid_targets_mean": 1986.9,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"grad_norm": 0.7538368200015717,
|
|
"learning_rate": 1.3882238955039595e-06,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10055240988731384,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2048.2,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 6.2578125,
|
|
"grad_norm": 0.6232855477560945,
|
|
"learning_rate": 1.3598426143440312e-06,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19466641545295715,
|
|
"step": 4005,
|
|
"valid_targets_mean": 5997.5,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 6.265625,
|
|
"grad_norm": 0.6490112969042833,
|
|
"learning_rate": 1.3317442428154825e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12241148203611374,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3118.4,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 6.2734375,
|
|
"grad_norm": 0.6735559222317468,
|
|
"learning_rate": 1.3039292073792998e-06,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21115313470363617,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4555.5,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 6.28125,
|
|
"grad_norm": 0.6536001721571784,
|
|
"learning_rate": 1.2763979301961604e-06,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12998642027378082,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3588.9,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 6.2890625,
|
|
"grad_norm": 0.586346520862165,
|
|
"learning_rate": 1.2491508291200183e-06,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09626303613185883,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4389.4,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 6.296875,
|
|
"grad_norm": 0.6138887405563823,
|
|
"learning_rate": 1.2221883176917614e-06,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11844293773174286,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4042.2,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.3046875,
|
|
"grad_norm": 0.733331145507061,
|
|
"learning_rate": 1.195510805132951e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09563855081796646,
|
|
"step": 4035,
|
|
"valid_targets_mean": 2939.0,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 6.3125,
|
|
"grad_norm": 0.5918021332721832,
|
|
"learning_rate": 1.1691186963395861e-06,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11854833364486694,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4086.8,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 6.3203125,
|
|
"grad_norm": 0.6473784743742477,
|
|
"learning_rate": 1.143012391875975e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14378473162651062,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3905.6,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 6.328125,
|
|
"grad_norm": 0.5286103398851947,
|
|
"learning_rate": 1.1171922879686603e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09342914819717407,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4996.9,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 6.3359375,
|
|
"grad_norm": 0.7314249733853018,
|
|
"learning_rate": 1.0916587765003816e-06,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1194927915930748,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2313.1,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 6.34375,
|
|
"grad_norm": 0.5744852280612444,
|
|
"learning_rate": 1.0664122450041514e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20207640528678894,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5556.8,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 6.3515625,
|
|
"grad_norm": 0.6655578627951989,
|
|
"learning_rate": 1.0414530766573661e-06,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09609033167362213,
|
|
"step": 4065,
|
|
"valid_targets_mean": 2999.8,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 6.359375,
|
|
"grad_norm": 0.6045482700129424,
|
|
"learning_rate": 1.016781650275982e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09569937735795975,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3522.4,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 6.3671875,
|
|
"grad_norm": 0.6890844172733326,
|
|
"learning_rate": 9.923983403087777e-07,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09199053049087524,
|
|
"step": 4075,
|
|
"valid_targets_mean": 2956.4,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 6.375,
|
|
"grad_norm": 0.6606878387068186,
|
|
"learning_rate": 9.68303516831668e-07,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09685331583023071,
|
|
"step": 4080,
|
|
"valid_targets_mean": 2600.6,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 6.3828125,
|
|
"grad_norm": 0.6384956487055757,
|
|
"learning_rate": 9.444975455420735e-07,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15595637261867523,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4299.4,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 6.390625,
|
|
"grad_norm": 0.6614754084141997,
|
|
"learning_rate": 9.209807877534005e-07,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0832962691783905,
|
|
"step": 4090,
|
|
"valid_targets_mean": 2745.5,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.3984375,
|
|
"grad_norm": 0.598072404251396,
|
|
"learning_rate": 8.977536003895193e-07,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1346684694290161,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4422.2,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 6.40625,
|
|
"grad_norm": 0.6686056955132272,
|
|
"learning_rate": 8.748163359793804e-07,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15318574011325836,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3263.2,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 6.4140625,
|
|
"grad_norm": 0.7300369873346376,
|
|
"learning_rate": 8.521693426516387e-07,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2054601013660431,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3683.4,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 6.421875,
|
|
"grad_norm": 0.6844014663277475,
|
|
"learning_rate": 8.298129641293906e-07,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17993682622909546,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4514.6,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 6.4296875,
|
|
"grad_norm": 0.6628670856358843,
|
|
"learning_rate": 8.077475397249435e-07,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13384006917476654,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3579.2,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 6.4375,
|
|
"grad_norm": 0.6144387909648968,
|
|
"learning_rate": 7.859734043346656e-07,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313067078590393,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4058.2,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 6.4453125,
|
|
"grad_norm": 0.6871790874938675,
|
|
"learning_rate": 7.644908884339153e-07,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11054828763008118,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2458.0,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 6.453125,
|
|
"grad_norm": 0.591298324167781,
|
|
"learning_rate": 7.433003180720111e-07,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11840618401765823,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3848.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 6.4609375,
|
|
"grad_norm": 0.5483198041029432,
|
|
"learning_rate": 7.224020148672939e-07,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12371744215488434,
|
|
"step": 4135,
|
|
"valid_targets_mean": 5571.2,
|
|
"valid_targets_min": 2134
|
|
},
|
|
{
|
|
"epoch": 6.46875,
|
|
"grad_norm": 0.6835223597790899,
|
|
"learning_rate": 7.017962960022329e-07,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13768139481544495,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3358.2,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 6.4765625,
|
|
"grad_norm": 0.8329801199707676,
|
|
"learning_rate": 6.814834742186361e-07,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17619851231575012,
|
|
"step": 4145,
|
|
"valid_targets_mean": 2963.1,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 6.484375,
|
|
"grad_norm": 0.6625067095404238,
|
|
"learning_rate": 6.614638578128674e-07,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13412591814994812,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3779.6,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 6.4921875,
|
|
"grad_norm": 0.8761101936413094,
|
|
"learning_rate": 6.417377506311995e-07,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19025081396102905,
|
|
"step": 4155,
|
|
"valid_targets_mean": 2944.5,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"grad_norm": 0.6648802832182904,
|
|
"learning_rate": 6.223054520651883e-07,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06739646941423416,
|
|
"step": 4160,
|
|
"valid_targets_mean": 1892.2,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 6.5078125,
|
|
"grad_norm": 0.627529786237451,
|
|
"learning_rate": 6.031672570471325e-07,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14967185258865356,
|
|
"step": 4165,
|
|
"valid_targets_mean": 4563.9,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 6.515625,
|
|
"grad_norm": 0.6827675021203882,
|
|
"learning_rate": 5.843234560455902e-07,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13482683897018433,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3721.5,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 6.5234375,
|
|
"grad_norm": 0.6364480792737484,
|
|
"learning_rate": 5.657743350609801e-07,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09386356174945831,
|
|
"step": 4175,
|
|
"valid_targets_mean": 2635.9,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 6.53125,
|
|
"grad_norm": 0.7750882243286654,
|
|
"learning_rate": 5.475201756212367e-07,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13889993727207184,
|
|
"step": 4180,
|
|
"valid_targets_mean": 2579.5,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 6.5390625,
|
|
"grad_norm": 0.709966111928431,
|
|
"learning_rate": 5.295612547775331e-07,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20912089943885803,
|
|
"step": 4185,
|
|
"valid_targets_mean": 4791.9,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 6.546875,
|
|
"grad_norm": 0.6338580208833083,
|
|
"learning_rate": 5.118978451000866e-07,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08135390281677246,
|
|
"step": 4190,
|
|
"valid_targets_mean": 2648.5,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 6.5546875,
|
|
"grad_norm": 0.6900518749070957,
|
|
"learning_rate": 4.94530214674005e-07,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12678664922714233,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3519.8,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 6.5625,
|
|
"grad_norm": 0.6403969387114905,
|
|
"learning_rate": 4.774586270952397e-07,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23219828307628632,
|
|
"step": 4200,
|
|
"valid_targets_mean": 5390.0,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 6.5703125,
|
|
"grad_norm": 0.5313764529428376,
|
|
"learning_rate": 4.6068334146656567e-07,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1719360202550888,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5886.1,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 6.578125,
|
|
"grad_norm": 0.7118283981426333,
|
|
"learning_rate": 4.4420461239366165e-07,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13191410899162292,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3355.6,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 6.5859375,
|
|
"grad_norm": 0.6384174233587612,
|
|
"learning_rate": 4.280226899812334e-07,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10137508809566498,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3268.5,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 6.59375,
|
|
"grad_norm": 0.6700147905037847,
|
|
"learning_rate": 4.1213781982923473e-07,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1353142410516739,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3228.0,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 6.6015625,
|
|
"grad_norm": 0.6665522861185934,
|
|
"learning_rate": 3.965502430291235e-07,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13723230361938477,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3108.5,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 6.609375,
|
|
"grad_norm": 0.6538508153226694,
|
|
"learning_rate": 3.812601961602114e-07,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10166826844215393,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3365.0,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 6.6171875,
|
|
"grad_norm": 0.5556227840504839,
|
|
"learning_rate": 3.662679112860712e-07,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06461115181446075,
|
|
"step": 4235,
|
|
"valid_targets_mean": 2731.9,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 6.625,
|
|
"grad_norm": 0.664970510077525,
|
|
"learning_rate": 3.5157361595101747e-07,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564149558544159,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3808.4,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 6.6328125,
|
|
"grad_norm": 0.6379350869667123,
|
|
"learning_rate": 3.371775331766447e-07,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12332307547330856,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3585.1,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 6.640625,
|
|
"grad_norm": 0.6544535801834479,
|
|
"learning_rate": 3.230798814584502e-07,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12080800533294678,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3130.1,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 6.6484375,
|
|
"grad_norm": 0.6295251424601187,
|
|
"learning_rate": 3.092808747625209e-07,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09908099472522736,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3617.0,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 6.65625,
|
|
"grad_norm": 0.68769775722903,
|
|
"learning_rate": 2.95780722522272e-07,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1429365575313568,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3880.1,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 6.6640625,
|
|
"grad_norm": 0.6947148201777125,
|
|
"learning_rate": 2.825796296352823e-07,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14064309000968933,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3110.6,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 6.671875,
|
|
"grad_norm": 0.5845005486946475,
|
|
"learning_rate": 2.696777964601793e-07,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1077839583158493,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3809.9,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 6.6796875,
|
|
"grad_norm": 0.5503329380710252,
|
|
"learning_rate": 2.5707541881359264e-07,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1523076444864273,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5653.2,
|
|
"valid_targets_min": 2652
|
|
},
|
|
{
|
|
"epoch": 6.6875,
|
|
"grad_norm": 0.6282088561814149,
|
|
"learning_rate": 2.447726879671941e-07,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17982710897922516,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4235.4,
|
|
"valid_targets_min": 1461
|
|
},
|
|
{
|
|
"epoch": 6.6953125,
|
|
"grad_norm": 0.6698033154094151,
|
|
"learning_rate": 2.3276979064478678e-07,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18125875294208527,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3844.2,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 6.703125,
|
|
"grad_norm": 0.6305815187177156,
|
|
"learning_rate": 2.2106690901946727e-07,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12745238840579987,
|
|
"step": 4290,
|
|
"valid_targets_mean": 4285.5,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 6.7109375,
|
|
"grad_norm": 0.7204426794948032,
|
|
"learning_rate": 2.0966422071087012e-07,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.095721036195755,
|
|
"step": 4295,
|
|
"valid_targets_mean": 2056.9,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 6.71875,
|
|
"grad_norm": 0.6705384392924296,
|
|
"learning_rate": 1.9856189878247e-07,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20985791087150574,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4231.6,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 6.7265625,
|
|
"grad_norm": 0.6071597303634354,
|
|
"learning_rate": 1.8776011173894383e-07,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06896130740642548,
|
|
"step": 4305,
|
|
"valid_targets_mean": 2182.5,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 6.734375,
|
|
"grad_norm": 0.6155836181393393,
|
|
"learning_rate": 1.7725902352363488e-07,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09441882371902466,
|
|
"step": 4310,
|
|
"valid_targets_mean": 2815.0,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 6.7421875,
|
|
"grad_norm": 0.5545082409376448,
|
|
"learning_rate": 1.6705879351603947e-07,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10620395839214325,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4186.0,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 6.75,
|
|
"grad_norm": 0.6110893309211615,
|
|
"learning_rate": 1.571595765294065e-07,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12079726904630661,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3759.9,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 6.7578125,
|
|
"grad_norm": 0.668401332116379,
|
|
"learning_rate": 1.4756152280838375e-07,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09188161790370941,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3151.6,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 6.765625,
|
|
"grad_norm": 0.8249793850470816,
|
|
"learning_rate": 1.38264778026731e-07,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12976078689098358,
|
|
"step": 4330,
|
|
"valid_targets_mean": 2208.2,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 6.7734375,
|
|
"grad_norm": 0.6650689530603149,
|
|
"learning_rate": 1.292694832851127e-07,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15542012453079224,
|
|
"step": 4335,
|
|
"valid_targets_mean": 4124.2,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 6.78125,
|
|
"grad_norm": 0.5611434005721546,
|
|
"learning_rate": 1.2057577510896424e-07,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1156553328037262,
|
|
"step": 4340,
|
|
"valid_targets_mean": 4983.5,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 6.7890625,
|
|
"grad_norm": 0.7500527936646517,
|
|
"learning_rate": 1.1218378544640474e-07,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15409687161445618,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3441.9,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 6.796875,
|
|
"grad_norm": 0.6084231325632709,
|
|
"learning_rate": 1.0409364166624969e-07,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13375400006771088,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3517.5,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 6.8046875,
|
|
"grad_norm": 0.6066684897290082,
|
|
"learning_rate": 9.630546655606365e-08,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303100287914276,
|
|
"step": 4355,
|
|
"valid_targets_mean": 5542.8,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 6.8125,
|
|
"grad_norm": 0.6567723995233268,
|
|
"learning_rate": 8.881937832030619e-08,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17117667198181152,
|
|
"step": 4360,
|
|
"valid_targets_mean": 4185.5,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 6.8203125,
|
|
"grad_norm": 0.6538771751541541,
|
|
"learning_rate": 8.163549057854437e-08,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13967782258987427,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4041.5,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 6.828125,
|
|
"grad_norm": 0.6171360610439036,
|
|
"learning_rate": 7.47539123637031e-08,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1728893369436264,
|
|
"step": 4370,
|
|
"valid_targets_mean": 5409.1,
|
|
"valid_targets_min": 2511
|
|
},
|
|
{
|
|
"epoch": 6.8359375,
|
|
"grad_norm": 0.64675490379598,
|
|
"learning_rate": 6.817474812043756e-08,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1058511883020401,
|
|
"step": 4375,
|
|
"valid_targets_mean": 2510.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 6.84375,
|
|
"grad_norm": 0.5929133799252578,
|
|
"learning_rate": 6.189809770353439e-08,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08088253438472748,
|
|
"step": 4380,
|
|
"valid_targets_mean": 3212.5,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 6.8515625,
|
|
"grad_norm": 0.6497305378006099,
|
|
"learning_rate": 5.592405637639742e-08,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12661197781562805,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4013.2,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 6.859375,
|
|
"grad_norm": 0.565551552611999,
|
|
"learning_rate": 5.025271480960436e-08,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09549446403980255,
|
|
"step": 4390,
|
|
"valid_targets_mean": 3634.1,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 6.8671875,
|
|
"grad_norm": 0.5924328713504752,
|
|
"learning_rate": 4.4884159079527925e-08,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12203621119260788,
|
|
"step": 4395,
|
|
"valid_targets_mean": 4759.9,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 6.875,
|
|
"grad_norm": 0.5869877210191302,
|
|
"learning_rate": 3.981847066703459e-08,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11341831088066101,
|
|
"step": 4400,
|
|
"valid_targets_mean": 4646.1,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 6.8828125,
|
|
"grad_norm": 0.7415775373268617,
|
|
"learning_rate": 3.5055726456236786e-08,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12463448941707611,
|
|
"step": 4405,
|
|
"valid_targets_mean": 3713.6,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 6.890625,
|
|
"grad_norm": 0.5843723512786586,
|
|
"learning_rate": 3.059599873334263e-08,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14402280747890472,
|
|
"step": 4410,
|
|
"valid_targets_mean": 5680.4,
|
|
"valid_targets_min": 2612
|
|
},
|
|
{
|
|
"epoch": 6.8984375,
|
|
"grad_norm": 0.7137140989087183,
|
|
"learning_rate": 2.6439355185541303e-08,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09825268387794495,
|
|
"step": 4415,
|
|
"valid_targets_mean": 2273.5,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 6.90625,
|
|
"grad_norm": 0.7416210960876288,
|
|
"learning_rate": 2.258585889998832e-08,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13531829416751862,
|
|
"step": 4420,
|
|
"valid_targets_mean": 2565.5,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 6.9140625,
|
|
"grad_norm": 0.8061792141129214,
|
|
"learning_rate": 1.9035568362844037e-08,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1556365191936493,
|
|
"step": 4425,
|
|
"valid_targets_mean": 3102.5,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 6.921875,
|
|
"grad_norm": 0.7183982134520797,
|
|
"learning_rate": 1.578853745838549e-08,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430552452802658,
|
|
"step": 4430,
|
|
"valid_targets_mean": 3722.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.9296875,
|
|
"grad_norm": 0.6305283820899493,
|
|
"learning_rate": 1.2844815468184835e-08,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12510234117507935,
|
|
"step": 4435,
|
|
"valid_targets_mean": 3576.0,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 6.9375,
|
|
"grad_norm": 0.6637210861895438,
|
|
"learning_rate": 1.0204447070372159e-08,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19001562893390656,
|
|
"step": 4440,
|
|
"valid_targets_mean": 4535.1,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 6.9453125,
|
|
"grad_norm": 0.6627297452185061,
|
|
"learning_rate": 7.867472338942694e-09,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14613941311836243,
|
|
"step": 4445,
|
|
"valid_targets_mean": 3860.2,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 6.953125,
|
|
"grad_norm": 0.7559748144681239,
|
|
"learning_rate": 5.833926743161744e-09,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13705304265022278,
|
|
"step": 4450,
|
|
"valid_targets_mean": 3022.6,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 6.9609375,
|
|
"grad_norm": 0.7701600576013987,
|
|
"learning_rate": 4.1038411470206705e-09,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13650774955749512,
|
|
"step": 4455,
|
|
"valid_targets_mean": 3065.2,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 6.96875,
|
|
"grad_norm": 0.652473820758627,
|
|
"learning_rate": 2.6772418087639417e-09,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341947466135025,
|
|
"step": 4460,
|
|
"valid_targets_mean": 3732.9,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 6.9765625,
|
|
"grad_norm": 0.6153219086397805,
|
|
"learning_rate": 1.5541503805027725e-09,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0654231384396553,
|
|
"step": 4465,
|
|
"valid_targets_mean": 2601.1,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 6.984375,
|
|
"grad_norm": 0.6928286354878301,
|
|
"learning_rate": 7.345839078753969e-10,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2103203535079956,
|
|
"step": 4470,
|
|
"valid_targets_mean": 5344.1,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 6.9921875,
|
|
"grad_norm": 0.8617987656598876,
|
|
"learning_rate": 2.1855482979171726e-10,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1518891304731369,
|
|
"step": 4475,
|
|
"valid_targets_mean": 5344.1,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.7000936471277994,
|
|
"learning_rate": 6.070978244565595e-12,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15736517310142517,
|
|
"step": 4480,
|
|
"valid_targets_mean": 3889.8,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15736517310142517,
|
|
"step": 4480,
|
|
"total_flos": 1.1754390626437693e+18,
|
|
"train_loss": 0.32822010551712344,
|
|
"train_runtime": 43234.1073,
|
|
"train_samples_per_second": 1.657,
|
|
"train_steps_per_second": 0.104,
|
|
"valid_targets_mean": 3889.8,
|
|
"valid_targets_min": 1146
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4480,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.1754390626437693e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|