Model: penfever/GLM-4_6-gemini25flash-stackexchange-overflow-32ep-512k-fixeps Source: Original Platform
2402 lines
66 KiB
JSON
2402 lines
66 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1071,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.032679738562091505,
|
|
"grad_norm": 11.060789182905086,
|
|
"learning_rate": 1.4814814814814815e-06,
|
|
"loss": 0.8827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8205808401107788,
|
|
"step": 5,
|
|
"valid_targets_mean": 779.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.06535947712418301,
|
|
"grad_norm": 6.973307415232975,
|
|
"learning_rate": 3.3333333333333333e-06,
|
|
"loss": 0.8066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8751524686813354,
|
|
"step": 10,
|
|
"valid_targets_mean": 1240.4,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 0.09803921568627451,
|
|
"grad_norm": 4.5603144559339395,
|
|
"learning_rate": 5.185185185185185e-06,
|
|
"loss": 0.7469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7398011684417725,
|
|
"step": 15,
|
|
"valid_targets_mean": 669.1,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 0.13071895424836602,
|
|
"grad_norm": 3.5517010420743618,
|
|
"learning_rate": 7.0370370370370375e-06,
|
|
"loss": 0.6986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8484277129173279,
|
|
"step": 20,
|
|
"valid_targets_mean": 701.3,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 0.16339869281045752,
|
|
"grad_norm": 1.71312096923623,
|
|
"learning_rate": 8.888888888888888e-06,
|
|
"loss": 0.7292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7061011791229248,
|
|
"step": 25,
|
|
"valid_targets_mean": 1292.7,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 0.19607843137254902,
|
|
"grad_norm": 1.1836260079640273,
|
|
"learning_rate": 1.0740740740740742e-05,
|
|
"loss": 0.6299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5523278713226318,
|
|
"step": 30,
|
|
"valid_targets_mean": 1475.2,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 0.22875816993464052,
|
|
"grad_norm": 0.7908890063519861,
|
|
"learning_rate": 1.2592592592592593e-05,
|
|
"loss": 0.4857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3864622116088867,
|
|
"step": 35,
|
|
"valid_targets_mean": 2134.2,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 0.26143790849673204,
|
|
"grad_norm": 1.3350691541627218,
|
|
"learning_rate": 1.4444444444444446e-05,
|
|
"loss": 0.5807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4980165958404541,
|
|
"step": 40,
|
|
"valid_targets_mean": 929.5,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 1.3513898058061415,
|
|
"learning_rate": 1.6296296296296297e-05,
|
|
"loss": 0.6403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6828616857528687,
|
|
"step": 45,
|
|
"valid_targets_mean": 1063.6,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 0.32679738562091504,
|
|
"grad_norm": 1.0371862834617929,
|
|
"learning_rate": 1.814814814814815e-05,
|
|
"loss": 0.5481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4910392761230469,
|
|
"step": 50,
|
|
"valid_targets_mean": 1120.2,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 0.35947712418300654,
|
|
"grad_norm": 1.280369114672039,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5395255088806152,
|
|
"step": 55,
|
|
"valid_targets_mean": 998.9,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 0.39215686274509803,
|
|
"grad_norm": 1.117311206993178,
|
|
"learning_rate": 2.1851851851851852e-05,
|
|
"loss": 0.5648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5647487640380859,
|
|
"step": 60,
|
|
"valid_targets_mean": 1208.2,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 0.42483660130718953,
|
|
"grad_norm": 1.1653791876480772,
|
|
"learning_rate": 2.3703703703703703e-05,
|
|
"loss": 0.5476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5218506455421448,
|
|
"step": 65,
|
|
"valid_targets_mean": 1001.4,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 0.45751633986928103,
|
|
"grad_norm": 1.174967214115349,
|
|
"learning_rate": 2.5555555555555554e-05,
|
|
"loss": 0.5201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47852823138237,
|
|
"step": 70,
|
|
"valid_targets_mean": 1043.4,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 0.49019607843137253,
|
|
"grad_norm": 1.6604928495972278,
|
|
"learning_rate": 2.740740740740741e-05,
|
|
"loss": 0.5551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5851165056228638,
|
|
"step": 75,
|
|
"valid_targets_mean": 576.4,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 0.5228758169934641,
|
|
"grad_norm": 1.0202504936662373,
|
|
"learning_rate": 2.9259259259259262e-05,
|
|
"loss": 0.5125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3803825378417969,
|
|
"step": 80,
|
|
"valid_targets_mean": 1004.8,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 1.1312120882470664,
|
|
"learning_rate": 3.111111111111112e-05,
|
|
"loss": 0.5344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5755863189697266,
|
|
"step": 85,
|
|
"valid_targets_mean": 1221.0,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 1.4325847007248265,
|
|
"learning_rate": 3.2962962962962964e-05,
|
|
"loss": 0.5168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6161978244781494,
|
|
"step": 90,
|
|
"valid_targets_mean": 746.8,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.6209150326797386,
|
|
"grad_norm": 0.9183077187637829,
|
|
"learning_rate": 3.481481481481482e-05,
|
|
"loss": 0.5328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45081230998039246,
|
|
"step": 95,
|
|
"valid_targets_mean": 1345.3,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 0.6535947712418301,
|
|
"grad_norm": 1.6429963233048106,
|
|
"learning_rate": 3.6666666666666666e-05,
|
|
"loss": 0.582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6670611500740051,
|
|
"step": 100,
|
|
"valid_targets_mean": 619.7,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 0.6862745098039216,
|
|
"grad_norm": 1.1663889041506792,
|
|
"learning_rate": 3.851851851851852e-05,
|
|
"loss": 0.4839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4529034495353699,
|
|
"step": 105,
|
|
"valid_targets_mean": 863.6,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 0.7189542483660131,
|
|
"grad_norm": 1.2334368369716797,
|
|
"learning_rate": 3.9999893574233685e-05,
|
|
"loss": 0.5261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4745844304561615,
|
|
"step": 110,
|
|
"valid_targets_mean": 830.2,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 0.7516339869281046,
|
|
"grad_norm": 1.249990193797269,
|
|
"learning_rate": 3.9996168791339075e-05,
|
|
"loss": 0.5166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4642437696456909,
|
|
"step": 115,
|
|
"valid_targets_mean": 989.8,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 0.7843137254901961,
|
|
"grad_norm": 1.2281972228406923,
|
|
"learning_rate": 3.998712385271904e-05,
|
|
"loss": 0.5019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48900026082992554,
|
|
"step": 120,
|
|
"valid_targets_mean": 832.8,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.8169934640522876,
|
|
"grad_norm": 1.3239536346835794,
|
|
"learning_rate": 3.997276116485867e-05,
|
|
"loss": 0.5344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6641391515731812,
|
|
"step": 125,
|
|
"valid_targets_mean": 930.1,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 0.8496732026143791,
|
|
"grad_norm": 1.1579149024229984,
|
|
"learning_rate": 3.995308454907679e-05,
|
|
"loss": 0.542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5919806957244873,
|
|
"step": 130,
|
|
"valid_targets_mean": 1064.2,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 1.0457817537071603,
|
|
"learning_rate": 3.992809924050924e-05,
|
|
"loss": 0.4896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5484431385993958,
|
|
"step": 135,
|
|
"valid_targets_mean": 1360.4,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 0.9150326797385621,
|
|
"grad_norm": 1.2511239930549563,
|
|
"learning_rate": 3.9897811886716054e-05,
|
|
"loss": 0.5363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.610672652721405,
|
|
"step": 140,
|
|
"valid_targets_mean": 1068.2,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 0.9477124183006536,
|
|
"grad_norm": 1.2337054029635952,
|
|
"learning_rate": 3.986223054591281e-05,
|
|
"loss": 0.5173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4866800606250763,
|
|
"step": 145,
|
|
"valid_targets_mean": 781.0,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 0.9803921568627451,
|
|
"grad_norm": 0.8076936922856879,
|
|
"learning_rate": 3.982136468482665e-05,
|
|
"loss": 0.4419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35834211111068726,
|
|
"step": 150,
|
|
"valid_targets_mean": 1508.8,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 1.0130718954248366,
|
|
"grad_norm": 0.7776333910648919,
|
|
"learning_rate": 3.9775225176177595e-05,
|
|
"loss": 0.3684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27405640482902527,
|
|
"step": 155,
|
|
"valid_targets_mean": 1327.5,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 1.0457516339869282,
|
|
"grad_norm": 1.1734500557798586,
|
|
"learning_rate": 3.972382429578577e-05,
|
|
"loss": 0.442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37759023904800415,
|
|
"step": 160,
|
|
"valid_targets_mean": 763.9,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 1.0784313725490196,
|
|
"grad_norm": 0.9239963152479674,
|
|
"learning_rate": 3.966717571930529e-05,
|
|
"loss": 0.4546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3895590901374817,
|
|
"step": 165,
|
|
"valid_targets_mean": 1424.2,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.7807825561166607,
|
|
"learning_rate": 3.960529451858575e-05,
|
|
"loss": 0.4344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3694984018802643,
|
|
"step": 170,
|
|
"valid_targets_mean": 1572.8,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 1.1437908496732025,
|
|
"grad_norm": 0.7003455825119332,
|
|
"learning_rate": 3.9538197157662226e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527535557746887,
|
|
"step": 175,
|
|
"valid_targets_mean": 1521.2,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 1.7156135500256005,
|
|
"learning_rate": 3.946590148837487e-05,
|
|
"loss": 0.4335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47262683510780334,
|
|
"step": 180,
|
|
"valid_targets_mean": 566.9,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 1.2091503267973855,
|
|
"grad_norm": 1.008970459747819,
|
|
"learning_rate": 3.9388426745619266e-05,
|
|
"loss": 0.4002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3759155869483948,
|
|
"step": 185,
|
|
"valid_targets_mean": 1439.6,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 1.2418300653594772,
|
|
"grad_norm": 1.327191452209238,
|
|
"learning_rate": 3.930579354222883e-05,
|
|
"loss": 0.4606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.463574081659317,
|
|
"step": 190,
|
|
"valid_targets_mean": 751.6,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.2745098039215685,
|
|
"grad_norm": 1.1345668523426535,
|
|
"learning_rate": 3.921802386349057e-05,
|
|
"loss": 0.4677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43786492943763733,
|
|
"step": 195,
|
|
"valid_targets_mean": 980.7,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 1.3071895424836601,
|
|
"grad_norm": 1.377027240152856,
|
|
"learning_rate": 3.912514106129576e-05,
|
|
"loss": 0.458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5057384967803955,
|
|
"step": 200,
|
|
"valid_targets_mean": 731.2,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 1.3398692810457518,
|
|
"grad_norm": 0.8297808392093082,
|
|
"learning_rate": 3.902716984792685e-05,
|
|
"loss": 0.4572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32464444637298584,
|
|
"step": 205,
|
|
"valid_targets_mean": 1297.6,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.3725490196078431,
|
|
"grad_norm": 0.982008610679383,
|
|
"learning_rate": 3.8924136289482686e-05,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49354782700538635,
|
|
"step": 210,
|
|
"valid_targets_mean": 1354.9,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 1.4052287581699345,
|
|
"grad_norm": 0.9583843270801939,
|
|
"learning_rate": 3.881606779894329e-05,
|
|
"loss": 0.476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4370919466018677,
|
|
"step": 215,
|
|
"valid_targets_mean": 1446.8,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 1.4379084967320261,
|
|
"grad_norm": 0.981958627933756,
|
|
"learning_rate": 3.8702993128876455e-05,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4009607434272766,
|
|
"step": 220,
|
|
"valid_targets_mean": 1175.4,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 1.0318349122572381,
|
|
"learning_rate": 3.858494236378785e-05,
|
|
"loss": 0.4517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34431201219558716,
|
|
"step": 225,
|
|
"valid_targets_mean": 848.2,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 1.5032679738562091,
|
|
"grad_norm": 1.1926829814685804,
|
|
"learning_rate": 3.846194691211678e-05,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5188771486282349,
|
|
"step": 230,
|
|
"valid_targets_mean": 975.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 1.5359477124183005,
|
|
"grad_norm": 1.3287118805218745,
|
|
"learning_rate": 3.8334039497879694e-05,
|
|
"loss": 0.4525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43521052598953247,
|
|
"step": 235,
|
|
"valid_targets_mean": 755.8,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.5686274509803921,
|
|
"grad_norm": 1.0912341313805085,
|
|
"learning_rate": 3.8201254151963664e-05,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4125358462333679,
|
|
"step": 240,
|
|
"valid_targets_mean": 935.4,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 1.6013071895424837,
|
|
"grad_norm": 1.337631920998683,
|
|
"learning_rate": 3.8063626203072196e-05,
|
|
"loss": 0.4416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4946654736995697,
|
|
"step": 245,
|
|
"valid_targets_mean": 760.1,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 1.6339869281045751,
|
|
"grad_norm": 0.7868890760626824,
|
|
"learning_rate": 3.792119226832569e-05,
|
|
"loss": 0.4301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3513905704021454,
|
|
"step": 250,
|
|
"valid_targets_mean": 1832.1,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 1.1418891712589625,
|
|
"learning_rate": 3.7773990243519154e-05,
|
|
"loss": 0.4653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43997740745544434,
|
|
"step": 255,
|
|
"valid_targets_mean": 1025.1,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 1.6993464052287581,
|
|
"grad_norm": 1.1982818361901046,
|
|
"learning_rate": 3.762205929303969e-05,
|
|
"loss": 0.443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.523041844367981,
|
|
"step": 260,
|
|
"valid_targets_mean": 893.4,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 1.7320261437908497,
|
|
"grad_norm": 1.4756814409685264,
|
|
"learning_rate": 3.746543983944646e-05,
|
|
"loss": 0.4581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4994322657585144,
|
|
"step": 265,
|
|
"valid_targets_mean": 681.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 1.2251966207419902,
|
|
"learning_rate": 3.730417355271593e-05,
|
|
"loss": 0.4599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4771292209625244,
|
|
"step": 270,
|
|
"valid_targets_mean": 823.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.7973856209150327,
|
|
"grad_norm": 0.8560952770470027,
|
|
"learning_rate": 3.713830333915517e-05,
|
|
"loss": 0.4125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3659961223602295,
|
|
"step": 275,
|
|
"valid_targets_mean": 1743.6,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 1.8300653594771243,
|
|
"grad_norm": 1.2640409291326877,
|
|
"learning_rate": 3.6967873329986305e-05,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40237635374069214,
|
|
"step": 280,
|
|
"valid_targets_mean": 707.1,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 1.8627450980392157,
|
|
"grad_norm": 1.6042954967809109,
|
|
"learning_rate": 3.679292886960497e-05,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47581565380096436,
|
|
"step": 285,
|
|
"valid_targets_mean": 964.8,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.8954248366013071,
|
|
"grad_norm": 1.5217135823702368,
|
|
"learning_rate": 3.661351650351608e-05,
|
|
"loss": 0.4431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4720376431941986,
|
|
"step": 290,
|
|
"valid_targets_mean": 604.7,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 1.9281045751633987,
|
|
"grad_norm": 1.4782923070866694,
|
|
"learning_rate": 3.642968396594995e-05,
|
|
"loss": 0.4515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5038082003593445,
|
|
"step": 295,
|
|
"valid_targets_mean": 618.2,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.9607843137254903,
|
|
"grad_norm": 1.1511433477713358,
|
|
"learning_rate": 3.624148016716222e-05,
|
|
"loss": 0.4676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5166301727294922,
|
|
"step": 300,
|
|
"valid_targets_mean": 1017.7,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 1.9934640522875817,
|
|
"grad_norm": 0.7376916907598469,
|
|
"learning_rate": 3.604895518042081e-05,
|
|
"loss": 0.4447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603156566619873,
|
|
"step": 305,
|
|
"valid_targets_mean": 1753.6,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.026143790849673,
|
|
"grad_norm": 1.1136650197465474,
|
|
"learning_rate": 3.585216022868356e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3538415729999542,
|
|
"step": 310,
|
|
"valid_targets_mean": 857.9,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 1.1254159871502543,
|
|
"learning_rate": 3.565114767096984e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3277912735939026,
|
|
"step": 315,
|
|
"valid_targets_mean": 981.1,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.0915032679738563,
|
|
"grad_norm": 1.3128993623554719,
|
|
"learning_rate": 3.544597098843001e-05,
|
|
"loss": 0.3653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37582293152809143,
|
|
"step": 320,
|
|
"valid_targets_mean": 853.1,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 2.1241830065359477,
|
|
"grad_norm": 1.192620898541865,
|
|
"learning_rate": 3.5236684770116295e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4059767723083496,
|
|
"step": 325,
|
|
"valid_targets_mean": 1326.2,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 2.156862745098039,
|
|
"grad_norm": 1.0971126049747322,
|
|
"learning_rate": 3.502334469845886e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33508726954460144,
|
|
"step": 330,
|
|
"valid_targets_mean": 1035.2,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.189542483660131,
|
|
"grad_norm": 1.24119071299791,
|
|
"learning_rate": 3.4806007534451075e-05,
|
|
"loss": 0.3717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4131796360015869,
|
|
"step": 335,
|
|
"valid_targets_mean": 1173.8,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.9019703404636363,
|
|
"learning_rate": 3.458473110254767e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3299303650856018,
|
|
"step": 340,
|
|
"valid_targets_mean": 1531.8,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 2.2549019607843137,
|
|
"grad_norm": 1.3137476700390396,
|
|
"learning_rate": 3.43595742752801e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33964088559150696,
|
|
"step": 345,
|
|
"valid_targets_mean": 799.1,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 2.287581699346405,
|
|
"grad_norm": 1.3766367222049374,
|
|
"learning_rate": 3.413059695759297e-05,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41154414415359497,
|
|
"step": 350,
|
|
"valid_targets_mean": 812.0,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 2.3202614379084965,
|
|
"grad_norm": 0.694606217367308,
|
|
"learning_rate": 3.389786007090581e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22677206993103027,
|
|
"step": 355,
|
|
"valid_targets_mean": 1935.1,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 1.6554928059464398,
|
|
"learning_rate": 3.3661425536904354e-05,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4165865480899811,
|
|
"step": 360,
|
|
"valid_targets_mean": 564.1,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 2.3856209150326797,
|
|
"grad_norm": 1.092528086923834,
|
|
"learning_rate": 3.3421356261065805e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4011123776435852,
|
|
"step": 365,
|
|
"valid_targets_mean": 1385.9,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 2.418300653594771,
|
|
"grad_norm": 1.0606643850646214,
|
|
"learning_rate": 3.317771611592222e-05,
|
|
"loss": 0.3509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3307228088378906,
|
|
"step": 370,
|
|
"valid_targets_mean": 1428.1,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 2.450980392156863,
|
|
"grad_norm": 1.2049996354335248,
|
|
"learning_rate": 3.293056992406671e-05,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3773796856403351,
|
|
"step": 375,
|
|
"valid_targets_mean": 1160.4,
|
|
"valid_targets_min": 216
|
|
},
|
|
{
|
|
"epoch": 2.4836601307189543,
|
|
"grad_norm": 1.2899198397563096,
|
|
"learning_rate": 3.267998344090679e-05,
|
|
"loss": 0.3411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38119545578956604,
|
|
"step": 380,
|
|
"valid_targets_mean": 775.1,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 2.5163398692810457,
|
|
"grad_norm": 1.1656133845239687,
|
|
"learning_rate": 3.242602333716958e-05,
|
|
"loss": 0.3492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3696956932544708,
|
|
"step": 385,
|
|
"valid_targets_mean": 932.7,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 2.549019607843137,
|
|
"grad_norm": 1.0524939861495455,
|
|
"learning_rate": 3.21687571811635e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3500986695289612,
|
|
"step": 390,
|
|
"valid_targets_mean": 1204.0,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 2.581699346405229,
|
|
"grad_norm": 1.5158107702474672,
|
|
"learning_rate": 3.190825342080109e-05,
|
|
"loss": 0.3632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41287726163864136,
|
|
"step": 395,
|
|
"valid_targets_mean": 684.8,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 2.6143790849673203,
|
|
"grad_norm": 1.0708264531248797,
|
|
"learning_rate": 3.164458136538789e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635033130645752,
|
|
"step": 400,
|
|
"valid_targets_mean": 931.7,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 1.1926595250829215,
|
|
"learning_rate": 3.137781116718206e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3295871317386627,
|
|
"step": 405,
|
|
"valid_targets_mean": 954.6,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.6797385620915035,
|
|
"grad_norm": 1.4678135870422717,
|
|
"learning_rate": 3.110801380272975e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40647825598716736,
|
|
"step": 410,
|
|
"valid_targets_mean": 701.2,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 2.712418300653595,
|
|
"grad_norm": 1.2361572946977641,
|
|
"learning_rate": 3.0835261053981226e-05,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41568267345428467,
|
|
"step": 415,
|
|
"valid_targets_mean": 1122.1,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 2.7450980392156863,
|
|
"grad_norm": 1.0225753867309941,
|
|
"learning_rate": 3.055962548919257e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32175296545028687,
|
|
"step": 420,
|
|
"valid_targets_mean": 1174.4,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.8940629697485692,
|
|
"learning_rate": 3.0281180443618337e-05,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30009371042251587,
|
|
"step": 425,
|
|
"valid_targets_mean": 1711.0,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 2.810457516339869,
|
|
"grad_norm": 0.8560645122217327,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2567868232727051,
|
|
"step": 430,
|
|
"valid_targets_mean": 1575.6,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 2.843137254901961,
|
|
"grad_norm": 1.4792577436560297,
|
|
"learning_rate": 2.9716158968855665e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3718124330043793,
|
|
"step": 435,
|
|
"valid_targets_mean": 682.1,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 2.8758169934640523,
|
|
"grad_norm": 0.7873403934813155,
|
|
"learning_rate": 2.9429732868576e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22500234842300415,
|
|
"step": 440,
|
|
"valid_targets_mean": 1949.4,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 2.9084967320261437,
|
|
"grad_norm": 1.31414956514437,
|
|
"learning_rate": 2.9140797905331964e-05,
|
|
"loss": 0.3724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3907439410686493,
|
|
"step": 445,
|
|
"valid_targets_mean": 806.3,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 1.5773260718196254,
|
|
"learning_rate": 2.884943095279946e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44405168294906616,
|
|
"step": 450,
|
|
"valid_targets_mean": 660.8,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 2.973856209150327,
|
|
"grad_norm": 1.1417920727842683,
|
|
"learning_rate": 2.8555709531706423e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857981026172638,
|
|
"step": 455,
|
|
"valid_targets_mean": 842.0,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 3.0065359477124183,
|
|
"grad_norm": 1.4849160766480622,
|
|
"learning_rate": 2.825971178920777e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776503562927246,
|
|
"step": 460,
|
|
"valid_targets_mean": 744.2,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 3.0392156862745097,
|
|
"grad_norm": 1.6833400262584757,
|
|
"learning_rate": 2.796151647809364e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3672228455543518,
|
|
"step": 465,
|
|
"valid_targets_mean": 619.8,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 3.0718954248366015,
|
|
"grad_norm": 1.5789185393237866,
|
|
"learning_rate": 2.7661202935836536e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3267885148525238,
|
|
"step": 470,
|
|
"valid_targets_mean": 747.2,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 3.104575163398693,
|
|
"grad_norm": 1.525005567408752,
|
|
"learning_rate": 2.73588510634829e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3514612913131714,
|
|
"step": 475,
|
|
"valid_targets_mean": 866.7,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 3.1372549019607843,
|
|
"grad_norm": 1.2913239483066061,
|
|
"learning_rate": 2.7054541304394736e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2418668270111084,
|
|
"step": 480,
|
|
"valid_targets_mean": 889.4,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 3.1699346405228757,
|
|
"grad_norm": 1.2082673756279816,
|
|
"learning_rate": 2.6748354622846962e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24368639290332794,
|
|
"step": 485,
|
|
"valid_targets_mean": 1031.2,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 3.2026143790849675,
|
|
"grad_norm": 1.1922254782261243,
|
|
"learning_rate": 2.6440372482486127e-05,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2941184639930725,
|
|
"step": 490,
|
|
"valid_targets_mean": 1239.8,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 3.235294117647059,
|
|
"grad_norm": 1.6204743095393073,
|
|
"learning_rate": 2.613067682465631e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34600523114204407,
|
|
"step": 495,
|
|
"valid_targets_mean": 727.4,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 3.2679738562091503,
|
|
"grad_norm": 1.7067005151903063,
|
|
"learning_rate": 2.5819350046597927e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33082401752471924,
|
|
"step": 500,
|
|
"valid_targets_mean": 711.6,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.3006535947712417,
|
|
"grad_norm": 1.344918045352721,
|
|
"learning_rate": 2.55064749795252e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29931801557540894,
|
|
"step": 505,
|
|
"valid_targets_mean": 1001.1,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 1.5947027063326324,
|
|
"learning_rate": 2.519213486658819e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3358372151851654,
|
|
"step": 510,
|
|
"valid_targets_mean": 640.4,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 3.366013071895425,
|
|
"grad_norm": 2.1390767780943434,
|
|
"learning_rate": 2.4876413340725244e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3361467719078064,
|
|
"step": 515,
|
|
"valid_targets_mean": 840.6,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 3.3986928104575163,
|
|
"grad_norm": 1.4750832729481385,
|
|
"learning_rate": 2.4559394402411703e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28374865651130676,
|
|
"step": 520,
|
|
"valid_targets_mean": 662.6,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 3.431372549019608,
|
|
"grad_norm": 1.1883189575628739,
|
|
"learning_rate": 2.4241162397310836e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288798451423645,
|
|
"step": 525,
|
|
"valid_targets_mean": 1315.8,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 3.4640522875816995,
|
|
"grad_norm": 1.4617369234648558,
|
|
"learning_rate": 2.3921801993832964e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35622355341911316,
|
|
"step": 530,
|
|
"valid_targets_mean": 895.0,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 3.496732026143791,
|
|
"grad_norm": 1.0468449297189089,
|
|
"learning_rate": 2.3601398160608667e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579011917114258,
|
|
"step": 535,
|
|
"valid_targets_mean": 1593.1,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 3.5294117647058822,
|
|
"grad_norm": 1.7782119793681848,
|
|
"learning_rate": 2.3280036143882145e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.340090811252594,
|
|
"step": 540,
|
|
"valid_targets_mean": 528.3,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 3.5620915032679736,
|
|
"grad_norm": 1.1956892052285935,
|
|
"learning_rate": 2.2957801444830684e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931376099586487,
|
|
"step": 545,
|
|
"valid_targets_mean": 1387.6,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 3.5947712418300655,
|
|
"grad_norm": 1.330594535756061,
|
|
"learning_rate": 2.2634779796816377e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29296964406967163,
|
|
"step": 550,
|
|
"valid_targets_mean": 1105.3,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 3.627450980392157,
|
|
"grad_norm": 0.9003334164561678,
|
|
"learning_rate": 2.2311057142575953e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1992267370223999,
|
|
"step": 555,
|
|
"valid_targets_mean": 1443.8,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 3.6601307189542482,
|
|
"grad_norm": 1.5529200780881063,
|
|
"learning_rate": 2.198671961135498e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3122693598270416,
|
|
"step": 560,
|
|
"valid_targets_mean": 701.2,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 3.69281045751634,
|
|
"grad_norm": 1.5208864513474185,
|
|
"learning_rate": 2.166185349599245e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26625901460647583,
|
|
"step": 565,
|
|
"valid_targets_mean": 877.5,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 3.7254901960784315,
|
|
"grad_norm": 1.0194886982642561,
|
|
"learning_rate": 2.1336545229961772e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25733599066734314,
|
|
"step": 570,
|
|
"valid_targets_mean": 1400.1,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 3.758169934640523,
|
|
"grad_norm": 1.1707112854047086,
|
|
"learning_rate": 2.1010881364374404e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35556066036224365,
|
|
"step": 575,
|
|
"valid_targets_mean": 1315.2,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 3.7908496732026142,
|
|
"grad_norm": 1.0812520101554173,
|
|
"learning_rate": 2.0684948544952217e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21427355706691742,
|
|
"step": 580,
|
|
"valid_targets_mean": 978.9,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 3.8235294117647056,
|
|
"grad_norm": 1.7609304063968088,
|
|
"learning_rate": 2.0358833488974556e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28584054112434387,
|
|
"step": 585,
|
|
"valid_targets_mean": 803.2,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.8562091503267975,
|
|
"grad_norm": 1.372524503176478,
|
|
"learning_rate": 2.0032622962206428e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25724029541015625,
|
|
"step": 590,
|
|
"valid_targets_mean": 837.6,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 1.551633309136953,
|
|
"learning_rate": 1.9706403755813672e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098876476287842,
|
|
"step": 595,
|
|
"valid_targets_mean": 778.6,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 3.9215686274509802,
|
|
"grad_norm": 1.4200996111858124,
|
|
"learning_rate": 1.9380262663271407e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28785258531570435,
|
|
"step": 600,
|
|
"valid_targets_mean": 1276.4,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 3.954248366013072,
|
|
"grad_norm": 1.1423875399407577,
|
|
"learning_rate": 1.9054286457271892e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26374930143356323,
|
|
"step": 605,
|
|
"valid_targets_mean": 1187.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 3.9869281045751634,
|
|
"grad_norm": 1.047432790499157,
|
|
"learning_rate": 1.8728561866637886e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22139273583889008,
|
|
"step": 610,
|
|
"valid_targets_mean": 1173.8,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 4.019607843137255,
|
|
"grad_norm": 1.1536745154975416,
|
|
"learning_rate": 1.840317555324764e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24080944061279297,
|
|
"step": 615,
|
|
"valid_targets_mean": 1125.4,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 4.052287581699346,
|
|
"grad_norm": 2.128292235679121,
|
|
"learning_rate": 1.8078214088977817e-05,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866114377975464,
|
|
"step": 620,
|
|
"valid_targets_mean": 497.8,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.084967320261438,
|
|
"grad_norm": 1.0323930725322927,
|
|
"learning_rate": 1.7753763932670257e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18582549691200256,
|
|
"step": 625,
|
|
"valid_targets_mean": 1817.0,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 4.117647058823529,
|
|
"grad_norm": 1.296142459740979,
|
|
"learning_rate": 1.742991140712881e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1894800066947937,
|
|
"step": 630,
|
|
"valid_targets_mean": 1049.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.150326797385621,
|
|
"grad_norm": 1.710406042987915,
|
|
"learning_rate": 1.7106742676152454e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23869173228740692,
|
|
"step": 635,
|
|
"valid_targets_mean": 681.4,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 4.183006535947713,
|
|
"grad_norm": 1.4576865063285587,
|
|
"learning_rate": 1.678434372161064e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18657907843589783,
|
|
"step": 640,
|
|
"valid_targets_mean": 978.1,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 4.215686274509804,
|
|
"grad_norm": 1.0562589689629556,
|
|
"learning_rate": 1.646280032056704e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481604427099228,
|
|
"step": 645,
|
|
"valid_targets_mean": 1139.1,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 4.248366013071895,
|
|
"grad_norm": 1.4916205632940045,
|
|
"learning_rate": 1.6142198022457853e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24131783843040466,
|
|
"step": 650,
|
|
"valid_targets_mean": 1026.0,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 4.281045751633987,
|
|
"grad_norm": 1.659388286048901,
|
|
"learning_rate": 1.5822622126330597e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790229022502899,
|
|
"step": 655,
|
|
"valid_targets_mean": 866.1,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.313725490196078,
|
|
"grad_norm": 1.73499163192446,
|
|
"learning_rate": 1.550415765814955e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19306321442127228,
|
|
"step": 660,
|
|
"valid_targets_mean": 854.9,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.34640522875817,
|
|
"grad_norm": 1.7154850302464963,
|
|
"learning_rate": 1.5186889348173857e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25681236386299133,
|
|
"step": 665,
|
|
"valid_targets_mean": 689.3,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 4.379084967320262,
|
|
"grad_norm": 1.488660446451009,
|
|
"learning_rate": 1.487090160841433e-05,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581811547279358,
|
|
"step": 670,
|
|
"valid_targets_mean": 959.4,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 4.411764705882353,
|
|
"grad_norm": 1.1881498761592073,
|
|
"learning_rate": 1.4556278510174827e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19084610044956207,
|
|
"step": 675,
|
|
"valid_targets_mean": 1209.3,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 1.2552326944423986,
|
|
"learning_rate": 1.424310376168441e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1857946366071701,
|
|
"step": 680,
|
|
"valid_targets_mean": 1078.2,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.477124183006536,
|
|
"grad_norm": 1.4969283216444473,
|
|
"learning_rate": 1.3931460685826022e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22742964327335358,
|
|
"step": 685,
|
|
"valid_targets_mean": 1085.7,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 4.509803921568627,
|
|
"grad_norm": 1.154906103363336,
|
|
"learning_rate": 1.3621432197967664e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1935591995716095,
|
|
"step": 690,
|
|
"valid_targets_mean": 1364.5,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 4.542483660130719,
|
|
"grad_norm": 1.81580756139067,
|
|
"learning_rate": 1.3313100783902097e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677111327648163,
|
|
"step": 695,
|
|
"valid_targets_mean": 630.1,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 4.57516339869281,
|
|
"grad_norm": 1.4394912824181447,
|
|
"learning_rate": 1.3006548477900735e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861871123313904,
|
|
"step": 700,
|
|
"valid_targets_mean": 1012.4,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.607843137254902,
|
|
"grad_norm": 1.7782439408173458,
|
|
"learning_rate": 1.270185684088771e-05,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639431953430176,
|
|
"step": 705,
|
|
"valid_targets_mean": 893.2,
|
|
"valid_targets_min": 218
|
|
},
|
|
{
|
|
"epoch": 4.640522875816993,
|
|
"grad_norm": 1.3770119209689564,
|
|
"learning_rate": 1.2399106938739903e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22172415256500244,
|
|
"step": 710,
|
|
"valid_targets_mean": 898.8,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 4.673202614379085,
|
|
"grad_norm": 1.5667989718671909,
|
|
"learning_rate": 1.2098379320718633e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25639814138412476,
|
|
"step": 715,
|
|
"valid_targets_mean": 774.8,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 4.705882352941177,
|
|
"grad_norm": 1.6863304630924782,
|
|
"learning_rate": 1.179975399803881e-05,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574901878833771,
|
|
"step": 720,
|
|
"valid_targets_mean": 951.9,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 4.738562091503268,
|
|
"grad_norm": 1.835300847793114,
|
|
"learning_rate": 1.1503310422581286e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23203280568122864,
|
|
"step": 725,
|
|
"valid_targets_mean": 602.1,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.771241830065359,
|
|
"grad_norm": 1.4010835866515463,
|
|
"learning_rate": 1.1209127465753978e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546510100364685,
|
|
"step": 730,
|
|
"valid_targets_mean": 1022.6,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 4.803921568627451,
|
|
"grad_norm": 1.6323587959134847,
|
|
"learning_rate": 1.0917283397507392e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2114773392677307,
|
|
"step": 735,
|
|
"valid_targets_mean": 891.1,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.836601307189542,
|
|
"grad_norm": 1.8162227837000073,
|
|
"learning_rate": 1.0627855865510294e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259385347366333,
|
|
"step": 740,
|
|
"valid_targets_mean": 751.1,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 4.8692810457516345,
|
|
"grad_norm": 1.5458544536658743,
|
|
"learning_rate": 1.034092187449082e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1714608073234558,
|
|
"step": 745,
|
|
"valid_targets_mean": 771.1,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.901960784313726,
|
|
"grad_norm": 1.3785541410906907,
|
|
"learning_rate": 1.0056557765748684e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24545931816101074,
|
|
"step": 750,
|
|
"valid_targets_mean": 995.0,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 4.934640522875817,
|
|
"grad_norm": 1.3403750984818765,
|
|
"learning_rate": 9.774839196843953e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25746211409568787,
|
|
"step": 755,
|
|
"valid_targets_mean": 1251.8,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 4.967320261437909,
|
|
"grad_norm": 1.619409098874215,
|
|
"learning_rate": 9.49584112146765e-06,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2491072118282318,
|
|
"step": 760,
|
|
"valid_targets_mean": 717.7,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 1.532384895615285,
|
|
"learning_rate": 9.21963776949969e-06,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2333248406648636,
|
|
"step": 765,
|
|
"valid_targets_mean": 766.5,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 5.032679738562091,
|
|
"grad_norm": 1.7276785744874288,
|
|
"learning_rate": 8.946302627259363e-06,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20002366602420807,
|
|
"step": 770,
|
|
"valid_targets_mean": 606.7,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 5.065359477124183,
|
|
"grad_norm": 1.3996086672565842,
|
|
"learning_rate": 8.67590841795366e-06,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16175302863121033,
|
|
"step": 775,
|
|
"valid_targets_mean": 969.9,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 5.098039215686274,
|
|
"grad_norm": 1.3153977909254402,
|
|
"learning_rate": 8.408527082328605e-06,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1600106954574585,
|
|
"step": 780,
|
|
"valid_targets_mean": 1062.1,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 5.130718954248366,
|
|
"grad_norm": 1.5111274969532478,
|
|
"learning_rate": 8.144229759528835e-06,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15232618153095245,
|
|
"step": 785,
|
|
"valid_targets_mean": 868.0,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 5.163398692810458,
|
|
"grad_norm": 1.30810433786,
|
|
"learning_rate": 7.883086768170369e-06,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15596047043800354,
|
|
"step": 790,
|
|
"valid_targets_mean": 1220.3,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 5.196078431372549,
|
|
"grad_norm": 1.2979766762023117,
|
|
"learning_rate": 7.625167587631732e-06,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1786264181137085,
|
|
"step": 795,
|
|
"valid_targets_mean": 1454.9,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 5.228758169934641,
|
|
"grad_norm": 1.5405276569855957,
|
|
"learning_rate": 7.370540839568372e-06,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15867263078689575,
|
|
"step": 800,
|
|
"valid_targets_mean": 666.9,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 5.261437908496732,
|
|
"grad_norm": 1.5240591186359513,
|
|
"learning_rate": 7.119274269655265e-06,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2132365107536316,
|
|
"step": 805,
|
|
"valid_targets_mean": 1250.9,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 5.294117647058823,
|
|
"grad_norm": 1.703110559867695,
|
|
"learning_rate": 6.87143472956256e-06,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1889723688364029,
|
|
"step": 810,
|
|
"valid_targets_mean": 872.8,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 5.326797385620915,
|
|
"grad_norm": 1.3548680024706885,
|
|
"learning_rate": 6.627088159169146e-06,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16059917211532593,
|
|
"step": 815,
|
|
"valid_targets_mean": 1151.3,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 5.359477124183006,
|
|
"grad_norm": 1.766434503841294,
|
|
"learning_rate": 6.3862995690187505e-06,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1978355199098587,
|
|
"step": 820,
|
|
"valid_targets_mean": 688.6,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 5.392156862745098,
|
|
"grad_norm": 1.681475454731881,
|
|
"learning_rate": 6.1491330230232944e-06,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21753373742103577,
|
|
"step": 825,
|
|
"valid_targets_mean": 723.4,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 5.42483660130719,
|
|
"grad_norm": 1.436963945472817,
|
|
"learning_rate": 5.915651621418172e-06,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16063036024570465,
|
|
"step": 830,
|
|
"valid_targets_mean": 1000.5,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 5.457516339869281,
|
|
"grad_norm": 1.6529262568452745,
|
|
"learning_rate": 5.6859174839738576e-06,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22638444602489471,
|
|
"step": 835,
|
|
"valid_targets_mean": 919.0,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 5.490196078431373,
|
|
"grad_norm": 1.3361133443873117,
|
|
"learning_rate": 5.459991733468375e-06,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1512422263622284,
|
|
"step": 840,
|
|
"valid_targets_mean": 1104.3,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 5.522875816993464,
|
|
"grad_norm": 1.4473821491885344,
|
|
"learning_rate": 5.237934479425091e-06,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1956217885017395,
|
|
"step": 845,
|
|
"valid_targets_mean": 1222.6,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 2.161746051226456,
|
|
"learning_rate": 5.019804802120027e-06,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2561233341693878,
|
|
"step": 850,
|
|
"valid_targets_mean": 559.7,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 1.7693395053973981,
|
|
"learning_rate": 4.805660736863023e-06,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19513806700706482,
|
|
"step": 855,
|
|
"valid_targets_mean": 617.1,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 5.620915032679738,
|
|
"grad_norm": 1.3552287003331067,
|
|
"learning_rate": 4.595559258556963e-06,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1493796408176422,
|
|
"step": 860,
|
|
"valid_targets_mean": 1039.1,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 5.65359477124183,
|
|
"grad_norm": 1.3819829355501174,
|
|
"learning_rate": 4.389556266539081e-06,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17469432950019836,
|
|
"step": 865,
|
|
"valid_targets_mean": 1213.0,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 5.686274509803922,
|
|
"grad_norm": 1.1282318414623302,
|
|
"learning_rate": 4.187706569708472e-06,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17088523507118225,
|
|
"step": 870,
|
|
"valid_targets_mean": 1821.5,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 5.718954248366013,
|
|
"grad_norm": 1.5706108442305917,
|
|
"learning_rate": 3.990063871943681e-06,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20732146501541138,
|
|
"step": 875,
|
|
"valid_targets_mean": 747.2,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 5.751633986928105,
|
|
"grad_norm": 1.2522615185034307,
|
|
"learning_rate": 3.796680757814344e-06,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16442234814167023,
|
|
"step": 880,
|
|
"valid_targets_mean": 1394.4,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 5.784313725490196,
|
|
"grad_norm": 1.5816802112242634,
|
|
"learning_rate": 3.6076086785905708e-06,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20470181107521057,
|
|
"step": 885,
|
|
"valid_targets_mean": 890.7,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 5.816993464052287,
|
|
"grad_norm": 1.7816275222819353,
|
|
"learning_rate": 3.4228979385539153e-06,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2052854597568512,
|
|
"step": 890,
|
|
"valid_targets_mean": 843.9,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 5.849673202614379,
|
|
"grad_norm": 2.7820140943526086,
|
|
"learning_rate": 3.242597681613471e-06,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3125446140766144,
|
|
"step": 895,
|
|
"valid_targets_mean": 460.5,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.882352941176471,
|
|
"grad_norm": 1.089471821874377,
|
|
"learning_rate": 3.0667558782306782e-06,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12326781451702118,
|
|
"step": 900,
|
|
"valid_targets_mean": 1121.2,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 5.915032679738562,
|
|
"grad_norm": 1.7665649158055792,
|
|
"learning_rate": 2.895419312656409e-06,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19156697392463684,
|
|
"step": 905,
|
|
"valid_targets_mean": 633.0,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 5.947712418300654,
|
|
"grad_norm": 1.6688900557662307,
|
|
"learning_rate": 2.7286335704835788e-06,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18692585825920105,
|
|
"step": 910,
|
|
"valid_targets_mean": 1061.8,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 5.980392156862745,
|
|
"grad_norm": 1.7410431833200393,
|
|
"learning_rate": 2.566443026518692e-06,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652069926261902,
|
|
"step": 915,
|
|
"valid_targets_mean": 920.4,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 6.0130718954248366,
|
|
"grad_norm": 1.5421028631124705,
|
|
"learning_rate": 2.4088908329755678e-06,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17312213778495789,
|
|
"step": 920,
|
|
"valid_targets_mean": 767.6,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 6.045751633986928,
|
|
"grad_norm": 1.5991633631226976,
|
|
"learning_rate": 2.256018907994284e-06,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17524030804634094,
|
|
"step": 925,
|
|
"valid_targets_mean": 662.6,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 6.078431372549019,
|
|
"grad_norm": 1.3250031504324633,
|
|
"learning_rate": 2.107867924488509e-06,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12431657314300537,
|
|
"step": 930,
|
|
"valid_targets_mean": 958.3,
|
|
"valid_targets_min": 218
|
|
},
|
|
{
|
|
"epoch": 6.111111111111111,
|
|
"grad_norm": 1.2101774128898037,
|
|
"learning_rate": 1.9644772993241166e-06,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1561109721660614,
|
|
"step": 935,
|
|
"valid_targets_mean": 1136.2,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 6.143790849673203,
|
|
"grad_norm": 1.925356995762324,
|
|
"learning_rate": 1.8258851828319678e-06,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17998185753822327,
|
|
"step": 940,
|
|
"valid_targets_mean": 535.2,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 6.176470588235294,
|
|
"grad_norm": 1.737151480718245,
|
|
"learning_rate": 1.692128448657695e-06,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18432410061359406,
|
|
"step": 945,
|
|
"valid_targets_mean": 649.9,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 6.209150326797386,
|
|
"grad_norm": 1.4475105159337607,
|
|
"learning_rate": 1.5632426839511494e-06,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1958422064781189,
|
|
"step": 950,
|
|
"valid_targets_mean": 1076.3,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 6.241830065359477,
|
|
"grad_norm": 1.846609568443287,
|
|
"learning_rate": 1.4392621798981154e-06,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20268788933753967,
|
|
"step": 955,
|
|
"valid_targets_mean": 756.5,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 6.2745098039215685,
|
|
"grad_norm": 2.1487512775179427,
|
|
"learning_rate": 1.3202199225968481e-06,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21326568722724915,
|
|
"step": 960,
|
|
"valid_targets_mean": 772.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 6.30718954248366,
|
|
"grad_norm": 1.9580761806861307,
|
|
"learning_rate": 1.2061475842818337e-06,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19729763269424438,
|
|
"step": 965,
|
|
"valid_targets_mean": 605.1,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 6.339869281045751,
|
|
"grad_norm": 2.549605085746636,
|
|
"learning_rate": 1.0970755148971057e-06,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27497926354408264,
|
|
"step": 970,
|
|
"valid_targets_mean": 525.9,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 6.372549019607844,
|
|
"grad_norm": 1.9954702320448223,
|
|
"learning_rate": 9.930327340213908e-07,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24627891182899475,
|
|
"step": 975,
|
|
"valid_targets_mean": 668.2,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 6.405228758169935,
|
|
"grad_norm": 1.4583942689664242,
|
|
"learning_rate": 8.940469231471893e-07,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15345948934555054,
|
|
"step": 980,
|
|
"valid_targets_mean": 996.8,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 6.437908496732026,
|
|
"grad_norm": 1.2627907187503407,
|
|
"learning_rate": 8.001444183158602e-07,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19024597108364105,
|
|
"step": 985,
|
|
"valid_targets_mean": 1571.8,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 6.470588235294118,
|
|
"grad_norm": 1.5109741084844859,
|
|
"learning_rate": 7.1135020311071e-07,
|
|
"loss": 0.1758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15086877346038818,
|
|
"step": 990,
|
|
"valid_targets_mean": 737.7,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 6.503267973856209,
|
|
"grad_norm": 1.6130378471299882,
|
|
"learning_rate": 6.276879020098769e-07,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19527272880077362,
|
|
"step": 995,
|
|
"valid_targets_mean": 776.8,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 6.5359477124183005,
|
|
"grad_norm": 1.0872457293239366,
|
|
"learning_rate": 5.491797741008232e-07,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14185872673988342,
|
|
"step": 1000,
|
|
"valid_targets_mean": 1552.2,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 6.568627450980392,
|
|
"grad_norm": 1.5205248767147233,
|
|
"learning_rate": 4.758467071581363e-07,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15201915800571442,
|
|
"step": 1005,
|
|
"valid_targets_mean": 837.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 6.601307189542483,
|
|
"grad_norm": 1.5480417864738425,
|
|
"learning_rate": 4.077082120861309e-07,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18016156554222107,
|
|
"step": 1010,
|
|
"valid_targets_mean": 948.2,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 6.633986928104575,
|
|
"grad_norm": 1.4118892710756072,
|
|
"learning_rate": 3.4478241772780695e-07,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15900777280330658,
|
|
"step": 1015,
|
|
"valid_targets_mean": 1190.9,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 1.7202119833416936,
|
|
"learning_rate": 2.8708606604151757e-07,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19836293160915375,
|
|
"step": 1020,
|
|
"valid_targets_mean": 671.6,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 6.699346405228758,
|
|
"grad_norm": 1.6749245084847142,
|
|
"learning_rate": 2.346345076466272e-07,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17178763449192047,
|
|
"step": 1025,
|
|
"valid_targets_mean": 923.9,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 6.73202614379085,
|
|
"grad_norm": 1.3050532980878529,
|
|
"learning_rate": 1.8744169773932784e-07,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522129327058792,
|
|
"step": 1030,
|
|
"valid_targets_mean": 1047.8,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 6.764705882352941,
|
|
"grad_norm": 1.7719572564362351,
|
|
"learning_rate": 1.4552019237976e-07,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22099488973617554,
|
|
"step": 1035,
|
|
"valid_targets_mean": 829.3,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 6.7973856209150325,
|
|
"grad_norm": 1.4013677157515687,
|
|
"learning_rate": 1.0888114515134274e-07,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17296719551086426,
|
|
"step": 1040,
|
|
"valid_targets_mean": 1032.6,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 6.830065359477124,
|
|
"grad_norm": 1.4687939649728274,
|
|
"learning_rate": 7.753430419328301e-08,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16742470860481262,
|
|
"step": 1045,
|
|
"valid_targets_mean": 1204.0,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 6.862745098039216,
|
|
"grad_norm": 1.3669411898643498,
|
|
"learning_rate": 5.1488009606979195e-08,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13698092103004456,
|
|
"step": 1050,
|
|
"valid_targets_mean": 980.1,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 6.895424836601308,
|
|
"grad_norm": 1.119666687197484,
|
|
"learning_rate": 3.074919123708275e-08,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13298380374908447,
|
|
"step": 1055,
|
|
"valid_targets_mean": 1280.6,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 6.928104575163399,
|
|
"grad_norm": 1.5063802393305312,
|
|
"learning_rate": 1.5323366827737496e-08,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17369940876960754,
|
|
"step": 1060,
|
|
"valid_targets_mean": 982.8,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 6.96078431372549,
|
|
"grad_norm": 1.614719027777907,
|
|
"learning_rate": 5.2146405545427935e-09,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444833129644394,
|
|
"step": 1065,
|
|
"valid_targets_mean": 855.0,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 6.993464052287582,
|
|
"grad_norm": 1.435878529641309,
|
|
"learning_rate": 4.2570193260482727e-10,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.153692826628685,
|
|
"step": 1070,
|
|
"valid_targets_mean": 1271.9,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21700537204742432,
|
|
"step": 1071,
|
|
"total_flos": 87884495978496.0,
|
|
"train_loss": 0.3235391679834696,
|
|
"train_runtime": 3658.4254,
|
|
"train_samples_per_second": 4.669,
|
|
"train_steps_per_second": 0.293,
|
|
"valid_targets_mean": 902.0,
|
|
"valid_targets_min": 329
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1071,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 87884495978496.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|