4193 lines
116 KiB
JSON
4193 lines
116 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 1890,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.018518518518518517,
|
||
|
|
"grad_norm": 27.69713638899012,
|
||
|
|
"learning_rate": 8.465608465608466e-07,
|
||
|
|
"loss": 0.8879,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4629935324192047,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 5865.4,
|
||
|
|
"valid_targets_min": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.037037037037037035,
|
||
|
|
"grad_norm": 23.290742086079,
|
||
|
|
"learning_rate": 1.904761904761905e-06,
|
||
|
|
"loss": 0.87,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4101138412952423,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 6581.6,
|
||
|
|
"valid_targets_min": 4761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05555555555555555,
|
||
|
|
"grad_norm": 13.68077632229009,
|
||
|
|
"learning_rate": 2.962962962962963e-06,
|
||
|
|
"loss": 0.7768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.370077908039093,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 7269.0,
|
||
|
|
"valid_targets_min": 3491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07407407407407407,
|
||
|
|
"grad_norm": 4.83254717026584,
|
||
|
|
"learning_rate": 4.0211640211640215e-06,
|
||
|
|
"loss": 0.6639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3309337794780731,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 6683.6,
|
||
|
|
"valid_targets_min": 1240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09259259259259259,
|
||
|
|
"grad_norm": 2.428166466749575,
|
||
|
|
"learning_rate": 5.07936507936508e-06,
|
||
|
|
"loss": 0.6093,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3472573757171631,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 5540.1,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1111111111111111,
|
||
|
|
"grad_norm": 1.8170151536729782,
|
||
|
|
"learning_rate": 6.137566137566138e-06,
|
||
|
|
"loss": 0.577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.288746178150177,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 7024.4,
|
||
|
|
"valid_targets_min": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12962962962962962,
|
||
|
|
"grad_norm": 1.360405445125232,
|
||
|
|
"learning_rate": 7.195767195767196e-06,
|
||
|
|
"loss": 0.546,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3221224844455719,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 6184.5,
|
||
|
|
"valid_targets_min": 4369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14814814814814814,
|
||
|
|
"grad_norm": 0.9196969375594887,
|
||
|
|
"learning_rate": 8.253968253968254e-06,
|
||
|
|
"loss": 0.5352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2751440405845642,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 6822.4,
|
||
|
|
"valid_targets_min": 4627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16666666666666666,
|
||
|
|
"grad_norm": 0.765133314763819,
|
||
|
|
"learning_rate": 9.312169312169313e-06,
|
||
|
|
"loss": 0.5021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3200388550758362,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 7612.6,
|
||
|
|
"valid_targets_min": 4008
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18518518518518517,
|
||
|
|
"grad_norm": 0.6845104561118748,
|
||
|
|
"learning_rate": 1.037037037037037e-05,
|
||
|
|
"loss": 0.4855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2742794156074524,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 7739.4,
|
||
|
|
"valid_targets_min": 391
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2037037037037037,
|
||
|
|
"grad_norm": 0.627723124248625,
|
||
|
|
"learning_rate": 1.1428571428571429e-05,
|
||
|
|
"loss": 0.4741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24860906600952148,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 5759.1,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2222222222222222,
|
||
|
|
"grad_norm": 0.5488873482745417,
|
||
|
|
"learning_rate": 1.2486772486772486e-05,
|
||
|
|
"loss": 0.4688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23666323721408844,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 7891.8,
|
||
|
|
"valid_targets_min": 6024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24074074074074073,
|
||
|
|
"grad_norm": 0.5404794526206105,
|
||
|
|
"learning_rate": 1.3544973544973545e-05,
|
||
|
|
"loss": 0.4433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21840626001358032,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 7151.1,
|
||
|
|
"valid_targets_min": 3780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25925925925925924,
|
||
|
|
"grad_norm": 0.5380782703461664,
|
||
|
|
"learning_rate": 1.4603174603174603e-05,
|
||
|
|
"loss": 0.4242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21182352304458618,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 7776.1,
|
||
|
|
"valid_targets_min": 5679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2777777777777778,
|
||
|
|
"grad_norm": 0.5450893010749523,
|
||
|
|
"learning_rate": 1.5661375661375662e-05,
|
||
|
|
"loss": 0.4293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20561331510543823,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 6248.5,
|
||
|
|
"valid_targets_min": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2962962962962963,
|
||
|
|
"grad_norm": 0.5317555635167633,
|
||
|
|
"learning_rate": 1.671957671957672e-05,
|
||
|
|
"loss": 0.4256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2224772423505783,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 7044.6,
|
||
|
|
"valid_targets_min": 4290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3148148148148148,
|
||
|
|
"grad_norm": 0.5450655757884623,
|
||
|
|
"learning_rate": 1.7777777777777777e-05,
|
||
|
|
"loss": 0.3943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23075850307941437,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 6590.8,
|
||
|
|
"valid_targets_min": 2602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3333333333333333,
|
||
|
|
"grad_norm": 0.4891965175751207,
|
||
|
|
"learning_rate": 1.8835978835978836e-05,
|
||
|
|
"loss": 0.3825,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21840187907218933,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 7733.4,
|
||
|
|
"valid_targets_min": 4802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35185185185185186,
|
||
|
|
"grad_norm": 0.5051228466360034,
|
||
|
|
"learning_rate": 1.9894179894179895e-05,
|
||
|
|
"loss": 0.3811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18121924996376038,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 7083.4,
|
||
|
|
"valid_targets_min": 3807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37037037037037035,
|
||
|
|
"grad_norm": 0.6045358159055502,
|
||
|
|
"learning_rate": 2.0952380952380954e-05,
|
||
|
|
"loss": 0.3904,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2092483937740326,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 5927.2,
|
||
|
|
"valid_targets_min": 3887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3888888888888889,
|
||
|
|
"grad_norm": 0.5039852241565205,
|
||
|
|
"learning_rate": 2.2010582010582013e-05,
|
||
|
|
"loss": 0.3697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16192597150802612,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 6823.8,
|
||
|
|
"valid_targets_min": 2851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4074074074074074,
|
||
|
|
"grad_norm": 0.543344814617639,
|
||
|
|
"learning_rate": 2.3068783068783072e-05,
|
||
|
|
"loss": 0.366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21563449501991272,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 7248.0,
|
||
|
|
"valid_targets_min": 4263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42592592592592593,
|
||
|
|
"grad_norm": 0.51463194661137,
|
||
|
|
"learning_rate": 2.4126984126984128e-05,
|
||
|
|
"loss": 0.3673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20645663142204285,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 7274.1,
|
||
|
|
"valid_targets_min": 5029
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4444444444444444,
|
||
|
|
"grad_norm": 0.5022729085110892,
|
||
|
|
"learning_rate": 2.5185185185185187e-05,
|
||
|
|
"loss": 0.3513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1954728364944458,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 8162.1,
|
||
|
|
"valid_targets_min": 4772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46296296296296297,
|
||
|
|
"grad_norm": 0.48849343768132747,
|
||
|
|
"learning_rate": 2.6243386243386246e-05,
|
||
|
|
"loss": 0.3764,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22054851055145264,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 9386.2,
|
||
|
|
"valid_targets_min": 5811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48148148148148145,
|
||
|
|
"grad_norm": 0.6066721985482638,
|
||
|
|
"learning_rate": 2.7301587301587305e-05,
|
||
|
|
"loss": 0.3552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17588719725608826,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 4571.5,
|
||
|
|
"valid_targets_min": 1134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"grad_norm": 0.4931053049682215,
|
||
|
|
"learning_rate": 2.835978835978836e-05,
|
||
|
|
"loss": 0.3687,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14577624201774597,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 6307.6,
|
||
|
|
"valid_targets_min": 3966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5185185185185185,
|
||
|
|
"grad_norm": 0.6073682337976894,
|
||
|
|
"learning_rate": 2.941798941798942e-05,
|
||
|
|
"loss": 0.3514,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17465010285377502,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 6394.0,
|
||
|
|
"valid_targets_min": 269
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5370370370370371,
|
||
|
|
"grad_norm": 0.5024739310945823,
|
||
|
|
"learning_rate": 3.047619047619048e-05,
|
||
|
|
"loss": 0.3496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1507670283317566,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 6376.0,
|
||
|
|
"valid_targets_min": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5555555555555556,
|
||
|
|
"grad_norm": 0.5277273202526614,
|
||
|
|
"learning_rate": 3.153439153439154e-05,
|
||
|
|
"loss": 0.3585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18677863478660583,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 7588.2,
|
||
|
|
"valid_targets_min": 3447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5740740740740741,
|
||
|
|
"grad_norm": 0.4927981212250953,
|
||
|
|
"learning_rate": 3.259259259259259e-05,
|
||
|
|
"loss": 0.343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15430906414985657,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 6208.2,
|
||
|
|
"valid_targets_min": 1884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5925925925925926,
|
||
|
|
"grad_norm": 0.5955616973209993,
|
||
|
|
"learning_rate": 3.3650793650793656e-05,
|
||
|
|
"loss": 0.3475,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16549400985240936,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 5810.4,
|
||
|
|
"valid_targets_min": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6111111111111112,
|
||
|
|
"grad_norm": 0.5308592994318894,
|
||
|
|
"learning_rate": 3.470899470899471e-05,
|
||
|
|
"loss": 0.3252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16406959295272827,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 7197.9,
|
||
|
|
"valid_targets_min": 4781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6296296296296297,
|
||
|
|
"grad_norm": 0.5786264253850772,
|
||
|
|
"learning_rate": 3.576719576719577e-05,
|
||
|
|
"loss": 0.334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14584289491176605,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 5856.1,
|
||
|
|
"valid_targets_min": 3947
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6481481481481481,
|
||
|
|
"grad_norm": 0.5876267301381748,
|
||
|
|
"learning_rate": 3.682539682539683e-05,
|
||
|
|
"loss": 0.3364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19383975863456726,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 6151.6,
|
||
|
|
"valid_targets_min": 298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6666666666666666,
|
||
|
|
"grad_norm": 0.5579841097601295,
|
||
|
|
"learning_rate": 3.7883597883597885e-05,
|
||
|
|
"loss": 0.3355,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19771698117256165,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 7475.2,
|
||
|
|
"valid_targets_min": 5181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6851851851851852,
|
||
|
|
"grad_norm": 0.5575120093361495,
|
||
|
|
"learning_rate": 3.894179894179894e-05,
|
||
|
|
"loss": 0.3306,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13605374097824097,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 6225.5,
|
||
|
|
"valid_targets_min": 2414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7037037037037037,
|
||
|
|
"grad_norm": 0.5695877338034481,
|
||
|
|
"learning_rate": 4e-05,
|
||
|
|
"loss": 0.3331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19986039400100708,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 8463.9,
|
||
|
|
"valid_targets_min": 4419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7222222222222222,
|
||
|
|
"grad_norm": 0.5009496122576277,
|
||
|
|
"learning_rate": 3.999914723760517e-05,
|
||
|
|
"loss": 0.3364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1776013821363449,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 7864.9,
|
||
|
|
"valid_targets_min": 5879
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7407407407407407,
|
||
|
|
"grad_norm": 0.5293768737385741,
|
||
|
|
"learning_rate": 3.999658902314104e-05,
|
||
|
|
"loss": 0.338,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1694238781929016,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 8140.8,
|
||
|
|
"valid_targets_min": 5488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7592592592592593,
|
||
|
|
"grad_norm": 0.8375558031870537,
|
||
|
|
"learning_rate": 3.999232557476252e-05,
|
||
|
|
"loss": 0.3319,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16278810799121857,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 6144.5,
|
||
|
|
"valid_targets_min": 2437
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7777777777777778,
|
||
|
|
"grad_norm": 0.529694505104515,
|
||
|
|
"learning_rate": 3.9986357256040465e-05,
|
||
|
|
"loss": 0.324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14480090141296387,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 6128.9,
|
||
|
|
"valid_targets_min": 353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7962962962962963,
|
||
|
|
"grad_norm": 0.5687228641593676,
|
||
|
|
"learning_rate": 3.997868457593064e-05,
|
||
|
|
"loss": 0.3187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16306929290294647,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 6418.0,
|
||
|
|
"valid_targets_min": 3908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8148148148148148,
|
||
|
|
"grad_norm": 0.47438386161926516,
|
||
|
|
"learning_rate": 3.996930818873035e-05,
|
||
|
|
"loss": 0.3186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14409326016902924,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 6047.1,
|
||
|
|
"valid_targets_min": 304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8333333333333334,
|
||
|
|
"grad_norm": 0.5486113720670046,
|
||
|
|
"learning_rate": 3.9958228894022645e-05,
|
||
|
|
"loss": 0.3106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15029951930046082,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 6173.9,
|
||
|
|
"valid_targets_min": 3754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8518518518518519,
|
||
|
|
"grad_norm": 0.5170677946882957,
|
||
|
|
"learning_rate": 3.994544763660811e-05,
|
||
|
|
"loss": 0.3071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16101841628551483,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 8052.0,
|
||
|
|
"valid_targets_min": 5204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8703703703703703,
|
||
|
|
"grad_norm": 0.5874905209063319,
|
||
|
|
"learning_rate": 3.993096550642431e-05,
|
||
|
|
"loss": 0.3234,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21433310210704803,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 8033.4,
|
||
|
|
"valid_targets_min": 4617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8888888888888888,
|
||
|
|
"grad_norm": 0.5772717839196063,
|
||
|
|
"learning_rate": 3.991478373845286e-05,
|
||
|
|
"loss": 0.3187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15627962350845337,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 6618.6,
|
||
|
|
"valid_targets_min": 2600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9074074074074074,
|
||
|
|
"grad_norm": 0.5510574817704281,
|
||
|
|
"learning_rate": 3.989690371261406e-05,
|
||
|
|
"loss": 0.3214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16640323400497437,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 6397.0,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9259259259259259,
|
||
|
|
"grad_norm": 0.530761854892717,
|
||
|
|
"learning_rate": 3.987732695364929e-05,
|
||
|
|
"loss": 0.3105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14477106928825378,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 5501.1,
|
||
|
|
"valid_targets_min": 2206
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9444444444444444,
|
||
|
|
"grad_norm": 0.5289325090905338,
|
||
|
|
"learning_rate": 3.985605513099093e-05,
|
||
|
|
"loss": 0.3163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13415177166461945,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 6082.1,
|
||
|
|
"valid_targets_min": 5234
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9629629629629629,
|
||
|
|
"grad_norm": 0.569051693937061,
|
||
|
|
"learning_rate": 3.983309005862002e-05,
|
||
|
|
"loss": 0.3324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13313525915145874,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 4545.2,
|
||
|
|
"valid_targets_min": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9814814814814815,
|
||
|
|
"grad_norm": 0.5353604464324123,
|
||
|
|
"learning_rate": 3.980843369491159e-05,
|
||
|
|
"loss": 0.3171,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.199254110455513,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 7222.6,
|
||
|
|
"valid_targets_min": 4705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.5103477604549704,
|
||
|
|
"learning_rate": 3.9782088142467595e-05,
|
||
|
|
"loss": 0.3107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1165643036365509,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 5523.9,
|
||
|
|
"valid_targets_min": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0185185185185186,
|
||
|
|
"grad_norm": 0.5065171933118513,
|
||
|
|
"learning_rate": 3.975405564793768e-05,
|
||
|
|
"loss": 0.3119,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21201768517494202,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 7694.2,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.037037037037037,
|
||
|
|
"grad_norm": 0.5475032237106019,
|
||
|
|
"learning_rate": 3.972433860182757e-05,
|
||
|
|
"loss": 0.2949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13425172865390778,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 6122.1,
|
||
|
|
"valid_targets_min": 377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0555555555555556,
|
||
|
|
"grad_norm": 0.59475901438283,
|
||
|
|
"learning_rate": 3.969293953829519e-05,
|
||
|
|
"loss": 0.2955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14180830121040344,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 7330.9,
|
||
|
|
"valid_targets_min": 4746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.074074074074074,
|
||
|
|
"grad_norm": 0.4732078699188954,
|
||
|
|
"learning_rate": 3.965986113493462e-05,
|
||
|
|
"loss": 0.2973,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14922069013118744,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 7192.6,
|
||
|
|
"valid_targets_min": 3965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0925925925925926,
|
||
|
|
"grad_norm": 0.5946837128648786,
|
||
|
|
"learning_rate": 3.9625106212547696e-05,
|
||
|
|
"loss": 0.2955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12483422458171844,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 4485.5,
|
||
|
|
"valid_targets_min": 461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1111111111111112,
|
||
|
|
"grad_norm": 0.48537206737996996,
|
||
|
|
"learning_rate": 3.9588677734903505e-05,
|
||
|
|
"loss": 0.289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14004018902778625,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 7126.4,
|
||
|
|
"valid_targets_min": 2065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1296296296296295,
|
||
|
|
"grad_norm": 0.5868179766871565,
|
||
|
|
"learning_rate": 3.955057880848563e-05,
|
||
|
|
"loss": 0.2944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1457635760307312,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 7584.9,
|
||
|
|
"valid_targets_min": 3016
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1481481481481481,
|
||
|
|
"grad_norm": 0.5531573803747889,
|
||
|
|
"learning_rate": 3.9510812682227245e-05,
|
||
|
|
"loss": 0.3144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1710018515586853,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 6904.5,
|
||
|
|
"valid_targets_min": 4493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1666666666666667,
|
||
|
|
"grad_norm": 0.5478913235405003,
|
||
|
|
"learning_rate": 3.946938274723405e-05,
|
||
|
|
"loss": 0.2974,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15083035826683044,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 5984.2,
|
||
|
|
"valid_targets_min": 308
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1851851851851851,
|
||
|
|
"grad_norm": 0.47833830640118885,
|
||
|
|
"learning_rate": 3.9426292536495114e-05,
|
||
|
|
"loss": 0.3051,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15671434998512268,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 8063.6,
|
||
|
|
"valid_targets_min": 5433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2037037037037037,
|
||
|
|
"grad_norm": 0.5349890076251889,
|
||
|
|
"learning_rate": 3.938154572458156e-05,
|
||
|
|
"loss": 0.2926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14529670774936676,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 6739.8,
|
||
|
|
"valid_targets_min": 5419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2222222222222223,
|
||
|
|
"grad_norm": 0.4994576555333995,
|
||
|
|
"learning_rate": 3.9335146127333245e-05,
|
||
|
|
"loss": 0.2906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15106892585754395,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 7467.2,
|
||
|
|
"valid_targets_min": 3877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2407407407407407,
|
||
|
|
"grad_norm": 0.5935948253238553,
|
||
|
|
"learning_rate": 3.928709770153332e-05,
|
||
|
|
"loss": 0.2957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14414165914058685,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 4819.1,
|
||
|
|
"valid_targets_min": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2592592592592593,
|
||
|
|
"grad_norm": 0.5524891188597137,
|
||
|
|
"learning_rate": 3.923740454457087e-05,
|
||
|
|
"loss": 0.2919,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1580236554145813,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 6985.0,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2777777777777777,
|
||
|
|
"grad_norm": 0.5456561849473739,
|
||
|
|
"learning_rate": 3.9186070894091433e-05,
|
||
|
|
"loss": 0.3006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18431127071380615,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 6896.9,
|
||
|
|
"valid_targets_min": 3800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2962962962962963,
|
||
|
|
"grad_norm": 0.5624233622754709,
|
||
|
|
"learning_rate": 3.9133101127635684e-05,
|
||
|
|
"loss": 0.2892,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15472449362277985,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 7580.0,
|
||
|
|
"valid_targets_min": 367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3148148148148149,
|
||
|
|
"grad_norm": 0.5889370024503714,
|
||
|
|
"learning_rate": 3.9078499762266124e-05,
|
||
|
|
"loss": 0.2896,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12132951617240906,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 5551.5,
|
||
|
|
"valid_targets_min": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3333333333333333,
|
||
|
|
"grad_norm": 0.5062166048938903,
|
||
|
|
"learning_rate": 3.902227145418185e-05,
|
||
|
|
"loss": 0.2883,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1225079745054245,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 5971.0,
|
||
|
|
"valid_targets_min": 368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3518518518518519,
|
||
|
|
"grad_norm": 0.47488441470103737,
|
||
|
|
"learning_rate": 3.896442099832153e-05,
|
||
|
|
"loss": 0.2855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12804746627807617,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 6091.5,
|
||
|
|
"valid_targets_min": 1976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3703703703703702,
|
||
|
|
"grad_norm": 0.5406355069355051,
|
||
|
|
"learning_rate": 3.89049533279545e-05,
|
||
|
|
"loss": 0.295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18195563554763794,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 6963.6,
|
||
|
|
"valid_targets_min": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3888888888888888,
|
||
|
|
"grad_norm": 0.5574169233693884,
|
||
|
|
"learning_rate": 3.884387351426005e-05,
|
||
|
|
"loss": 0.2924,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15529048442840576,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 6006.2,
|
||
|
|
"valid_targets_min": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4074074074074074,
|
||
|
|
"grad_norm": 0.535890457895036,
|
||
|
|
"learning_rate": 3.8781186765895e-05,
|
||
|
|
"loss": 0.2897,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15748731791973114,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 6537.9,
|
||
|
|
"valid_targets_min": 4917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.425925925925926,
|
||
|
|
"grad_norm": 0.5095040628778826,
|
||
|
|
"learning_rate": 3.8716898428549526e-05,
|
||
|
|
"loss": 0.2847,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09297989308834076,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 4088.0,
|
||
|
|
"valid_targets_min": 304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4444444444444444,
|
||
|
|
"grad_norm": 0.4716182875282023,
|
||
|
|
"learning_rate": 3.865101398449127e-05,
|
||
|
|
"loss": 0.2942,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1432163268327713,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 7432.4,
|
||
|
|
"valid_targets_min": 5799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.462962962962963,
|
||
|
|
"grad_norm": 0.5386315145255598,
|
||
|
|
"learning_rate": 3.858353905209787e-05,
|
||
|
|
"loss": 0.2921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17763468623161316,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 7208.8,
|
||
|
|
"valid_targets_min": 4700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4814814814814814,
|
||
|
|
"grad_norm": 0.5378548282182605,
|
||
|
|
"learning_rate": 3.8514479385377813e-05,
|
||
|
|
"loss": 0.2919,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15650640428066254,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 6712.2,
|
||
|
|
"valid_targets_min": 1883
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5,
|
||
|
|
"grad_norm": 0.5142301123444354,
|
||
|
|
"learning_rate": 3.844384087347978e-05,
|
||
|
|
"loss": 0.3043,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1322391927242279,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 6352.9,
|
||
|
|
"valid_targets_min": 3290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5185185185185186,
|
||
|
|
"grad_norm": 0.5505544471616537,
|
||
|
|
"learning_rate": 3.837162954019042e-05,
|
||
|
|
"loss": 0.2814,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14132408797740936,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 6377.4,
|
||
|
|
"valid_targets_min": 3935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5370370370370372,
|
||
|
|
"grad_norm": 0.6229636570648036,
|
||
|
|
"learning_rate": 3.829785154342069e-05,
|
||
|
|
"loss": 0.2912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15506146848201752,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 7248.5,
|
||
|
|
"valid_targets_min": 5048
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5555555555555556,
|
||
|
|
"grad_norm": 0.7699539965412168,
|
||
|
|
"learning_rate": 3.822251317468073e-05,
|
||
|
|
"loss": 0.2915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13349926471710205,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 6603.4,
|
||
|
|
"valid_targets_min": 336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.574074074074074,
|
||
|
|
"grad_norm": 0.4695932327596188,
|
||
|
|
"learning_rate": 3.814562085854328e-05,
|
||
|
|
"loss": 0.2963,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14610108733177185,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 7011.5,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5925925925925926,
|
||
|
|
"grad_norm": 0.572965259354651,
|
||
|
|
"learning_rate": 3.8067181152095935e-05,
|
||
|
|
"loss": 0.2755,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12474419176578522,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 6207.2,
|
||
|
|
"valid_targets_min": 368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6111111111111112,
|
||
|
|
"grad_norm": 0.5072200634901262,
|
||
|
|
"learning_rate": 3.7987200744381866e-05,
|
||
|
|
"loss": 0.2787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14492423832416534,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 6685.0,
|
||
|
|
"valid_targets_min": 3491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6296296296296298,
|
||
|
|
"grad_norm": 0.5315100425545446,
|
||
|
|
"learning_rate": 3.790568645582949e-05,
|
||
|
|
"loss": 0.2889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12990285456180573,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 5514.2,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6481481481481481,
|
||
|
|
"grad_norm": 0.48299403677554154,
|
||
|
|
"learning_rate": 3.7822645237670786e-05,
|
||
|
|
"loss": 0.2859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1585882604122162,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 8019.8,
|
||
|
|
"valid_targets_min": 4406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6666666666666665,
|
||
|
|
"grad_norm": 0.5281572977294482,
|
||
|
|
"learning_rate": 3.773808417134857e-05,
|
||
|
|
"loss": 0.2927,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1543073058128357,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 7052.0,
|
||
|
|
"valid_targets_min": 3687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6851851851851851,
|
||
|
|
"grad_norm": 0.4845837149564397,
|
||
|
|
"learning_rate": 3.7652010467912586e-05,
|
||
|
|
"loss": 0.2949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15963464975357056,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 8213.2,
|
||
|
|
"valid_targets_min": 6411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7037037037037037,
|
||
|
|
"grad_norm": 0.5408032989769307,
|
||
|
|
"learning_rate": 3.756443146740457e-05,
|
||
|
|
"loss": 0.2984,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13530519604682922,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 5813.6,
|
||
|
|
"valid_targets_min": 3993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7222222222222223,
|
||
|
|
"grad_norm": 0.5353189538043924,
|
||
|
|
"learning_rate": 3.7475354638232364e-05,
|
||
|
|
"loss": 0.2744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10430891811847687,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 4659.6,
|
||
|
|
"valid_targets_min": 1661
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7407407407407407,
|
||
|
|
"grad_norm": 0.5226421018247178,
|
||
|
|
"learning_rate": 3.7384787576532955e-05,
|
||
|
|
"loss": 0.2848,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1390485018491745,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 7290.6,
|
||
|
|
"valid_targets_min": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7592592592592593,
|
||
|
|
"grad_norm": 0.49394601207037214,
|
||
|
|
"learning_rate": 3.729273800552482e-05,
|
||
|
|
"loss": 0.2847,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1647380143404007,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 7383.2,
|
||
|
|
"valid_targets_min": 5781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7777777777777777,
|
||
|
|
"grad_norm": 0.6224532686212161,
|
||
|
|
"learning_rate": 3.719921377484919e-05,
|
||
|
|
"loss": 0.2841,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14958155155181885,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 6402.1,
|
||
|
|
"valid_targets_min": 4041
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7962962962962963,
|
||
|
|
"grad_norm": 0.5990070202349348,
|
||
|
|
"learning_rate": 3.710422285990078e-05,
|
||
|
|
"loss": 0.2858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1453811526298523,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 5552.4,
|
||
|
|
"valid_targets_min": 192
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8148148148148149,
|
||
|
|
"grad_norm": 0.47961741304787425,
|
||
|
|
"learning_rate": 3.700777336114758e-05,
|
||
|
|
"loss": 0.2809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13036441802978516,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 6451.1,
|
||
|
|
"valid_targets_min": 307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8333333333333335,
|
||
|
|
"grad_norm": 0.5499567840797964,
|
||
|
|
"learning_rate": 3.690987350344017e-05,
|
||
|
|
"loss": 0.2686,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11820720136165619,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 6068.8,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8518518518518519,
|
||
|
|
"grad_norm": 0.5346461604872333,
|
||
|
|
"learning_rate": 3.681053163531024e-05,
|
||
|
|
"loss": 0.2839,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1634947955608368,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 7725.8,
|
||
|
|
"valid_targets_min": 4799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8703703703703702,
|
||
|
|
"grad_norm": 0.5253112100424155,
|
||
|
|
"learning_rate": 3.6709756228258735e-05,
|
||
|
|
"loss": 0.2876,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13562235236167908,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 6321.1,
|
||
|
|
"valid_targets_min": 3914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8888888888888888,
|
||
|
|
"grad_norm": 0.49379712991928504,
|
||
|
|
"learning_rate": 3.66075558760334e-05,
|
||
|
|
"loss": 0.2768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11750568449497223,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 6793.4,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9074074074074074,
|
||
|
|
"grad_norm": 0.5151893286966898,
|
||
|
|
"learning_rate": 3.6503939293895945e-05,
|
||
|
|
"loss": 0.2815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14757773280143738,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 8412.4,
|
||
|
|
"valid_targets_min": 6081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.925925925925926,
|
||
|
|
"grad_norm": 0.5044055970161373,
|
||
|
|
"learning_rate": 3.639891531787885e-05,
|
||
|
|
"loss": 0.2634,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14695027470588684,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 6752.4,
|
||
|
|
"valid_targets_min": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9444444444444444,
|
||
|
|
"grad_norm": 0.5430440383866891,
|
||
|
|
"learning_rate": 3.6292492904031844e-05,
|
||
|
|
"loss": 0.2853,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11272375285625458,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 4375.9,
|
||
|
|
"valid_targets_min": 357
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9629629629629628,
|
||
|
|
"grad_norm": 0.535911996260599,
|
||
|
|
"learning_rate": 3.6184681127658166e-05,
|
||
|
|
"loss": 0.2824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1217561811208725,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 4825.1,
|
||
|
|
"valid_targets_min": 403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9814814814814814,
|
||
|
|
"grad_norm": 0.4652442437519838,
|
||
|
|
"learning_rate": 3.607548918254068e-05,
|
||
|
|
"loss": 0.2859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13177233934402466,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 7426.4,
|
||
|
|
"valid_targets_min": 4939
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.526551082703696,
|
||
|
|
"learning_rate": 3.5964926380157856e-05,
|
||
|
|
"loss": 0.2848,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16320262849330902,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 7920.5,
|
||
|
|
"valid_targets_min": 4695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0185185185185186,
|
||
|
|
"grad_norm": 0.5416505181112615,
|
||
|
|
"learning_rate": 3.585300214888971e-05,
|
||
|
|
"loss": 0.2666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14433258771896362,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 7458.1,
|
||
|
|
"valid_targets_min": 3929
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.037037037037037,
|
||
|
|
"grad_norm": 0.574840102953103,
|
||
|
|
"learning_rate": 3.5739726033213785e-05,
|
||
|
|
"loss": 0.2608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11546637117862701,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 5356.0,
|
||
|
|
"valid_targets_min": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0555555555555554,
|
||
|
|
"grad_norm": 0.5607643377480926,
|
||
|
|
"learning_rate": 3.562510769289124e-05,
|
||
|
|
"loss": 0.2689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13067400455474854,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 6073.8,
|
||
|
|
"valid_targets_min": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.074074074074074,
|
||
|
|
"grad_norm": 0.5218117367647941,
|
||
|
|
"learning_rate": 3.550915690214313e-05,
|
||
|
|
"loss": 0.2641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14398810267448425,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 6835.8,
|
||
|
|
"valid_targets_min": 3422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0925925925925926,
|
||
|
|
"grad_norm": 0.5934505930381603,
|
||
|
|
"learning_rate": 3.539188354881685e-05,
|
||
|
|
"loss": 0.2715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13066671788692474,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 6965.4,
|
||
|
|
"valid_targets_min": 4297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.111111111111111,
|
||
|
|
"grad_norm": 0.4708347758965413,
|
||
|
|
"learning_rate": 3.527329763354295e-05,
|
||
|
|
"loss": 0.26,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12042200565338135,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 7412.5,
|
||
|
|
"valid_targets_min": 3336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1296296296296298,
|
||
|
|
"grad_norm": 0.5708517268952519,
|
||
|
|
"learning_rate": 3.515340926888236e-05,
|
||
|
|
"loss": 0.2575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1318579465150833,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 6028.6,
|
||
|
|
"valid_targets_min": 4547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.148148148148148,
|
||
|
|
"grad_norm": 0.5178407866313944,
|
||
|
|
"learning_rate": 3.503222867846397e-05,
|
||
|
|
"loss": 0.2616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10885745286941528,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 5752.1,
|
||
|
|
"valid_targets_min": 257
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1666666666666665,
|
||
|
|
"grad_norm": 0.5863919284533499,
|
||
|
|
"learning_rate": 3.490976619611282e-05,
|
||
|
|
"loss": 0.2655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13897745311260223,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 6269.6,
|
||
|
|
"valid_targets_min": 3887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.185185185185185,
|
||
|
|
"grad_norm": 0.5881076768641105,
|
||
|
|
"learning_rate": 3.47860322649689e-05,
|
||
|
|
"loss": 0.2611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08158881962299347,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 4183.5,
|
||
|
|
"valid_targets_min": 353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2037037037037037,
|
||
|
|
"grad_norm": 0.5449342889554193,
|
||
|
|
"learning_rate": 3.4661037436596526e-05,
|
||
|
|
"loss": 0.2686,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12910135090351105,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 7711.6,
|
||
|
|
"valid_targets_min": 5163
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2222222222222223,
|
||
|
|
"grad_norm": 0.5061684984724052,
|
||
|
|
"learning_rate": 3.453479237008465e-05,
|
||
|
|
"loss": 0.2632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15672308206558228,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 8003.1,
|
||
|
|
"valid_targets_min": 5234
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.240740740740741,
|
||
|
|
"grad_norm": 0.524095008431537,
|
||
|
|
"learning_rate": 3.4407307831137775e-05,
|
||
|
|
"loss": 0.259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12406113743782043,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 6011.2,
|
||
|
|
"valid_targets_min": 2463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.259259259259259,
|
||
|
|
"grad_norm": 0.4572943901172685,
|
||
|
|
"learning_rate": 3.4278594691157985e-05,
|
||
|
|
"loss": 0.2769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1535755693912506,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 8442.5,
|
||
|
|
"valid_targets_min": 4737
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2777777777777777,
|
||
|
|
"grad_norm": 0.48298994041640353,
|
||
|
|
"learning_rate": 3.4148663926317826e-05,
|
||
|
|
"loss": 0.2567,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10671807080507278,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 6054.6,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2962962962962963,
|
||
|
|
"grad_norm": 0.5492340875103793,
|
||
|
|
"learning_rate": 3.401752661662431e-05,
|
||
|
|
"loss": 0.2618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15312159061431885,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 6727.8,
|
||
|
|
"valid_targets_min": 2786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.314814814814815,
|
||
|
|
"grad_norm": 0.5392228802956868,
|
||
|
|
"learning_rate": 3.388519394497408e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13924045860767365,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 6856.8,
|
||
|
|
"valid_targets_min": 5211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3333333333333335,
|
||
|
|
"grad_norm": 0.5233591164759019,
|
||
|
|
"learning_rate": 3.375167719619972e-05,
|
||
|
|
"loss": 0.2628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14242339134216309,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 6251.1,
|
||
|
|
"valid_targets_min": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.351851851851852,
|
||
|
|
"grad_norm": 0.4419020062221808,
|
||
|
|
"learning_rate": 3.361698775610748e-05,
|
||
|
|
"loss": 0.2569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12432446330785751,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 6905.8,
|
||
|
|
"valid_targets_min": 4639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3703703703703702,
|
||
|
|
"grad_norm": 0.5468809658880694,
|
||
|
|
"learning_rate": 3.3481137110506305e-05,
|
||
|
|
"loss": 0.2613,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15010154247283936,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 5742.6,
|
||
|
|
"valid_targets_min": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.388888888888889,
|
||
|
|
"grad_norm": 0.4848873250194103,
|
||
|
|
"learning_rate": 3.334413684422839e-05,
|
||
|
|
"loss": 0.2691,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14694947004318237,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 8914.1,
|
||
|
|
"valid_targets_min": 6186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4074074074074074,
|
||
|
|
"grad_norm": 0.4912012424730424,
|
||
|
|
"learning_rate": 3.3205998640141255e-05,
|
||
|
|
"loss": 0.2658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12130933254957199,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 6549.1,
|
||
|
|
"valid_targets_min": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.425925925925926,
|
||
|
|
"grad_norm": 0.4437323909959092,
|
||
|
|
"learning_rate": 3.3066734278151464e-05,
|
||
|
|
"loss": 0.2639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11655676364898682,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 6480.2,
|
||
|
|
"valid_targets_min": 117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4444444444444446,
|
||
|
|
"grad_norm": 0.4719184803629788,
|
||
|
|
"learning_rate": 3.292635563420009e-05,
|
||
|
|
"loss": 0.2585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12040881812572479,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 5791.6,
|
||
|
|
"valid_targets_min": 421
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.462962962962963,
|
||
|
|
"grad_norm": 0.5216259861354975,
|
||
|
|
"learning_rate": 3.2784874679250026e-05,
|
||
|
|
"loss": 0.2588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12373167276382446,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 6689.1,
|
||
|
|
"valid_targets_min": 4072
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4814814814814814,
|
||
|
|
"grad_norm": 0.4482229745629832,
|
||
|
|
"learning_rate": 3.264230347826504e-05,
|
||
|
|
"loss": 0.2689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13821570575237274,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 7872.4,
|
||
|
|
"valid_targets_min": 4265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5,
|
||
|
|
"grad_norm": 0.4688397809024902,
|
||
|
|
"learning_rate": 3.249865418918102e-05,
|
||
|
|
"loss": 0.2689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12180155515670776,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 8303.1,
|
||
|
|
"valid_targets_min": 5882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5185185185185186,
|
||
|
|
"grad_norm": 0.5067336932293169,
|
||
|
|
"learning_rate": 3.2353939061869145e-05,
|
||
|
|
"loss": 0.2624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17937231063842773,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 7677.5,
|
||
|
|
"valid_targets_min": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.537037037037037,
|
||
|
|
"grad_norm": 0.495004907760756,
|
||
|
|
"learning_rate": 3.2208170437091267e-05,
|
||
|
|
"loss": 0.2754,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12227485328912735,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 6554.2,
|
||
|
|
"valid_targets_min": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5555555555555554,
|
||
|
|
"grad_norm": 0.46262512362964875,
|
||
|
|
"learning_rate": 3.206136074544754e-05,
|
||
|
|
"loss": 0.2632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1247977614402771,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 5935.9,
|
||
|
|
"valid_targets_min": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.574074074074074,
|
||
|
|
"grad_norm": 0.577326499426772,
|
||
|
|
"learning_rate": 3.1913522506316396e-05,
|
||
|
|
"loss": 0.2688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12780506908893585,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 6527.5,
|
||
|
|
"valid_targets_min": 229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5925925925925926,
|
||
|
|
"grad_norm": 0.5017879104054451,
|
||
|
|
"learning_rate": 3.17646683267869e-05,
|
||
|
|
"loss": 0.2683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14587455987930298,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 6552.1,
|
||
|
|
"valid_targets_min": 1068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.611111111111111,
|
||
|
|
"grad_norm": 0.5078274256321608,
|
||
|
|
"learning_rate": 3.161481090058374e-05,
|
||
|
|
"loss": 0.2677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13253484666347504,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 7849.0,
|
||
|
|
"valid_targets_min": 5687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6296296296296298,
|
||
|
|
"grad_norm": 0.48831962900628234,
|
||
|
|
"learning_rate": 3.146396300698467e-05,
|
||
|
|
"loss": 0.2603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11534512788057327,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 5541.4,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.648148148148148,
|
||
|
|
"grad_norm": 1.7586484361894488,
|
||
|
|
"learning_rate": 3.1312137509730776e-05,
|
||
|
|
"loss": 0.2474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10342179238796234,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 4904.9,
|
||
|
|
"valid_targets_min": 3551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6666666666666665,
|
||
|
|
"grad_norm": 0.5727289688315167,
|
||
|
|
"learning_rate": 3.115934735592954e-05,
|
||
|
|
"loss": 0.2506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15417921543121338,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 5637.8,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.685185185185185,
|
||
|
|
"grad_norm": 0.5013909576024947,
|
||
|
|
"learning_rate": 3.10056055749507e-05,
|
||
|
|
"loss": 0.253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12603460252285004,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 7198.4,
|
||
|
|
"valid_targets_min": 4333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7037037037037037,
|
||
|
|
"grad_norm": 0.5511286906952175,
|
||
|
|
"learning_rate": 3.0850925277315193e-05,
|
||
|
|
"loss": 0.2614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1324155628681183,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 6283.8,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7222222222222223,
|
||
|
|
"grad_norm": 0.5435986623296872,
|
||
|
|
"learning_rate": 3.0695319653577116e-05,
|
||
|
|
"loss": 0.2538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11377684026956558,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 5828.4,
|
||
|
|
"valid_targets_min": 3486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7407407407407405,
|
||
|
|
"grad_norm": 0.48998670607021577,
|
||
|
|
"learning_rate": 3.0538801973198914e-05,
|
||
|
|
"loss": 0.2583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11242972314357758,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 6409.4,
|
||
|
|
"valid_targets_min": 3687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7592592592592595,
|
||
|
|
"grad_norm": 0.5181670834094368,
|
||
|
|
"learning_rate": 3.0381385583419783e-05,
|
||
|
|
"loss": 0.2762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11965565383434296,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 6171.4,
|
||
|
|
"valid_targets_min": 3598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7777777777777777,
|
||
|
|
"grad_norm": 0.4892760482968971,
|
||
|
|
"learning_rate": 3.0223083908117466e-05,
|
||
|
|
"loss": 0.2551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1174713522195816,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 6405.0,
|
||
|
|
"valid_targets_min": 4515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7962962962962963,
|
||
|
|
"grad_norm": 0.6775974514440799,
|
||
|
|
"learning_rate": 3.0063910446663542e-05,
|
||
|
|
"loss": 0.2613,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12394280731678009,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 5689.2,
|
||
|
|
"valid_targets_min": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.814814814814815,
|
||
|
|
"grad_norm": 0.48632532617444213,
|
||
|
|
"learning_rate": 2.9903878772772227e-05,
|
||
|
|
"loss": 0.2561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12731653451919556,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 7426.2,
|
||
|
|
"valid_targets_min": 3712
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8333333333333335,
|
||
|
|
"grad_norm": 0.46253818350114984,
|
||
|
|
"learning_rate": 2.9743002533342876e-05,
|
||
|
|
"loss": 0.2602,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11572107672691345,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 7458.6,
|
||
|
|
"valid_targets_min": 3852
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.851851851851852,
|
||
|
|
"grad_norm": 0.5203069527342822,
|
||
|
|
"learning_rate": 2.9581295447296202e-05,
|
||
|
|
"loss": 0.262,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10665138065814972,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 5312.9,
|
||
|
|
"valid_targets_min": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8703703703703702,
|
||
|
|
"grad_norm": 0.508992244499264,
|
||
|
|
"learning_rate": 2.9418771304404408e-05,
|
||
|
|
"loss": 0.2529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13962328433990479,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 6861.0,
|
||
|
|
"valid_targets_min": 2838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.888888888888889,
|
||
|
|
"grad_norm": 0.48349040691849704,
|
||
|
|
"learning_rate": 2.9255443964115217e-05,
|
||
|
|
"loss": 0.266,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1340981125831604,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 7161.6,
|
||
|
|
"valid_targets_min": 3762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9074074074074074,
|
||
|
|
"grad_norm": 0.47898223750895574,
|
||
|
|
"learning_rate": 2.9091327354370014e-05,
|
||
|
|
"loss": 0.2538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13395154476165771,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 6627.9,
|
||
|
|
"valid_targets_min": 69
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.925925925925926,
|
||
|
|
"grad_norm": 0.4541657970273562,
|
||
|
|
"learning_rate": 2.8926435470416123e-05,
|
||
|
|
"loss": 0.2639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1160229966044426,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 6603.0,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9444444444444446,
|
||
|
|
"grad_norm": 0.5540368967272006,
|
||
|
|
"learning_rate": 2.8760782373613322e-05,
|
||
|
|
"loss": 0.2497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13160249590873718,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 6338.4,
|
||
|
|
"valid_targets_min": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.962962962962963,
|
||
|
|
"grad_norm": 0.7359958376980468,
|
||
|
|
"learning_rate": 2.859438219023477e-05,
|
||
|
|
"loss": 0.2654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15799115598201752,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 5611.8,
|
||
|
|
"valid_targets_min": 273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9814814814814814,
|
||
|
|
"grad_norm": 0.4824226714339216,
|
||
|
|
"learning_rate": 2.8427249110262346e-05,
|
||
|
|
"loss": 0.2508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1053648293018341,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 5927.5,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.5382552432072585,
|
||
|
|
"learning_rate": 2.8259397386176616e-05,
|
||
|
|
"loss": 0.2581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11860036849975586,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 6269.6,
|
||
|
|
"valid_targets_min": 5439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0185185185185186,
|
||
|
|
"grad_norm": 1.273808109588435,
|
||
|
|
"learning_rate": 2.809084133174139e-05,
|
||
|
|
"loss": 0.2365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09889354556798935,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 6494.2,
|
||
|
|
"valid_targets_min": 4860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.037037037037037,
|
||
|
|
"grad_norm": 0.5296610389294772,
|
||
|
|
"learning_rate": 2.792159532078314e-05,
|
||
|
|
"loss": 0.2369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1007370874285698,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 6074.8,
|
||
|
|
"valid_targets_min": 3754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0555555555555554,
|
||
|
|
"grad_norm": 0.5109827190620847,
|
||
|
|
"learning_rate": 2.775167378596522e-05,
|
||
|
|
"loss": 0.2452,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1191176027059555,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 6418.1,
|
||
|
|
"valid_targets_min": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.074074074074074,
|
||
|
|
"grad_norm": 0.5158436295503185,
|
||
|
|
"learning_rate": 2.7581091217557134e-05,
|
||
|
|
"loss": 0.2425,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13595688343048096,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 6856.4,
|
||
|
|
"valid_targets_min": 2920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0925925925925926,
|
||
|
|
"grad_norm": 0.5078354894585061,
|
||
|
|
"learning_rate": 2.740986216219884e-05,
|
||
|
|
"loss": 0.2413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12502720952033997,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 6669.6,
|
||
|
|
"valid_targets_min": 4233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.111111111111111,
|
||
|
|
"grad_norm": 0.5054778272970494,
|
||
|
|
"learning_rate": 2.7238001221660257e-05,
|
||
|
|
"loss": 0.2398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12242163717746735,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 8300.9,
|
||
|
|
"valid_targets_min": 7173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1296296296296298,
|
||
|
|
"grad_norm": 0.5126564174818152,
|
||
|
|
"learning_rate": 2.7065523051596114e-05,
|
||
|
|
"loss": 0.247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12792211771011353,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 6557.4,
|
||
|
|
"valid_targets_min": 1560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.148148148148148,
|
||
|
|
"grad_norm": 0.6105982078938521,
|
||
|
|
"learning_rate": 2.6892442360296152e-05,
|
||
|
|
"loss": 0.2414,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12742607295513153,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 6655.6,
|
||
|
|
"valid_targets_min": 2065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1666666666666665,
|
||
|
|
"grad_norm": 0.49317723663403246,
|
||
|
|
"learning_rate": 2.6718773907430847e-05,
|
||
|
|
"loss": 0.2364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11097238957881927,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 6589.4,
|
||
|
|
"valid_targets_min": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.185185185185185,
|
||
|
|
"grad_norm": 0.5015576290750604,
|
||
|
|
"learning_rate": 2.6544532502792778e-05,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13182875514030457,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 6727.2,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2037037037037037,
|
||
|
|
"grad_norm": 0.5212344941398274,
|
||
|
|
"learning_rate": 2.6369733005033693e-05,
|
||
|
|
"loss": 0.2559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13697285950183868,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 7515.0,
|
||
|
|
"valid_targets_min": 5976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2222222222222223,
|
||
|
|
"grad_norm": 0.5508648846761911,
|
||
|
|
"learning_rate": 2.6194390320397426e-05,
|
||
|
|
"loss": 0.2352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09470212459564209,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 6168.4,
|
||
|
|
"valid_targets_min": 306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.240740740740741,
|
||
|
|
"grad_norm": 0.44089806796960995,
|
||
|
|
"learning_rate": 2.601851940144874e-05,
|
||
|
|
"loss": 0.2532,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09514069557189941,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 7256.9,
|
||
|
|
"valid_targets_min": 5156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.259259259259259,
|
||
|
|
"grad_norm": 0.9948233767091689,
|
||
|
|
"learning_rate": 2.5842135245798248e-05,
|
||
|
|
"loss": 0.2346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11571880429983139,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 7016.8,
|
||
|
|
"valid_targets_min": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2777777777777777,
|
||
|
|
"grad_norm": 0.5404910292302445,
|
||
|
|
"learning_rate": 2.5665252894823436e-05,
|
||
|
|
"loss": 0.244,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1577197015285492,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 7343.0,
|
||
|
|
"valid_targets_min": 5338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2962962962962963,
|
||
|
|
"grad_norm": 0.47094356670247445,
|
||
|
|
"learning_rate": 2.5487887432386035e-05,
|
||
|
|
"loss": 0.2415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13475333154201508,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 7784.5,
|
||
|
|
"valid_targets_min": 6171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.314814814814815,
|
||
|
|
"grad_norm": 0.5046387190727711,
|
||
|
|
"learning_rate": 2.531005398354569e-05,
|
||
|
|
"loss": 0.2566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11572615802288055,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 6777.4,
|
||
|
|
"valid_targets_min": 3828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3333333333333335,
|
||
|
|
"grad_norm": 0.5357822466955011,
|
||
|
|
"learning_rate": 2.5131767713270174e-05,
|
||
|
|
"loss": 0.2357,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12361173331737518,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 6211.9,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.351851851851852,
|
||
|
|
"grad_norm": 0.503314513417158,
|
||
|
|
"learning_rate": 2.4953043825142164e-05,
|
||
|
|
"loss": 0.2383,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10359904170036316,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 6026.4,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3703703703703702,
|
||
|
|
"grad_norm": 0.5303301465182496,
|
||
|
|
"learning_rate": 2.477389756006276e-05,
|
||
|
|
"loss": 0.2406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12895119190216064,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 6841.1,
|
||
|
|
"valid_targets_min": 3687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.388888888888889,
|
||
|
|
"grad_norm": 0.47181710270046207,
|
||
|
|
"learning_rate": 2.4594344194951748e-05,
|
||
|
|
"loss": 0.238,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13982756435871124,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 7695.1,
|
||
|
|
"valid_targets_min": 6655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4074074074074074,
|
||
|
|
"grad_norm": 0.6978123307943264,
|
||
|
|
"learning_rate": 2.4414399041444897e-05,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12279509752988815,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 5957.0,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.425925925925926,
|
||
|
|
"grad_norm": 0.5103058494828053,
|
||
|
|
"learning_rate": 2.423407744458822e-05,
|
||
|
|
"loss": 0.2476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10531405359506607,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 6795.9,
|
||
|
|
"valid_targets_min": 3746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4444444444444446,
|
||
|
|
"grad_norm": 0.4961262749151785,
|
||
|
|
"learning_rate": 2.405339478152938e-05,
|
||
|
|
"loss": 0.2424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13874265551567078,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 7975.0,
|
||
|
|
"valid_targets_min": 4268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.462962962962963,
|
||
|
|
"grad_norm": 0.5574805174642966,
|
||
|
|
"learning_rate": 2.387236646020643e-05,
|
||
|
|
"loss": 0.2483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12402337044477463,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 8073.1,
|
||
|
|
"valid_targets_min": 3757
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4814814814814814,
|
||
|
|
"grad_norm": 0.5498492265628724,
|
||
|
|
"learning_rate": 2.3691007918033858e-05,
|
||
|
|
"loss": 0.2526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1583220362663269,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 6594.8,
|
||
|
|
"valid_targets_min": 4154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5,
|
||
|
|
"grad_norm": 0.5001189338850895,
|
||
|
|
"learning_rate": 2.3509334620586127e-05,
|
||
|
|
"loss": 0.2445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12958279252052307,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 7600.1,
|
||
|
|
"valid_targets_min": 5250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5185185185185186,
|
||
|
|
"grad_norm": 0.4807963923696486,
|
||
|
|
"learning_rate": 2.332736206027887e-05,
|
||
|
|
"loss": 0.2466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10260862857103348,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 6256.6,
|
||
|
|
"valid_targets_min": 3839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.537037037037037,
|
||
|
|
"grad_norm": 0.5061807303605752,
|
||
|
|
"learning_rate": 2.314510575504771e-05,
|
||
|
|
"loss": 0.2406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13543078303337097,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 7313.0,
|
||
|
|
"valid_targets_min": 4655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5555555555555554,
|
||
|
|
"grad_norm": 0.4561793894533891,
|
||
|
|
"learning_rate": 2.2962581247024983e-05,
|
||
|
|
"loss": 0.2394,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1189025267958641,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 7561.6,
|
||
|
|
"valid_targets_min": 3970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.574074074074074,
|
||
|
|
"grad_norm": 0.4825083581140084,
|
||
|
|
"learning_rate": 2.277980410121434e-05,
|
||
|
|
"loss": 0.2396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10504335165023804,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 6335.8,
|
||
|
|
"valid_targets_min": 2847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5925925925925926,
|
||
|
|
"grad_norm": 0.4931411405781181,
|
||
|
|
"learning_rate": 2.2596789904163453e-05,
|
||
|
|
"loss": 0.2448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13161906599998474,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 7651.8,
|
||
|
|
"valid_targets_min": 4413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.611111111111111,
|
||
|
|
"grad_norm": 0.7353648756929703,
|
||
|
|
"learning_rate": 2.2413554262634802e-05,
|
||
|
|
"loss": 0.24,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1295996904373169,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 6932.1,
|
||
|
|
"valid_targets_min": 314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6296296296296298,
|
||
|
|
"grad_norm": 0.4968603493634512,
|
||
|
|
"learning_rate": 2.223011280227485e-05,
|
||
|
|
"loss": 0.25,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11219480633735657,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 6326.2,
|
||
|
|
"valid_targets_min": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.648148148148148,
|
||
|
|
"grad_norm": 0.5295742966751648,
|
||
|
|
"learning_rate": 2.2046481166281496e-05,
|
||
|
|
"loss": 0.2451,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1228310838341713,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 5468.6,
|
||
|
|
"valid_targets_min": 316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6666666666666665,
|
||
|
|
"grad_norm": 0.48970756187215925,
|
||
|
|
"learning_rate": 2.1862675014070106e-05,
|
||
|
|
"loss": 0.2404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12276136130094528,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 7585.4,
|
||
|
|
"valid_targets_min": 3731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.685185185185185,
|
||
|
|
"grad_norm": 0.5126550687549059,
|
||
|
|
"learning_rate": 2.1678710019938136e-05,
|
||
|
|
"loss": 0.2496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11687671393156052,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 5928.8,
|
||
|
|
"valid_targets_min": 2060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7037037037037037,
|
||
|
|
"grad_norm": 0.5038883486908647,
|
||
|
|
"learning_rate": 2.149460187172849e-05,
|
||
|
|
"loss": 0.2418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0843639075756073,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 4160.1,
|
||
|
|
"valid_targets_min": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7222222222222223,
|
||
|
|
"grad_norm": 0.437278082428542,
|
||
|
|
"learning_rate": 2.1310366269491693e-05,
|
||
|
|
"loss": 0.245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12892818450927734,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 7916.6,
|
||
|
|
"valid_targets_min": 3774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7407407407407405,
|
||
|
|
"grad_norm": 0.7342847594893045,
|
||
|
|
"learning_rate": 2.1126018924147084e-05,
|
||
|
|
"loss": 0.2497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1251305788755417,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 6748.9,
|
||
|
|
"valid_targets_min": 4396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7592592592592595,
|
||
|
|
"grad_norm": 0.5411710794385421,
|
||
|
|
"learning_rate": 2.094157555614304e-05,
|
||
|
|
"loss": 0.2424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13948220014572144,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 7209.6,
|
||
|
|
"valid_targets_min": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7777777777777777,
|
||
|
|
"grad_norm": 0.4659496590666761,
|
||
|
|
"learning_rate": 2.0757051894116382e-05,
|
||
|
|
"loss": 0.2322,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12497460097074509,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 7786.8,
|
||
|
|
"valid_targets_min": 3204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7962962962962963,
|
||
|
|
"grad_norm": 0.8396535698509019,
|
||
|
|
"learning_rate": 2.057246367355109e-05,
|
||
|
|
"loss": 0.2363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13099229335784912,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 6280.6,
|
||
|
|
"valid_targets_min": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.814814814814815,
|
||
|
|
"grad_norm": 0.49954658133269225,
|
||
|
|
"learning_rate": 2.038782663543649e-05,
|
||
|
|
"loss": 0.2489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14834380149841309,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 7883.1,
|
||
|
|
"valid_targets_min": 5303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8333333333333335,
|
||
|
|
"grad_norm": 0.5628070531489748,
|
||
|
|
"learning_rate": 2.0203156524924847e-05,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15435266494750977,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 7055.5,
|
||
|
|
"valid_targets_min": 4829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.851851851851852,
|
||
|
|
"grad_norm": 0.5299852216946729,
|
||
|
|
"learning_rate": 2.0018469089988723e-05,
|
||
|
|
"loss": 0.2526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17090864479541779,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 7314.8,
|
||
|
|
"valid_targets_min": 3083
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8703703703703702,
|
||
|
|
"grad_norm": 0.5063447667820846,
|
||
|
|
"learning_rate": 1.9833780080078063e-05,
|
||
|
|
"loss": 0.2437,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13567772507667542,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 7075.2,
|
||
|
|
"valid_targets_min": 3665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.888888888888889,
|
||
|
|
"grad_norm": 0.5295998490010958,
|
||
|
|
"learning_rate": 1.9649105244777097e-05,
|
||
|
|
"loss": 0.2391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12415409088134766,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 6369.9,
|
||
|
|
"valid_targets_min": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9074074074074074,
|
||
|
|
"grad_norm": 0.5103141821329165,
|
||
|
|
"learning_rate": 1.946446033246132e-05,
|
||
|
|
"loss": 0.246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13250163197517395,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 6484.8,
|
||
|
|
"valid_targets_min": 4386
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.925925925925926,
|
||
|
|
"grad_norm": 0.46646251978801523,
|
||
|
|
"learning_rate": 1.927986108895448e-05,
|
||
|
|
"loss": 0.2446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11562138795852661,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 7588.4,
|
||
|
|
"valid_targets_min": 5279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9444444444444446,
|
||
|
|
"grad_norm": 0.48394668574105776,
|
||
|
|
"learning_rate": 1.9095323256185877e-05,
|
||
|
|
"loss": 0.2449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12256023287773132,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 6316.8,
|
||
|
|
"valid_targets_min": 1402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.962962962962963,
|
||
|
|
"grad_norm": 0.48521748020882943,
|
||
|
|
"learning_rate": 1.8910862570847936e-05,
|
||
|
|
"loss": 0.2456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12952281534671783,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 7025.2,
|
||
|
|
"valid_targets_min": 4953
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9814814814814814,
|
||
|
|
"grad_norm": 0.49338916425063034,
|
||
|
|
"learning_rate": 1.872649476305423e-05,
|
||
|
|
"loss": 0.2298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11373429000377655,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 5696.2,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0,
|
||
|
|
"grad_norm": 0.5727656140824072,
|
||
|
|
"learning_rate": 1.8542235554998097e-05,
|
||
|
|
"loss": 0.2552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17409956455230713,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 8495.8,
|
||
|
|
"valid_targets_min": 598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.018518518518518,
|
||
|
|
"grad_norm": 0.516032233014235,
|
||
|
|
"learning_rate": 1.835810065961189e-05,
|
||
|
|
"loss": 0.2343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11870207637548447,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 7494.0,
|
||
|
|
"valid_targets_min": 5474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.037037037037037,
|
||
|
|
"grad_norm": 0.5919098251042874,
|
||
|
|
"learning_rate": 1.8174105779227038e-05,
|
||
|
|
"loss": 0.222,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10515961796045303,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 5810.2,
|
||
|
|
"valid_targets_min": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.055555555555555,
|
||
|
|
"grad_norm": 0.5018483993286512,
|
||
|
|
"learning_rate": 1.799026660423503e-05,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10188450664281845,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 7080.5,
|
||
|
|
"valid_targets_min": 4348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.074074074074074,
|
||
|
|
"grad_norm": 0.5299673754548403,
|
||
|
|
"learning_rate": 1.780659881174937e-05,
|
||
|
|
"loss": 0.2238,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13417434692382812,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 6985.2,
|
||
|
|
"valid_targets_min": 3872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.092592592592593,
|
||
|
|
"grad_norm": 0.5130673298943274,
|
||
|
|
"learning_rate": 1.7623118064268726e-05,
|
||
|
|
"loss": 0.2307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09408153593540192,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 6263.2,
|
||
|
|
"valid_targets_min": 3473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.111111111111111,
|
||
|
|
"grad_norm": 0.5462228358022995,
|
||
|
|
"learning_rate": 1.743984000834126e-05,
|
||
|
|
"loss": 0.2286,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09989724308252335,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 5982.1,
|
||
|
|
"valid_targets_min": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.12962962962963,
|
||
|
|
"grad_norm": 0.5988706825678385,
|
||
|
|
"learning_rate": 1.7256780273230358e-05,
|
||
|
|
"loss": 0.2255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10022996366024017,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 5157.0,
|
||
|
|
"valid_targets_min": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.148148148148148,
|
||
|
|
"grad_norm": 0.48816249519586924,
|
||
|
|
"learning_rate": 1.707395446958183e-05,
|
||
|
|
"loss": 0.2304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10353829711675644,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 6314.1,
|
||
|
|
"valid_targets_min": 423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.166666666666667,
|
||
|
|
"grad_norm": 0.49451693646994704,
|
||
|
|
"learning_rate": 1.6891378188092694e-05,
|
||
|
|
"loss": 0.2417,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12381202727556229,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 7282.1,
|
||
|
|
"valid_targets_min": 3336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.185185185185185,
|
||
|
|
"grad_norm": 0.5087635256864471,
|
||
|
|
"learning_rate": 1.6709066998181653e-05,
|
||
|
|
"loss": 0.2264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12461232393980026,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 6933.9,
|
||
|
|
"valid_targets_min": 3762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.203703703703703,
|
||
|
|
"grad_norm": 0.5084569033634785,
|
||
|
|
"learning_rate": 1.6527036446661396e-05,
|
||
|
|
"loss": 0.239,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15918824076652527,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 7666.1,
|
||
|
|
"valid_targets_min": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.222222222222222,
|
||
|
|
"grad_norm": 0.5017372904791324,
|
||
|
|
"learning_rate": 1.634530205641283e-05,
|
||
|
|
"loss": 0.235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1275438666343689,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 7447.4,
|
||
|
|
"valid_targets_min": 5245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2407407407407405,
|
||
|
|
"grad_norm": 0.5206246639358723,
|
||
|
|
"learning_rate": 1.616387932506135e-05,
|
||
|
|
"loss": 0.2252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10627258569002151,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 6473.4,
|
||
|
|
"valid_targets_min": 102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2592592592592595,
|
||
|
|
"grad_norm": 0.4977541341325297,
|
||
|
|
"learning_rate": 1.5982783723655225e-05,
|
||
|
|
"loss": 0.2333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08717834949493408,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 6253.9,
|
||
|
|
"valid_targets_min": 2602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.277777777777778,
|
||
|
|
"grad_norm": 0.5023452631707405,
|
||
|
|
"learning_rate": 1.580203069534634e-05,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12174762040376663,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 6748.9,
|
||
|
|
"valid_targets_min": 2016
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.296296296296296,
|
||
|
|
"grad_norm": 0.5279594814422506,
|
||
|
|
"learning_rate": 1.5621635654073216e-05,
|
||
|
|
"loss": 0.2374,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12284161150455475,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 6711.1,
|
||
|
|
"valid_targets_min": 4902
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.314814814814815,
|
||
|
|
"grad_norm": 0.49117787226016985,
|
||
|
|
"learning_rate": 1.5441613983246606e-05,
|
||
|
|
"loss": 0.2316,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1301436871290207,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 6814.2,
|
||
|
|
"valid_targets_min": 5645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.333333333333333,
|
||
|
|
"grad_norm": 0.5240995867758355,
|
||
|
|
"learning_rate": 1.5261981034437617e-05,
|
||
|
|
"loss": 0.2275,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10003925859928131,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 5936.5,
|
||
|
|
"valid_targets_min": 1476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.351851851851852,
|
||
|
|
"grad_norm": 0.4887431417522901,
|
||
|
|
"learning_rate": 1.508275212606862e-05,
|
||
|
|
"loss": 0.2295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.126789391040802,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 7479.0,
|
||
|
|
"valid_targets_min": 2546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.37037037037037,
|
||
|
|
"grad_norm": 0.5010137089706221,
|
||
|
|
"learning_rate": 1.490394254210691e-05,
|
||
|
|
"loss": 0.2247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11638576537370682,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 7829.9,
|
||
|
|
"valid_targets_min": 6242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.388888888888889,
|
||
|
|
"grad_norm": 0.5302823871361698,
|
||
|
|
"learning_rate": 1.4725567530761402e-05,
|
||
|
|
"loss": 0.2335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09980429708957672,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 6296.0,
|
||
|
|
"valid_targets_min": 1823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.407407407407407,
|
||
|
|
"grad_norm": 0.48938232381512503,
|
||
|
|
"learning_rate": 1.4547642303182282e-05,
|
||
|
|
"loss": 0.2324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12234029173851013,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 8091.8,
|
||
|
|
"valid_targets_min": 5562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.425925925925926,
|
||
|
|
"grad_norm": 0.5031522973767952,
|
||
|
|
"learning_rate": 1.4370182032163861e-05,
|
||
|
|
"loss": 0.2177,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07054075598716736,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 4125.9,
|
||
|
|
"valid_targets_min": 392
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.444444444444445,
|
||
|
|
"grad_norm": 0.5018613158088705,
|
||
|
|
"learning_rate": 1.4193201850850717e-05,
|
||
|
|
"loss": 0.2363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10027652978897095,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 6186.6,
|
||
|
|
"valid_targets_min": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.462962962962963,
|
||
|
|
"grad_norm": 0.4856214263792827,
|
||
|
|
"learning_rate": 1.4016716851447173e-05,
|
||
|
|
"loss": 0.2303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11477172374725342,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 7052.6,
|
||
|
|
"valid_targets_min": 4152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.481481481481482,
|
||
|
|
"grad_norm": 0.4874276605163382,
|
||
|
|
"learning_rate": 1.3840742083930297e-05,
|
||
|
|
"loss": 0.2218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10933384299278259,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 8173.6,
|
||
|
|
"valid_targets_min": 4973
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5,
|
||
|
|
"grad_norm": 0.531455676384898,
|
||
|
|
"learning_rate": 1.3665292554766513e-05,
|
||
|
|
"loss": 0.2285,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11772537231445312,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 5933.6,
|
||
|
|
"valid_targets_min": 357
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.518518518518518,
|
||
|
|
"grad_norm": 0.5372681406699098,
|
||
|
|
"learning_rate": 1.3490383225631885e-05,
|
||
|
|
"loss": 0.221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10665614157915115,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 7539.6,
|
||
|
|
"valid_targets_min": 4008
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.537037037037037,
|
||
|
|
"grad_norm": 0.5211115545139049,
|
||
|
|
"learning_rate": 1.3316029012136251e-05,
|
||
|
|
"loss": 0.2344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12121891230344772,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 7162.0,
|
||
|
|
"valid_targets_min": 3992
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.555555555555555,
|
||
|
|
"grad_norm": 0.46584465301969513,
|
||
|
|
"learning_rate": 1.314224478255128e-05,
|
||
|
|
"loss": 0.2276,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11857609450817108,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 6463.0,
|
||
|
|
"valid_targets_min": 1340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.574074074074074,
|
||
|
|
"grad_norm": 0.4588439125962153,
|
||
|
|
"learning_rate": 1.2969045356542558e-05,
|
||
|
|
"loss": 0.2279,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10397493839263916,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 7388.2,
|
||
|
|
"valid_targets_min": 4077
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.592592592592593,
|
||
|
|
"grad_norm": 0.50171375299644,
|
||
|
|
"learning_rate": 1.2796445503905797e-05,
|
||
|
|
"loss": 0.2221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.119759202003479,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 7190.8,
|
||
|
|
"valid_targets_min": 1976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.611111111111111,
|
||
|
|
"grad_norm": 0.5252807258839242,
|
||
|
|
"learning_rate": 1.2624459943307378e-05,
|
||
|
|
"loss": 0.2246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10422271490097046,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 6359.0,
|
||
|
|
"valid_targets_min": 4126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.62962962962963,
|
||
|
|
"grad_norm": 0.4591959160558025,
|
||
|
|
"learning_rate": 1.2453103341029154e-05,
|
||
|
|
"loss": 0.2314,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13636991381645203,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 7624.4,
|
||
|
|
"valid_targets_min": 4176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.648148148148148,
|
||
|
|
"grad_norm": 0.5397510993994119,
|
||
|
|
"learning_rate": 1.2282390309717776e-05,
|
||
|
|
"loss": 0.2255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10083657503128052,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 5491.2,
|
||
|
|
"valid_targets_min": 231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.666666666666667,
|
||
|
|
"grad_norm": 0.47818583241901386,
|
||
|
|
"learning_rate": 1.2112335407138582e-05,
|
||
|
|
"loss": 0.2246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11500853300094604,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 6869.8,
|
||
|
|
"valid_targets_min": 4280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.685185185185185,
|
||
|
|
"grad_norm": 0.6028087003564473,
|
||
|
|
"learning_rate": 1.1942953134934185e-05,
|
||
|
|
"loss": 0.2303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1319492757320404,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 5306.4,
|
||
|
|
"valid_targets_min": 3887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.703703703703704,
|
||
|
|
"grad_norm": 0.487471515961342,
|
||
|
|
"learning_rate": 1.1774257937387774e-05,
|
||
|
|
"loss": 0.2241,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10458207130432129,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 6610.9,
|
||
|
|
"valid_targets_min": 3766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.722222222222222,
|
||
|
|
"grad_norm": 0.5162656468076039,
|
||
|
|
"learning_rate": 1.160626420019142e-05,
|
||
|
|
"loss": 0.2373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14040455222129822,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 7369.4,
|
||
|
|
"valid_targets_min": 1723
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7407407407407405,
|
||
|
|
"grad_norm": 0.4992171347071927,
|
||
|
|
"learning_rate": 1.1438986249219292e-05,
|
||
|
|
"loss": 0.2274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1111883670091629,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 7044.2,
|
||
|
|
"valid_targets_min": 1730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7592592592592595,
|
||
|
|
"grad_norm": 0.4980112243313627,
|
||
|
|
"learning_rate": 1.1272438349305996e-05,
|
||
|
|
"loss": 0.2216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1132529079914093,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 6436.4,
|
||
|
|
"valid_targets_min": 4369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.777777777777778,
|
||
|
|
"grad_norm": 0.4991126681737921,
|
||
|
|
"learning_rate": 1.1106634703030132e-05,
|
||
|
|
"loss": 0.227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13967010378837585,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 8305.5,
|
||
|
|
"valid_targets_min": 5394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.796296296296296,
|
||
|
|
"grad_norm": 0.5155034087864115,
|
||
|
|
"learning_rate": 1.0941589449503152e-05,
|
||
|
|
"loss": 0.2262,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10799385607242584,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 6244.2,
|
||
|
|
"valid_targets_min": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.814814814814815,
|
||
|
|
"grad_norm": 0.4522833911832173,
|
||
|
|
"learning_rate": 1.0777316663163604e-05,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10367443412542343,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 7749.2,
|
||
|
|
"valid_targets_min": 3820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.833333333333333,
|
||
|
|
"grad_norm": 0.4560347865249111,
|
||
|
|
"learning_rate": 1.061383035257697e-05,
|
||
|
|
"loss": 0.2308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12242156267166138,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 7613.8,
|
||
|
|
"valid_targets_min": 5903
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.851851851851852,
|
||
|
|
"grad_norm": 0.477804618520794,
|
||
|
|
"learning_rate": 1.0451144459241021e-05,
|
||
|
|
"loss": 0.2257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12609726190567017,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 7283.4,
|
||
|
|
"valid_targets_min": 4187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.87037037037037,
|
||
|
|
"grad_norm": 0.4848059755823838,
|
||
|
|
"learning_rate": 1.0289272856396954e-05,
|
||
|
|
"loss": 0.2334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12737436592578888,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 6414.9,
|
||
|
|
"valid_targets_min": 3065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.888888888888889,
|
||
|
|
"grad_norm": 0.5169709458077109,
|
||
|
|
"learning_rate": 1.0128229347846348e-05,
|
||
|
|
"loss": 0.2326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09037278592586517,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 5083.1,
|
||
|
|
"valid_targets_min": 2463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.907407407407407,
|
||
|
|
"grad_norm": 0.4691072483470426,
|
||
|
|
"learning_rate": 9.968027666774005e-06,
|
||
|
|
"loss": 0.2232,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11444838345050812,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 6681.6,
|
||
|
|
"valid_targets_min": 4390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.925925925925926,
|
||
|
|
"grad_norm": 0.5394803424881776,
|
||
|
|
"learning_rate": 9.80868147457683e-06,
|
||
|
|
"loss": 0.2365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14064496755599976,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 7132.9,
|
||
|
|
"valid_targets_min": 4613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.944444444444445,
|
||
|
|
"grad_norm": 0.519246694995982,
|
||
|
|
"learning_rate": 9.650204359698884e-06,
|
||
|
|
"loss": 0.2364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10239046812057495,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 5749.8,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.962962962962963,
|
||
|
|
"grad_norm": 0.5251939867353143,
|
||
|
|
"learning_rate": 9.492609836472563e-06,
|
||
|
|
"loss": 0.2264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1245264858007431,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 8139.8,
|
||
|
|
"valid_targets_min": 304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.981481481481482,
|
||
|
|
"grad_norm": 0.4841966754669591,
|
||
|
|
"learning_rate": 9.33591134396618e-06,
|
||
|
|
"loss": 0.2369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09950557351112366,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 5948.6,
|
||
|
|
"valid_targets_min": 1240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.46635316251229963,
|
||
|
|
"learning_rate": 9.180122244837893e-06,
|
||
|
|
"loss": 0.2187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09984344244003296,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 6878.8,
|
||
|
|
"valid_targets_min": 4793
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.018518518518518,
|
||
|
|
"grad_norm": 0.4917225509060581,
|
||
|
|
"learning_rate": 9.025255824196234e-06,
|
||
|
|
"loss": 0.2078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1116233840584755,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 6953.8,
|
||
|
|
"valid_targets_min": 3714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.037037037037037,
|
||
|
|
"grad_norm": 0.5096642938537449,
|
||
|
|
"learning_rate": 8.871325288467188e-06,
|
||
|
|
"loss": 0.2184,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1091044619679451,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 6872.6,
|
||
|
|
"valid_targets_min": 3982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.055555555555555,
|
||
|
|
"grad_norm": 0.5130910816904468,
|
||
|
|
"learning_rate": 8.718343764267967e-06,
|
||
|
|
"loss": 0.2218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08576367050409317,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 5957.0,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.074074074074074,
|
||
|
|
"grad_norm": 0.506747302079751,
|
||
|
|
"learning_rate": 8.566324297287674e-06,
|
||
|
|
"loss": 0.2212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14150217175483704,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 7874.0,
|
||
|
|
"valid_targets_min": 4982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.092592592592593,
|
||
|
|
"grad_norm": 0.5233616721490927,
|
||
|
|
"learning_rate": 8.41527985117478e-06,
|
||
|
|
"loss": 0.2303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09984303265810013,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 5710.6,
|
||
|
|
"valid_targets_min": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.111111111111111,
|
||
|
|
"grad_norm": 0.5312752630451189,
|
||
|
|
"learning_rate": 8.265223306431644e-06,
|
||
|
|
"loss": 0.2156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10283049941062927,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 5761.2,
|
||
|
|
"valid_targets_min": 427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.12962962962963,
|
||
|
|
"grad_norm": 0.5693784646821431,
|
||
|
|
"learning_rate": 8.116167459316116e-06,
|
||
|
|
"loss": 0.2305,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12732897698879242,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 7584.5,
|
||
|
|
"valid_targets_min": 5131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.148148148148148,
|
||
|
|
"grad_norm": 0.5027220474125025,
|
||
|
|
"learning_rate": 7.96812502075031e-06,
|
||
|
|
"loss": 0.2249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11553220450878143,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 6780.9,
|
||
|
|
"valid_targets_min": 368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.166666666666667,
|
||
|
|
"grad_norm": 0.6619482906498753,
|
||
|
|
"learning_rate": 7.821108615236663e-06,
|
||
|
|
"loss": 0.22,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11507928371429443,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 6894.5,
|
||
|
|
"valid_targets_min": 4617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.185185185185185,
|
||
|
|
"grad_norm": 0.591868727798189,
|
||
|
|
"learning_rate": 7.675130779781385e-06,
|
||
|
|
"loss": 0.2169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10910572856664658,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 6408.5,
|
||
|
|
"valid_targets_min": 3807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.203703703703703,
|
||
|
|
"grad_norm": 0.5126683283711245,
|
||
|
|
"learning_rate": 7.530203962825331e-06,
|
||
|
|
"loss": 0.2166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10854263603687286,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 7869.9,
|
||
|
|
"valid_targets_min": 6327
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.222222222222222,
|
||
|
|
"grad_norm": 0.5236210165162991,
|
||
|
|
"learning_rate": 7.386340523182451e-06,
|
||
|
|
"loss": 0.2248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.127744659781456,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 6906.0,
|
||
|
|
"valid_targets_min": 2789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2407407407407405,
|
||
|
|
"grad_norm": 0.5593021034033383,
|
||
|
|
"learning_rate": 7.243552728985879e-06,
|
||
|
|
"loss": 0.2182,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11911047995090485,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 6434.8,
|
||
|
|
"valid_targets_min": 3960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2592592592592595,
|
||
|
|
"grad_norm": 0.49741040737686926,
|
||
|
|
"learning_rate": 7.1018527566417535e-06,
|
||
|
|
"loss": 0.2255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.093578040599823,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 5445.6,
|
||
|
|
"valid_targets_min": 347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.277777777777778,
|
||
|
|
"grad_norm": 0.50446609238118,
|
||
|
|
"learning_rate": 6.961252689790836e-06,
|
||
|
|
"loss": 0.2298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12511374056339264,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 7610.8,
|
||
|
|
"valid_targets_min": 3551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.296296296296296,
|
||
|
|
"grad_norm": 0.5170007137165736,
|
||
|
|
"learning_rate": 6.821764518278109e-06,
|
||
|
|
"loss": 0.211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10103592276573181,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 5218.6,
|
||
|
|
"valid_targets_min": 402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.314814814814815,
|
||
|
|
"grad_norm": 0.49515563805483387,
|
||
|
|
"learning_rate": 6.6834001371302874e-06,
|
||
|
|
"loss": 0.2123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11039568483829498,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 7072.1,
|
||
|
|
"valid_targets_min": 4250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.333333333333333,
|
||
|
|
"grad_norm": 0.45720233551933753,
|
||
|
|
"learning_rate": 6.546171345541474e-06,
|
||
|
|
"loss": 0.2261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10742579400539398,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 7819.8,
|
||
|
|
"valid_targets_min": 5542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.351851851851852,
|
||
|
|
"grad_norm": 0.4933421472302054,
|
||
|
|
"learning_rate": 6.410089845866969e-06,
|
||
|
|
"loss": 0.2222,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09236937761306763,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 7047.8,
|
||
|
|
"valid_targets_min": 5584
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.37037037037037,
|
||
|
|
"grad_norm": 0.46977639240502755,
|
||
|
|
"learning_rate": 6.275167242625331e-06,
|
||
|
|
"loss": 0.2178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12316617369651794,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 8225.5,
|
||
|
|
"valid_targets_min": 6143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.388888888888889,
|
||
|
|
"grad_norm": 0.4919487785546802,
|
||
|
|
"learning_rate": 6.141415041508774e-06,
|
||
|
|
"loss": 0.2162,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10855259001255035,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 6035.5,
|
||
|
|
"valid_targets_min": 387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.407407407407407,
|
||
|
|
"grad_norm": 0.48842724784791386,
|
||
|
|
"learning_rate": 6.008844648402037e-06,
|
||
|
|
"loss": 0.227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1209472045302391,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 7274.0,
|
||
|
|
"valid_targets_min": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.425925925925926,
|
||
|
|
"grad_norm": 0.5168122874383904,
|
||
|
|
"learning_rate": 5.877467368409711e-06,
|
||
|
|
"loss": 0.2231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10533533990383148,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 6178.0,
|
||
|
|
"valid_targets_min": 1402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.444444444444445,
|
||
|
|
"grad_norm": 0.4938494111570672,
|
||
|
|
"learning_rate": 5.74729440489219e-06,
|
||
|
|
"loss": 0.2105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10273820906877518,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 7238.5,
|
||
|
|
"valid_targets_min": 4875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.462962962962963,
|
||
|
|
"grad_norm": 0.4908203224021245,
|
||
|
|
"learning_rate": 5.61833685851028e-06,
|
||
|
|
"loss": 0.2124,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1013522744178772,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 6711.8,
|
||
|
|
"valid_targets_min": 5280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.481481481481482,
|
||
|
|
"grad_norm": 0.6323153682269085,
|
||
|
|
"learning_rate": 5.490605726278602e-06,
|
||
|
|
"loss": 0.2193,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09872453659772873,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 6050.2,
|
||
|
|
"valid_targets_min": 1873
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5,
|
||
|
|
"grad_norm": 0.5059930160839521,
|
||
|
|
"learning_rate": 5.364111900627759e-06,
|
||
|
|
"loss": 0.2129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10614493489265442,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 6757.8,
|
||
|
|
"valid_targets_min": 3389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.518518518518518,
|
||
|
|
"grad_norm": 0.7298032265200861,
|
||
|
|
"learning_rate": 5.238866168475532e-06,
|
||
|
|
"loss": 0.2229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09671928733587265,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 6525.5,
|
||
|
|
"valid_targets_min": 1156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.537037037037037,
|
||
|
|
"grad_norm": 0.46965643801489465,
|
||
|
|
"learning_rate": 5.114879210306967e-06,
|
||
|
|
"loss": 0.2184,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09873900562524796,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 6918.2,
|
||
|
|
"valid_targets_min": 4153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.555555555555555,
|
||
|
|
"grad_norm": 0.5036641297823798,
|
||
|
|
"learning_rate": 4.9921615992636004e-06,
|
||
|
|
"loss": 0.2197,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10597766935825348,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 6490.0,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.574074074074074,
|
||
|
|
"grad_norm": 0.5055842380658947,
|
||
|
|
"learning_rate": 4.870723800241832e-06,
|
||
|
|
"loss": 0.2106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08831615746021271,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 5106.0,
|
||
|
|
"valid_targets_min": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.592592592592593,
|
||
|
|
"grad_norm": 0.4574856423862591,
|
||
|
|
"learning_rate": 4.750576169000476e-06,
|
||
|
|
"loss": 0.224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12107911705970764,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 8312.5,
|
||
|
|
"valid_targets_min": 7302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.611111111111111,
|
||
|
|
"grad_norm": 0.5526024879531509,
|
||
|
|
"learning_rate": 4.631728951277716e-06,
|
||
|
|
"loss": 0.217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1274055689573288,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 6396.0,
|
||
|
|
"valid_targets_min": 329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.62962962962963,
|
||
|
|
"grad_norm": 0.493838956685955,
|
||
|
|
"learning_rate": 4.514192281917351e-06,
|
||
|
|
"loss": 0.2343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12989237904548645,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 6073.9,
|
||
|
|
"valid_targets_min": 431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.648148148148148,
|
||
|
|
"grad_norm": 0.4867215016980891,
|
||
|
|
"learning_rate": 4.397976184004553e-06,
|
||
|
|
"loss": 0.2122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0971713662147522,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 6264.5,
|
||
|
|
"valid_targets_min": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.666666666666667,
|
||
|
|
"grad_norm": 0.5159449102090277,
|
||
|
|
"learning_rate": 4.283090568011106e-06,
|
||
|
|
"loss": 0.215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12054109573364258,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 7413.1,
|
||
|
|
"valid_targets_min": 5734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.685185185185185,
|
||
|
|
"grad_norm": 0.4889188303592639,
|
||
|
|
"learning_rate": 4.169545230950321e-06,
|
||
|
|
"loss": 0.2116,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1139330267906189,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 6801.9,
|
||
|
|
"valid_targets_min": 3473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.703703703703704,
|
||
|
|
"grad_norm": 0.4840565682120755,
|
||
|
|
"learning_rate": 4.057349855541557e-06,
|
||
|
|
"loss": 0.2261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10931402444839478,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 7520.5,
|
||
|
|
"valid_targets_min": 4113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.722222222222222,
|
||
|
|
"grad_norm": 0.488027844595327,
|
||
|
|
"learning_rate": 3.9465140093845035e-06,
|
||
|
|
"loss": 0.2246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1238413006067276,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 7499.6,
|
||
|
|
"valid_targets_min": 4714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7407407407407405,
|
||
|
|
"grad_norm": 0.5478734177317678,
|
||
|
|
"learning_rate": 3.837047144143331e-06,
|
||
|
|
"loss": 0.2321,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12330850958824158,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 5695.8,
|
||
|
|
"valid_targets_min": 2294
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7592592592592595,
|
||
|
|
"grad_norm": 0.4783212632587014,
|
||
|
|
"learning_rate": 3.7289585947406504e-06,
|
||
|
|
"loss": 0.2175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09635508060455322,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 6844.5,
|
||
|
|
"valid_targets_min": 4746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.777777777777778,
|
||
|
|
"grad_norm": 0.4569367430559496,
|
||
|
|
"learning_rate": 3.6222575785614898e-06,
|
||
|
|
"loss": 0.2117,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11338578909635544,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 7313.2,
|
||
|
|
"valid_targets_min": 5594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.796296296296296,
|
||
|
|
"grad_norm": 0.4652991348453085,
|
||
|
|
"learning_rate": 3.5169531946672563e-06,
|
||
|
|
"loss": 0.2301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12462925910949707,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 8310.9,
|
||
|
|
"valid_targets_min": 2015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.814814814814815,
|
||
|
|
"grad_norm": 0.45290586655386583,
|
||
|
|
"learning_rate": 3.413054423019815e-06,
|
||
|
|
"loss": 0.2188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1050763726234436,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 7246.6,
|
||
|
|
"valid_targets_min": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.833333333333333,
|
||
|
|
"grad_norm": 0.46941892907078986,
|
||
|
|
"learning_rate": 3.3105701237156885e-06,
|
||
|
|
"loss": 0.2247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09569090604782104,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 6234.6,
|
||
|
|
"valid_targets_min": 3204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.851851851851852,
|
||
|
|
"grad_norm": 0.48656566073537605,
|
||
|
|
"learning_rate": 3.2095090362305316e-06,
|
||
|
|
"loss": 0.2267,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12894827127456665,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 7983.6,
|
||
|
|
"valid_targets_min": 6576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.87037037037037,
|
||
|
|
"grad_norm": 0.5659066142032889,
|
||
|
|
"learning_rate": 3.1098797786738433e-06,
|
||
|
|
"loss": 0.2255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12359024584293365,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 6504.0,
|
||
|
|
"valid_targets_min": 3646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.888888888888889,
|
||
|
|
"grad_norm": 0.4995416346883199,
|
||
|
|
"learning_rate": 3.011690847054054e-06,
|
||
|
|
"loss": 0.2197,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1010461077094078,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 6379.1,
|
||
|
|
"valid_targets_min": 2857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.907407407407407,
|
||
|
|
"grad_norm": 0.5306353785153805,
|
||
|
|
"learning_rate": 2.9149506145540064e-06,
|
||
|
|
"loss": 0.2209,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09737998247146606,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 6443.9,
|
||
|
|
"valid_targets_min": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.925925925925926,
|
||
|
|
"grad_norm": 0.4781652098007515,
|
||
|
|
"learning_rate": 2.819667330816942e-06,
|
||
|
|
"loss": 0.2114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10525282472372055,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 7038.8,
|
||
|
|
"valid_targets_min": 2065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.944444444444445,
|
||
|
|
"grad_norm": 0.5099409359460987,
|
||
|
|
"learning_rate": 2.725849121242976e-06,
|
||
|
|
"loss": 0.2117,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09639444947242737,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 7068.9,
|
||
|
|
"valid_targets_min": 2646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.962962962962963,
|
||
|
|
"grad_norm": 0.482201179503476,
|
||
|
|
"learning_rate": 2.633503986296215e-06,
|
||
|
|
"loss": 0.2132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09317293018102646,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 6141.4,
|
||
|
|
"valid_targets_min": 3965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.981481481481482,
|
||
|
|
"grad_norm": 0.524434990755225,
|
||
|
|
"learning_rate": 2.5426398008225084e-06,
|
||
|
|
"loss": 0.214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07734841853380203,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 5183.4,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0,
|
||
|
|
"grad_norm": 0.5477430720593653,
|
||
|
|
"learning_rate": 2.4532643133778922e-06,
|
||
|
|
"loss": 0.2186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09451612830162048,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 6252.8,
|
||
|
|
"valid_targets_min": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.018518518518518,
|
||
|
|
"grad_norm": 0.5122116198318503,
|
||
|
|
"learning_rate": 2.36538514556784e-06,
|
||
|
|
"loss": 0.2131,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12386941909790039,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 7532.4,
|
||
|
|
"valid_targets_min": 5944
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.037037037037037,
|
||
|
|
"grad_norm": 0.48210109707959664,
|
||
|
|
"learning_rate": 2.2790097913973154e-06,
|
||
|
|
"loss": 0.2141,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09711402654647827,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 6310.2,
|
||
|
|
"valid_targets_min": 3336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.055555555555555,
|
||
|
|
"grad_norm": 0.5222607009794501,
|
||
|
|
"learning_rate": 2.1941456166316953e-06,
|
||
|
|
"loss": 0.221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11883814632892609,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 6612.6,
|
||
|
|
"valid_targets_min": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.074074074074074,
|
||
|
|
"grad_norm": 0.5229353313952704,
|
||
|
|
"learning_rate": 2.1107998581686793e-06,
|
||
|
|
"loss": 0.2128,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10010014474391937,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 6095.0,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.092592592592593,
|
||
|
|
"grad_norm": 0.5109976673243143,
|
||
|
|
"learning_rate": 2.0289796234211235e-06,
|
||
|
|
"loss": 0.2226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10803158581256866,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 6716.4,
|
||
|
|
"valid_targets_min": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.111111111111111,
|
||
|
|
"grad_norm": 0.4762813898465876,
|
||
|
|
"learning_rate": 1.9486918897109607e-06,
|
||
|
|
"loss": 0.2142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1186881810426712,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 7162.5,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.12962962962963,
|
||
|
|
"grad_norm": 0.5468221852953015,
|
||
|
|
"learning_rate": 1.8699435036741987e-06,
|
||
|
|
"loss": 0.2134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11599022150039673,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 5360.0,
|
||
|
|
"valid_targets_min": 437
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.148148148148148,
|
||
|
|
"grad_norm": 0.4613166221297042,
|
||
|
|
"learning_rate": 1.792741180677069e-06,
|
||
|
|
"loss": 0.2116,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11193803697824478,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 7607.8,
|
||
|
|
"valid_targets_min": 3046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.166666666666667,
|
||
|
|
"grad_norm": 0.4992299365923156,
|
||
|
|
"learning_rate": 1.7170915042433468e-06,
|
||
|
|
"loss": 0.2112,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10833920538425446,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 6474.9,
|
||
|
|
"valid_targets_min": 2007
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.185185185185185,
|
||
|
|
"grad_norm": 0.48734826804572084,
|
||
|
|
"learning_rate": 1.643000925492959e-06,
|
||
|
|
"loss": 0.2167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10757909715175629,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 7296.0,
|
||
|
|
"valid_targets_min": 4685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.203703703703703,
|
||
|
|
"grad_norm": 0.5038576702929699,
|
||
|
|
"learning_rate": 1.5704757625918454e-06,
|
||
|
|
"loss": 0.2198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0956081971526146,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 6562.6,
|
||
|
|
"valid_targets_min": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.222222222222222,
|
||
|
|
"grad_norm": 0.5014972764981547,
|
||
|
|
"learning_rate": 1.499522200213166e-06,
|
||
|
|
"loss": 0.2139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13388274610042572,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 7633.9,
|
||
|
|
"valid_targets_min": 2642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2407407407407405,
|
||
|
|
"grad_norm": 0.5727109885681817,
|
||
|
|
"learning_rate": 1.4301462890099016e-06,
|
||
|
|
"loss": 0.2188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1071832925081253,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 6702.2,
|
||
|
|
"valid_targets_min": 2613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2592592592592595,
|
||
|
|
"grad_norm": 0.526681748443717,
|
||
|
|
"learning_rate": 1.362353945098862e-06,
|
||
|
|
"loss": 0.2289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.149953693151474,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 7246.5,
|
||
|
|
"valid_targets_min": 2595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.277777777777778,
|
||
|
|
"grad_norm": 0.4820760829827286,
|
||
|
|
"learning_rate": 1.2961509495562074e-06,
|
||
|
|
"loss": 0.2115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09220828860998154,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 6987.0,
|
||
|
|
"valid_targets_min": 4035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.296296296296296,
|
||
|
|
"grad_norm": 0.5236479137171796,
|
||
|
|
"learning_rate": 1.2315429479244378e-06,
|
||
|
|
"loss": 0.2169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11444682627916336,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 6010.9,
|
||
|
|
"valid_targets_min": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.314814814814815,
|
||
|
|
"grad_norm": 0.5245034887848089,
|
||
|
|
"learning_rate": 1.1685354497309764e-06,
|
||
|
|
"loss": 0.2141,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12459330260753632,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 6315.4,
|
||
|
|
"valid_targets_min": 4419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.333333333333333,
|
||
|
|
"grad_norm": 1.9145227209817528,
|
||
|
|
"learning_rate": 1.107133828018323e-06,
|
||
|
|
"loss": 0.2228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09349900484085083,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 6550.8,
|
||
|
|
"valid_targets_min": 4214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.351851851851852,
|
||
|
|
"grad_norm": 0.5373455723958673,
|
||
|
|
"learning_rate": 1.0473433188858784e-06,
|
||
|
|
"loss": 0.2142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08351831883192062,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 5286.6,
|
||
|
|
"valid_targets_min": 2838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.37037037037037,
|
||
|
|
"grad_norm": 0.5118701946064793,
|
||
|
|
"learning_rate": 9.891690210434235e-07,
|
||
|
|
"loss": 0.226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08807747066020966,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 5508.8,
|
||
|
|
"valid_targets_min": 3072
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.388888888888889,
|
||
|
|
"grad_norm": 0.48206381384965274,
|
||
|
|
"learning_rate": 9.326158953763009e-07,
|
||
|
|
"loss": 0.2017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09705394506454468,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 6747.9,
|
||
|
|
"valid_targets_min": 5018
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.407407407407407,
|
||
|
|
"grad_norm": 0.5459872813528737,
|
||
|
|
"learning_rate": 8.776887645224086e-07,
|
||
|
|
"loss": 0.2075,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1001187264919281,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 7545.6,
|
||
|
|
"valid_targets_min": 5814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.425925925925926,
|
||
|
|
"grad_norm": 0.5445943239279856,
|
||
|
|
"learning_rate": 8.243923124609066e-07,
|
||
|
|
"loss": 0.2161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09796342998743057,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 7020.4,
|
||
|
|
"valid_targets_min": 3779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.444444444444445,
|
||
|
|
"grad_norm": 0.4684005826009441,
|
||
|
|
"learning_rate": 7.727310841128055e-07,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14073237776756287,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 8810.2,
|
||
|
|
"valid_targets_min": 1462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.462962962962963,
|
||
|
|
"grad_norm": 0.4541060098469679,
|
||
|
|
"learning_rate": 7.227094849533878e-07,
|
||
|
|
"loss": 0.2124,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09816377609968185,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 7020.2,
|
||
|
|
"valid_targets_min": 2597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.481481481481482,
|
||
|
|
"grad_norm": 0.4673983795871858,
|
||
|
|
"learning_rate": 6.743317806365213e-07,
|
||
|
|
"loss": 0.2198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10708339512348175,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 7477.1,
|
||
|
|
"valid_targets_min": 4900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5,
|
||
|
|
"grad_norm": 0.6105942569643106,
|
||
|
|
"learning_rate": 6.276020966309059e-07,
|
||
|
|
"loss": 0.199,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09840669482946396,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 6394.8,
|
||
|
|
"valid_targets_min": 3962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.518518518518518,
|
||
|
|
"grad_norm": 0.5261917751262789,
|
||
|
|
"learning_rate": 5.825244178682621e-07,
|
||
|
|
"loss": 0.2089,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12467174232006073,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 7320.4,
|
||
|
|
"valid_targets_min": 4238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.537037037037037,
|
||
|
|
"grad_norm": 0.5040103215289163,
|
||
|
|
"learning_rate": 5.391025884035239e-07,
|
||
|
|
"loss": 0.2128,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1107766330242157,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 6337.5,
|
||
|
|
"valid_targets_min": 329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.555555555555555,
|
||
|
|
"grad_norm": 0.4850335062667886,
|
||
|
|
"learning_rate": 4.973403110870178e-07,
|
||
|
|
"loss": 0.2167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0921555906534195,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 6312.9,
|
||
|
|
"valid_targets_min": 5236
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.574074074074074,
|
||
|
|
"grad_norm": 0.5151266321529011,
|
||
|
|
"learning_rate": 4.5724114724870593e-07,
|
||
|
|
"loss": 0.2176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12505650520324707,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 7164.6,
|
||
|
|
"valid_targets_min": 5352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.592592592592593,
|
||
|
|
"grad_norm": 0.5287100835670476,
|
||
|
|
"learning_rate": 4.188085163944866e-07,
|
||
|
|
"loss": 0.2036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09333769977092743,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 6233.1,
|
||
|
|
"valid_targets_min": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.611111111111111,
|
||
|
|
"grad_norm": 0.5184514712077551,
|
||
|
|
"learning_rate": 3.820456959145924e-07,
|
||
|
|
"loss": 0.2138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10321502387523651,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 6844.4,
|
||
|
|
"valid_targets_min": 4841
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.62962962962963,
|
||
|
|
"grad_norm": 0.7780741153859662,
|
||
|
|
"learning_rate": 3.4695582080410686e-07,
|
||
|
|
"loss": 0.2173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1244448572397232,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 7082.1,
|
||
|
|
"valid_targets_min": 4348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.648148148148148,
|
||
|
|
"grad_norm": 0.5680521512792298,
|
||
|
|
"learning_rate": 3.1354188339562277e-07,
|
||
|
|
"loss": 0.2123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11791664361953735,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 6692.6,
|
||
|
|
"valid_targets_min": 4308
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.666666666666667,
|
||
|
|
"grad_norm": 0.5333100590033606,
|
||
|
|
"learning_rate": 2.818067331040708e-07,
|
||
|
|
"loss": 0.211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10548874735832214,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 6284.0,
|
||
|
|
"valid_targets_min": 4426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.685185185185185,
|
||
|
|
"grad_norm": 0.4756549745078081,
|
||
|
|
"learning_rate": 2.517530761837228e-07,
|
||
|
|
"loss": 0.212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10474137216806412,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 7218.1,
|
||
|
|
"valid_targets_min": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.703703703703704,
|
||
|
|
"grad_norm": 0.6122065278621339,
|
||
|
|
"learning_rate": 2.2338347549742956e-07,
|
||
|
|
"loss": 0.2125,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12869982421398163,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 5424.0,
|
||
|
|
"valid_targets_min": 301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.722222222222222,
|
||
|
|
"grad_norm": 0.47686114593760576,
|
||
|
|
"learning_rate": 1.9670035029804912e-07,
|
||
|
|
"loss": 0.2253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1062430888414383,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 7357.9,
|
||
|
|
"valid_targets_min": 5245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7407407407407405,
|
||
|
|
"grad_norm": 0.4663069570426537,
|
||
|
|
"learning_rate": 1.7170597602215622e-07,
|
||
|
|
"loss": 0.2105,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10924944281578064,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 7345.1,
|
||
|
|
"valid_targets_min": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7592592592592595,
|
||
|
|
"grad_norm": 0.4863214142184415,
|
||
|
|
"learning_rate": 1.4840248409599966e-07,
|
||
|
|
"loss": 0.2166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11560134589672089,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 6813.0,
|
||
|
|
"valid_targets_min": 336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.777777777777778,
|
||
|
|
"grad_norm": 0.5144234551395174,
|
||
|
|
"learning_rate": 1.2679186175373448e-07,
|
||
|
|
"loss": 0.2205,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10990774631500244,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 7161.0,
|
||
|
|
"valid_targets_min": 4072
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.796296296296296,
|
||
|
|
"grad_norm": 0.5092042732657823,
|
||
|
|
"learning_rate": 1.0687595186797073e-07,
|
||
|
|
"loss": 0.2187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11906181275844574,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 7092.1,
|
||
|
|
"valid_targets_min": 2798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.814814814814815,
|
||
|
|
"grad_norm": 0.4879235110558119,
|
||
|
|
"learning_rate": 8.865645279260815e-08,
|
||
|
|
"loss": 0.2191,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11363916099071503,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 6146.9,
|
||
|
|
"valid_targets_min": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.833333333333333,
|
||
|
|
"grad_norm": 0.47702518244695324,
|
||
|
|
"learning_rate": 7.213491821800977e-08,
|
||
|
|
"loss": 0.2081,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10680747032165527,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 7832.5,
|
||
|
|
"valid_targets_min": 6237
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.851851851851852,
|
||
|
|
"grad_norm": 0.5034576503374186,
|
||
|
|
"learning_rate": 5.731275703851902e-08,
|
||
|
|
"loss": 0.2148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08941599726676941,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 6900.6,
|
||
|
|
"valid_targets_min": 2557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.87037037037037,
|
||
|
|
"grad_norm": 0.5186476251514881,
|
||
|
|
"learning_rate": 4.4191233232300235e-08,
|
||
|
|
"loss": 0.206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1075291633605957,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 6265.1,
|
||
|
|
"valid_targets_min": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.888888888888889,
|
||
|
|
"grad_norm": 0.48818375200273456,
|
||
|
|
"learning_rate": 3.2771465753560495e-08,
|
||
|
|
"loss": 0.2164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11990895122289658,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 6609.4,
|
||
|
|
"valid_targets_min": 1476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.907407407407407,
|
||
|
|
"grad_norm": 0.4514003867926065,
|
||
|
|
"learning_rate": 2.3054428437125907e-08,
|
||
|
|
"loss": 0.2106,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0958041399717331,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 7317.1,
|
||
|
|
"valid_targets_min": 1873
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.925925925925926,
|
||
|
|
"grad_norm": 0.4750915394409351,
|
||
|
|
"learning_rate": 1.5040949915399173e-08,
|
||
|
|
"loss": 0.2159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11690913140773773,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 7307.8,
|
||
|
|
"valid_targets_min": 4736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.944444444444445,
|
||
|
|
"grad_norm": 0.4406540020628789,
|
||
|
|
"learning_rate": 8.731713547689424e-09,
|
||
|
|
"loss": 0.2157,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09516197443008423,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 8098.8,
|
||
|
|
"valid_targets_min": 5210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.962962962962963,
|
||
|
|
"grad_norm": 0.4595438788046572,
|
||
|
|
"learning_rate": 4.127257361954406e-09,
|
||
|
|
"loss": 0.2136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08866438269615173,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 6785.6,
|
||
|
|
"valid_targets_min": 274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.981481481481482,
|
||
|
|
"grad_norm": 0.4913637179887596,
|
||
|
|
"learning_rate": 1.2279740088971814e-09,
|
||
|
|
"loss": 0.2234,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13494595885276794,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 7588.5,
|
||
|
|
"valid_targets_min": 5906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"grad_norm": 0.5014822111690687,
|
||
|
|
"learning_rate": 3.411072850179054e-11,
|
||
|
|
"loss": 0.22,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10025124996900558,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 5791.9,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 1890,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 1500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 1.153060328150401e+18,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|