9673 lines
258 KiB
JSON
9673 lines
258 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 4375,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.008,
|
||
|
|
"grad_norm": 29.0015320167095,
|
||
|
|
"learning_rate": 3.6529680365296803e-07,
|
||
|
|
"loss": 0.9436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.9374949932098389,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 5346.2,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.016,
|
||
|
|
"grad_norm": 28.785647078426567,
|
||
|
|
"learning_rate": 8.219178082191781e-07,
|
||
|
|
"loss": 0.9171,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.9532188177108765,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 5302.2,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.024,
|
||
|
|
"grad_norm": 26.616351818564006,
|
||
|
|
"learning_rate": 1.278538812785388e-06,
|
||
|
|
"loss": 0.9016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.8873691558837891,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 5041.2,
|
||
|
|
"valid_targets_min": 1753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.032,
|
||
|
|
"grad_norm": 18.931940149483303,
|
||
|
|
"learning_rate": 1.7351598173515982e-06,
|
||
|
|
"loss": 0.8321,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.8346962928771973,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 5056.3,
|
||
|
|
"valid_targets_min": 1670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04,
|
||
|
|
"grad_norm": 9.219256821460794,
|
||
|
|
"learning_rate": 2.191780821917808e-06,
|
||
|
|
"loss": 0.7515,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.7757815718650818,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 5699.4,
|
||
|
|
"valid_targets_min": 3462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.048,
|
||
|
|
"grad_norm": 4.729746039956276,
|
||
|
|
"learning_rate": 2.6484018264840183e-06,
|
||
|
|
"loss": 0.7129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6848674416542053,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 6168.1,
|
||
|
|
"valid_targets_min": 3152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.056,
|
||
|
|
"grad_norm": 2.4602603161083585,
|
||
|
|
"learning_rate": 3.1050228310502285e-06,
|
||
|
|
"loss": 0.6598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6890764236450195,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 7148.6,
|
||
|
|
"valid_targets_min": 783
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.064,
|
||
|
|
"grad_norm": 1.8763042005789952,
|
||
|
|
"learning_rate": 3.5616438356164386e-06,
|
||
|
|
"loss": 0.6163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6118077635765076,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 5883.2,
|
||
|
|
"valid_targets_min": 2516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.072,
|
||
|
|
"grad_norm": 1.602650359787958,
|
||
|
|
"learning_rate": 4.018264840182649e-06,
|
||
|
|
"loss": 0.6138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5972576141357422,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 5523.7,
|
||
|
|
"valid_targets_min": 1938
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08,
|
||
|
|
"grad_norm": 1.4500272948050443,
|
||
|
|
"learning_rate": 4.4748858447488585e-06,
|
||
|
|
"loss": 0.5783,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.544214129447937,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 5985.9,
|
||
|
|
"valid_targets_min": 2378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.088,
|
||
|
|
"grad_norm": 1.009096494436647,
|
||
|
|
"learning_rate": 4.931506849315069e-06,
|
||
|
|
"loss": 0.5743,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.567956805229187,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 5316.4,
|
||
|
|
"valid_targets_min": 2034
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.096,
|
||
|
|
"grad_norm": 0.8031284787989955,
|
||
|
|
"learning_rate": 5.388127853881279e-06,
|
||
|
|
"loss": 0.5495,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5508638620376587,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 5330.8,
|
||
|
|
"valid_targets_min": 2818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.104,
|
||
|
|
"grad_norm": 0.757836297686658,
|
||
|
|
"learning_rate": 5.8447488584474885e-06,
|
||
|
|
"loss": 0.5469,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5989420413970947,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 6095.4,
|
||
|
|
"valid_targets_min": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.112,
|
||
|
|
"grad_norm": 0.6744715206516015,
|
||
|
|
"learning_rate": 6.301369863013699e-06,
|
||
|
|
"loss": 0.5404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5301568508148193,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 5555.2,
|
||
|
|
"valid_targets_min": 2634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12,
|
||
|
|
"grad_norm": 0.7203043506061609,
|
||
|
|
"learning_rate": 6.757990867579909e-06,
|
||
|
|
"loss": 0.513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5385029315948486,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 4590.4,
|
||
|
|
"valid_targets_min": 1978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.128,
|
||
|
|
"grad_norm": 0.5376106997657275,
|
||
|
|
"learning_rate": 7.214611872146119e-06,
|
||
|
|
"loss": 0.516,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4749385714530945,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 6827.4,
|
||
|
|
"valid_targets_min": 3176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.136,
|
||
|
|
"grad_norm": 0.6106498365181853,
|
||
|
|
"learning_rate": 7.671232876712329e-06,
|
||
|
|
"loss": 0.5076,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.49556079506874084,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 5444.6,
|
||
|
|
"valid_targets_min": 2821
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.144,
|
||
|
|
"grad_norm": 0.5289997229974207,
|
||
|
|
"learning_rate": 8.127853881278539e-06,
|
||
|
|
"loss": 0.4685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.431709349155426,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 6285.6,
|
||
|
|
"valid_targets_min": 2326
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.152,
|
||
|
|
"grad_norm": 0.5416140795290694,
|
||
|
|
"learning_rate": 8.584474885844748e-06,
|
||
|
|
"loss": 0.4608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4308019280433655,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 5831.9,
|
||
|
|
"valid_targets_min": 2332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16,
|
||
|
|
"grad_norm": 0.5038587793531678,
|
||
|
|
"learning_rate": 9.04109589041096e-06,
|
||
|
|
"loss": 0.4727,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.44734400510787964,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 6098.7,
|
||
|
|
"valid_targets_min": 2794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.168,
|
||
|
|
"grad_norm": 0.6499130358808322,
|
||
|
|
"learning_rate": 9.49771689497717e-06,
|
||
|
|
"loss": 0.4559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.46700429916381836,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 5643.3,
|
||
|
|
"valid_targets_min": 3348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.176,
|
||
|
|
"grad_norm": 0.5164488739469887,
|
||
|
|
"learning_rate": 9.95433789954338e-06,
|
||
|
|
"loss": 0.4487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4107072949409485,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 5961.1,
|
||
|
|
"valid_targets_min": 2021
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.184,
|
||
|
|
"grad_norm": 0.6640110036460022,
|
||
|
|
"learning_rate": 1.0410958904109589e-05,
|
||
|
|
"loss": 0.4481,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.418204665184021,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 4624.6,
|
||
|
|
"valid_targets_min": 2451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.192,
|
||
|
|
"grad_norm": 0.5285519912988442,
|
||
|
|
"learning_rate": 1.08675799086758e-05,
|
||
|
|
"loss": 0.4061,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4019206464290619,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 5007.2,
|
||
|
|
"valid_targets_min": 3000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2,
|
||
|
|
"grad_norm": 0.6548167137474712,
|
||
|
|
"learning_rate": 1.132420091324201e-05,
|
||
|
|
"loss": 0.4426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4582175612449646,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 6787.3,
|
||
|
|
"valid_targets_min": 2528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.208,
|
||
|
|
"grad_norm": 0.5867537785236684,
|
||
|
|
"learning_rate": 1.178082191780822e-05,
|
||
|
|
"loss": 0.4217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.44458815455436707,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 5277.4,
|
||
|
|
"valid_targets_min": 2969
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.216,
|
||
|
|
"grad_norm": 0.5692868498717647,
|
||
|
|
"learning_rate": 1.223744292237443e-05,
|
||
|
|
"loss": 0.413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4310612082481384,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 5636.2,
|
||
|
|
"valid_targets_min": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.224,
|
||
|
|
"grad_norm": 0.4977876660314295,
|
||
|
|
"learning_rate": 1.2694063926940641e-05,
|
||
|
|
"loss": 0.3913,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35761088132858276,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 6244.7,
|
||
|
|
"valid_targets_min": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.232,
|
||
|
|
"grad_norm": 0.5977951035748577,
|
||
|
|
"learning_rate": 1.3150684931506849e-05,
|
||
|
|
"loss": 0.4021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4173124432563782,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 6014.3,
|
||
|
|
"valid_targets_min": 2804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24,
|
||
|
|
"grad_norm": 0.5896906397538862,
|
||
|
|
"learning_rate": 1.360730593607306e-05,
|
||
|
|
"loss": 0.4133,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.42406731843948364,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 5189.6,
|
||
|
|
"valid_targets_min": 2551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.248,
|
||
|
|
"grad_norm": 0.5871809118104986,
|
||
|
|
"learning_rate": 1.406392694063927e-05,
|
||
|
|
"loss": 0.4066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3785093426704407,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 5138.2,
|
||
|
|
"valid_targets_min": 2204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.256,
|
||
|
|
"grad_norm": 0.5323983997580389,
|
||
|
|
"learning_rate": 1.4520547945205482e-05,
|
||
|
|
"loss": 0.4003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.37402379512786865,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 5847.3,
|
||
|
|
"valid_targets_min": 2233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.264,
|
||
|
|
"grad_norm": 0.5657381387600574,
|
||
|
|
"learning_rate": 1.497716894977169e-05,
|
||
|
|
"loss": 0.384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3925345540046692,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 4730.1,
|
||
|
|
"valid_targets_min": 798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.272,
|
||
|
|
"grad_norm": 0.5962892332796642,
|
||
|
|
"learning_rate": 1.54337899543379e-05,
|
||
|
|
"loss": 0.3865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36262843012809753,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 4678.3,
|
||
|
|
"valid_targets_min": 1850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28,
|
||
|
|
"grad_norm": 0.5815083569532998,
|
||
|
|
"learning_rate": 1.589041095890411e-05,
|
||
|
|
"loss": 0.3882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.39265549182891846,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 4959.8,
|
||
|
|
"valid_targets_min": 901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.288,
|
||
|
|
"grad_norm": 0.6169273432083452,
|
||
|
|
"learning_rate": 1.634703196347032e-05,
|
||
|
|
"loss": 0.3908,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.38367488980293274,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 5054.1,
|
||
|
|
"valid_targets_min": 2086
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.296,
|
||
|
|
"grad_norm": 0.5073520805794912,
|
||
|
|
"learning_rate": 1.680365296803653e-05,
|
||
|
|
"loss": 0.3696,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35496461391448975,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 5353.8,
|
||
|
|
"valid_targets_min": 2311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.304,
|
||
|
|
"grad_norm": 0.5951830285204442,
|
||
|
|
"learning_rate": 1.726027397260274e-05,
|
||
|
|
"loss": 0.3697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3645014762878418,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 5700.0,
|
||
|
|
"valid_targets_min": 3838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.312,
|
||
|
|
"grad_norm": 3.284936406262614,
|
||
|
|
"learning_rate": 1.771689497716895e-05,
|
||
|
|
"loss": 0.3734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3810313045978546,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 4864.6,
|
||
|
|
"valid_targets_min": 2229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32,
|
||
|
|
"grad_norm": 0.5815089840052964,
|
||
|
|
"learning_rate": 1.8173515981735163e-05,
|
||
|
|
"loss": 0.3804,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.39126527309417725,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 5333.4,
|
||
|
|
"valid_targets_min": 786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.328,
|
||
|
|
"grad_norm": 0.5871232329779636,
|
||
|
|
"learning_rate": 1.863013698630137e-05,
|
||
|
|
"loss": 0.3975,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33873528242111206,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 5260.6,
|
||
|
|
"valid_targets_min": 2590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.336,
|
||
|
|
"grad_norm": 0.5623908127082623,
|
||
|
|
"learning_rate": 1.9086757990867582e-05,
|
||
|
|
"loss": 0.3872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3926033079624176,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 5914.9,
|
||
|
|
"valid_targets_min": 2290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.344,
|
||
|
|
"grad_norm": 0.5829996789134975,
|
||
|
|
"learning_rate": 1.954337899543379e-05,
|
||
|
|
"loss": 0.369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36474674940109253,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 5885.6,
|
||
|
|
"valid_targets_min": 1668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.352,
|
||
|
|
"grad_norm": 0.5814734468612167,
|
||
|
|
"learning_rate": 2e-05,
|
||
|
|
"loss": 0.3893,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.40361806750297546,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 6311.9,
|
||
|
|
"valid_targets_min": 2249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36,
|
||
|
|
"grad_norm": 0.5922468705700561,
|
||
|
|
"learning_rate": 2.045662100456621e-05,
|
||
|
|
"loss": 0.3612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3497602939605713,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 5419.3,
|
||
|
|
"valid_targets_min": 2622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.368,
|
||
|
|
"grad_norm": 0.522456463858042,
|
||
|
|
"learning_rate": 2.0913242009132424e-05,
|
||
|
|
"loss": 0.3675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36271563172340393,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 6095.4,
|
||
|
|
"valid_targets_min": 2460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.376,
|
||
|
|
"grad_norm": 0.6424635869072116,
|
||
|
|
"learning_rate": 2.1369863013698632e-05,
|
||
|
|
"loss": 0.3867,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4467678964138031,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 6817.1,
|
||
|
|
"valid_targets_min": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.384,
|
||
|
|
"grad_norm": 0.6594713274606352,
|
||
|
|
"learning_rate": 2.182648401826484e-05,
|
||
|
|
"loss": 0.3893,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3981528878211975,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 4952.9,
|
||
|
|
"valid_targets_min": 761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.392,
|
||
|
|
"grad_norm": 0.5529439179880414,
|
||
|
|
"learning_rate": 2.2283105022831052e-05,
|
||
|
|
"loss": 0.3679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3784303665161133,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 6425.6,
|
||
|
|
"valid_targets_min": 2506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4,
|
||
|
|
"grad_norm": 0.5831354430338912,
|
||
|
|
"learning_rate": 2.2739726027397263e-05,
|
||
|
|
"loss": 0.3759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3691878318786621,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 5749.1,
|
||
|
|
"valid_targets_min": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.408,
|
||
|
|
"grad_norm": 0.65000190634853,
|
||
|
|
"learning_rate": 2.3196347031963475e-05,
|
||
|
|
"loss": 0.3508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3631196618080139,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 4850.5,
|
||
|
|
"valid_targets_min": 759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.416,
|
||
|
|
"grad_norm": 0.6645578317049943,
|
||
|
|
"learning_rate": 2.3652968036529683e-05,
|
||
|
|
"loss": 0.3512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3574402630329132,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 4735.8,
|
||
|
|
"valid_targets_min": 2466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.424,
|
||
|
|
"grad_norm": 0.6656191465639894,
|
||
|
|
"learning_rate": 2.410958904109589e-05,
|
||
|
|
"loss": 0.3619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.362774521112442,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 6647.6,
|
||
|
|
"valid_targets_min": 2383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.432,
|
||
|
|
"grad_norm": 0.5851717481481281,
|
||
|
|
"learning_rate": 2.4566210045662106e-05,
|
||
|
|
"loss": 0.3601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33157211542129517,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 4614.8,
|
||
|
|
"valid_targets_min": 1941
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44,
|
||
|
|
"grad_norm": 0.5751797691958569,
|
||
|
|
"learning_rate": 2.5022831050228314e-05,
|
||
|
|
"loss": 0.3423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31601348519325256,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 5310.1,
|
||
|
|
"valid_targets_min": 2662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.448,
|
||
|
|
"grad_norm": 0.5643551630768594,
|
||
|
|
"learning_rate": 2.547945205479452e-05,
|
||
|
|
"loss": 0.3632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36333590745925903,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 5489.1,
|
||
|
|
"valid_targets_min": 1956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.456,
|
||
|
|
"grad_norm": 0.5452707045878729,
|
||
|
|
"learning_rate": 2.593607305936073e-05,
|
||
|
|
"loss": 0.3542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3407953381538391,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 5989.4,
|
||
|
|
"valid_targets_min": 3143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.464,
|
||
|
|
"grad_norm": 0.6015341682807512,
|
||
|
|
"learning_rate": 2.6392694063926944e-05,
|
||
|
|
"loss": 0.3495,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36542242765426636,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 4879.5,
|
||
|
|
"valid_targets_min": 904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.472,
|
||
|
|
"grad_norm": 0.4933163499696808,
|
||
|
|
"learning_rate": 2.6849315068493153e-05,
|
||
|
|
"loss": 0.352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3441370725631714,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 6533.8,
|
||
|
|
"valid_targets_min": 2505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48,
|
||
|
|
"grad_norm": 0.6570928110972526,
|
||
|
|
"learning_rate": 2.7305936073059364e-05,
|
||
|
|
"loss": 0.3608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32394760847091675,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 5315.5,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.488,
|
||
|
|
"grad_norm": 0.5963639828543595,
|
||
|
|
"learning_rate": 2.7762557077625572e-05,
|
||
|
|
"loss": 0.3526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3477485775947571,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 5883.1,
|
||
|
|
"valid_targets_min": 838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.496,
|
||
|
|
"grad_norm": 0.5867601585655516,
|
||
|
|
"learning_rate": 2.8219178082191783e-05,
|
||
|
|
"loss": 0.3416,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3327102065086365,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 6452.5,
|
||
|
|
"valid_targets_min": 3225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.504,
|
||
|
|
"grad_norm": 0.6010215270774765,
|
||
|
|
"learning_rate": 2.8675799086757995e-05,
|
||
|
|
"loss": 0.345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33421438932418823,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 5088.0,
|
||
|
|
"valid_targets_min": 1962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.512,
|
||
|
|
"grad_norm": 0.5707370764644752,
|
||
|
|
"learning_rate": 2.9132420091324203e-05,
|
||
|
|
"loss": 0.3323,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33381012082099915,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 5421.4,
|
||
|
|
"valid_targets_min": 1666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.52,
|
||
|
|
"grad_norm": 0.5906992975596219,
|
||
|
|
"learning_rate": 2.958904109589041e-05,
|
||
|
|
"loss": 0.3622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3678559362888336,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 6029.4,
|
||
|
|
"valid_targets_min": 2516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.528,
|
||
|
|
"grad_norm": 0.5371802016216455,
|
||
|
|
"learning_rate": 3.0045662100456626e-05,
|
||
|
|
"loss": 0.3328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2999245524406433,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 5799.5,
|
||
|
|
"valid_targets_min": 3433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.536,
|
||
|
|
"grad_norm": 0.5822954280809008,
|
||
|
|
"learning_rate": 3.0502283105022834e-05,
|
||
|
|
"loss": 0.3374,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33282238245010376,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 4737.1,
|
||
|
|
"valid_targets_min": 1791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.544,
|
||
|
|
"grad_norm": 0.6029890531225149,
|
||
|
|
"learning_rate": 3.0958904109589045e-05,
|
||
|
|
"loss": 0.3557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3635110855102539,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 5499.4,
|
||
|
|
"valid_targets_min": 2056
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.552,
|
||
|
|
"grad_norm": 0.5655087348685645,
|
||
|
|
"learning_rate": 3.141552511415525e-05,
|
||
|
|
"loss": 0.3387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.37209635972976685,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 5426.6,
|
||
|
|
"valid_targets_min": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.56,
|
||
|
|
"grad_norm": 0.5333462848275267,
|
||
|
|
"learning_rate": 3.187214611872147e-05,
|
||
|
|
"loss": 0.3343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3303799331188202,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 5406.4,
|
||
|
|
"valid_targets_min": 779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.568,
|
||
|
|
"grad_norm": 0.5295185279670263,
|
||
|
|
"learning_rate": 3.2328767123287676e-05,
|
||
|
|
"loss": 0.346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32677990198135376,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 5974.3,
|
||
|
|
"valid_targets_min": 1628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.576,
|
||
|
|
"grad_norm": 0.5983539503289153,
|
||
|
|
"learning_rate": 3.2785388127853884e-05,
|
||
|
|
"loss": 0.3459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.39269623160362244,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 6738.8,
|
||
|
|
"valid_targets_min": 1678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.584,
|
||
|
|
"grad_norm": 0.5766745460057539,
|
||
|
|
"learning_rate": 3.324200913242009e-05,
|
||
|
|
"loss": 0.3342,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29778122901916504,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 5049.4,
|
||
|
|
"valid_targets_min": 2193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.592,
|
||
|
|
"grad_norm": 0.6277621549756567,
|
||
|
|
"learning_rate": 3.369863013698631e-05,
|
||
|
|
"loss": 0.3292,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3347325921058655,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 5283.5,
|
||
|
|
"valid_targets_min": 2402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6,
|
||
|
|
"grad_norm": 0.5918373851674901,
|
||
|
|
"learning_rate": 3.4155251141552515e-05,
|
||
|
|
"loss": 0.34,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36557674407958984,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 5248.7,
|
||
|
|
"valid_targets_min": 2252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.608,
|
||
|
|
"grad_norm": 0.6167625201761314,
|
||
|
|
"learning_rate": 3.461187214611872e-05,
|
||
|
|
"loss": 0.3382,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3086686432361603,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 4223.2,
|
||
|
|
"valid_targets_min": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.616,
|
||
|
|
"grad_norm": 0.5591693735982324,
|
||
|
|
"learning_rate": 3.506849315068493e-05,
|
||
|
|
"loss": 0.3504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.368280827999115,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 5700.3,
|
||
|
|
"valid_targets_min": 904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.624,
|
||
|
|
"grad_norm": 0.5799417720813952,
|
||
|
|
"learning_rate": 3.5525114155251146e-05,
|
||
|
|
"loss": 0.3294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3143656551837921,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 5221.9,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.632,
|
||
|
|
"grad_norm": 0.5433992211523737,
|
||
|
|
"learning_rate": 3.5981735159817354e-05,
|
||
|
|
"loss": 0.347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3558363914489746,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 5620.7,
|
||
|
|
"valid_targets_min": 2461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.64,
|
||
|
|
"grad_norm": 0.5621911130490547,
|
||
|
|
"learning_rate": 3.643835616438356e-05,
|
||
|
|
"loss": 0.3426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33586400747299194,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 6311.3,
|
||
|
|
"valid_targets_min": 3458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.648,
|
||
|
|
"grad_norm": 0.509645925031835,
|
||
|
|
"learning_rate": 3.689497716894977e-05,
|
||
|
|
"loss": 0.3363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32601773738861084,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 6338.4,
|
||
|
|
"valid_targets_min": 2329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.656,
|
||
|
|
"grad_norm": 0.5678298273897082,
|
||
|
|
"learning_rate": 3.7351598173515985e-05,
|
||
|
|
"loss": 0.3309,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3719245195388794,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 5888.6,
|
||
|
|
"valid_targets_min": 2675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.664,
|
||
|
|
"grad_norm": 0.5606239723542871,
|
||
|
|
"learning_rate": 3.780821917808219e-05,
|
||
|
|
"loss": 0.3252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3294365406036377,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 5571.3,
|
||
|
|
"valid_targets_min": 2321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.672,
|
||
|
|
"grad_norm": 0.6903980569872955,
|
||
|
|
"learning_rate": 3.82648401826484e-05,
|
||
|
|
"loss": 0.3255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35279595851898193,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 6234.4,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.68,
|
||
|
|
"grad_norm": 0.5770701013621418,
|
||
|
|
"learning_rate": 3.8721461187214615e-05,
|
||
|
|
"loss": 0.3297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3458443582057953,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 5096.1,
|
||
|
|
"valid_targets_min": 2409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.688,
|
||
|
|
"grad_norm": 0.7015744477293635,
|
||
|
|
"learning_rate": 3.9178082191780823e-05,
|
||
|
|
"loss": 0.3325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3228449821472168,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 4802.9,
|
||
|
|
"valid_targets_min": 932
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.696,
|
||
|
|
"grad_norm": 0.5814276027663211,
|
||
|
|
"learning_rate": 3.963470319634704e-05,
|
||
|
|
"loss": 0.3348,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3646930754184723,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 5815.1,
|
||
|
|
"valid_targets_min": 2370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.704,
|
||
|
|
"grad_norm": 0.6088081325835798,
|
||
|
|
"learning_rate": 3.99999936325009e-05,
|
||
|
|
"loss": 0.3119,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3166343569755554,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 6217.9,
|
||
|
|
"valid_targets_min": 2075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.712,
|
||
|
|
"grad_norm": 0.5402127247441519,
|
||
|
|
"learning_rate": 3.9999770770457856e-05,
|
||
|
|
"loss": 0.3215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31158143281936646,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 6205.0,
|
||
|
|
"valid_targets_min": 913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.72,
|
||
|
|
"grad_norm": 0.6399233174311737,
|
||
|
|
"learning_rate": 3.9999229537513936e-05,
|
||
|
|
"loss": 0.3215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32378578186035156,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 4884.4,
|
||
|
|
"valid_targets_min": 1215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.728,
|
||
|
|
"grad_norm": 0.5915756632434621,
|
||
|
|
"learning_rate": 3.999836994228487e-05,
|
||
|
|
"loss": 0.3209,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3110769987106323,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 5154.0,
|
||
|
|
"valid_targets_min": 1728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.736,
|
||
|
|
"grad_norm": 0.6319049650168674,
|
||
|
|
"learning_rate": 3.999719199845432e-05,
|
||
|
|
"loss": 0.3364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32748833298683167,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 5428.9,
|
||
|
|
"valid_targets_min": 2531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.744,
|
||
|
|
"grad_norm": 0.5700199682389311,
|
||
|
|
"learning_rate": 3.999569572477366e-05,
|
||
|
|
"loss": 0.3358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33130866289138794,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 5058.3,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.752,
|
||
|
|
"grad_norm": 0.5092781146395354,
|
||
|
|
"learning_rate": 3.999388114506166e-05,
|
||
|
|
"loss": 0.319,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3101567327976227,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 5335.4,
|
||
|
|
"valid_targets_min": 2946
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.76,
|
||
|
|
"grad_norm": 0.5794130728227729,
|
||
|
|
"learning_rate": 3.999174828820413e-05,
|
||
|
|
"loss": 0.3321,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30423641204833984,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 4474.7,
|
||
|
|
"valid_targets_min": 2081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.768,
|
||
|
|
"grad_norm": 0.5448773661149979,
|
||
|
|
"learning_rate": 3.998929718815341e-05,
|
||
|
|
"loss": 0.3316,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32564234733581543,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 5164.1,
|
||
|
|
"valid_targets_min": 2488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.776,
|
||
|
|
"grad_norm": 0.5387594305910872,
|
||
|
|
"learning_rate": 3.998652788392792e-05,
|
||
|
|
"loss": 0.3269,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3404187262058258,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 5412.6,
|
||
|
|
"valid_targets_min": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.784,
|
||
|
|
"grad_norm": 0.5854237384124716,
|
||
|
|
"learning_rate": 3.9983440419611445e-05,
|
||
|
|
"loss": 0.3211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3279475271701813,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 4456.4,
|
||
|
|
"valid_targets_min": 1879
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.792,
|
||
|
|
"grad_norm": 0.564775380231534,
|
||
|
|
"learning_rate": 3.9980034844352494e-05,
|
||
|
|
"loss": 0.3254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3300333321094513,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 5009.7,
|
||
|
|
"valid_targets_min": 2930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8,
|
||
|
|
"grad_norm": 0.5293883302680141,
|
||
|
|
"learning_rate": 3.9976311212363495e-05,
|
||
|
|
"loss": 0.3263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3328399658203125,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 6093.4,
|
||
|
|
"valid_targets_min": 3097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.808,
|
||
|
|
"grad_norm": 0.5789613836327289,
|
||
|
|
"learning_rate": 3.997226958291992e-05,
|
||
|
|
"loss": 0.3167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30174410343170166,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 4972.1,
|
||
|
|
"valid_targets_min": 2321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.816,
|
||
|
|
"grad_norm": 0.5132049869786744,
|
||
|
|
"learning_rate": 3.996791002035937e-05,
|
||
|
|
"loss": 0.3193,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.34351909160614014,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 6353.1,
|
||
|
|
"valid_targets_min": 2811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.824,
|
||
|
|
"grad_norm": 0.5128789381578767,
|
||
|
|
"learning_rate": 3.996323259408055e-05,
|
||
|
|
"loss": 0.3237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3485877215862274,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 5549.4,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.832,
|
||
|
|
"grad_norm": 0.5402172060969582,
|
||
|
|
"learning_rate": 3.995823737854211e-05,
|
||
|
|
"loss": 0.3086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30282849073410034,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 6072.9,
|
||
|
|
"valid_targets_min": 1872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.84,
|
||
|
|
"grad_norm": 0.5515819926347459,
|
||
|
|
"learning_rate": 3.9952924453261534e-05,
|
||
|
|
"loss": 0.3127,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2936662435531616,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 4590.2,
|
||
|
|
"valid_targets_min": 2248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.848,
|
||
|
|
"grad_norm": 0.5285706315693174,
|
||
|
|
"learning_rate": 3.994729390281384e-05,
|
||
|
|
"loss": 0.3186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3250778913497925,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 5578.5,
|
||
|
|
"valid_targets_min": 2616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.856,
|
||
|
|
"grad_norm": 0.5502402081859151,
|
||
|
|
"learning_rate": 3.994134581683021e-05,
|
||
|
|
"loss": 0.3434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3574582040309906,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 5670.8,
|
||
|
|
"valid_targets_min": 3313
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.864,
|
||
|
|
"grad_norm": 0.5016293419426597,
|
||
|
|
"learning_rate": 3.9935080289996626e-05,
|
||
|
|
"loss": 0.3144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3080974221229553,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 5986.4,
|
||
|
|
"valid_targets_min": 2539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.872,
|
||
|
|
"grad_norm": 0.5660291944457707,
|
||
|
|
"learning_rate": 3.992849742205228e-05,
|
||
|
|
"loss": 0.3226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31069374084472656,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 6014.2,
|
||
|
|
"valid_targets_min": 2454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.88,
|
||
|
|
"grad_norm": 0.46398599640649046,
|
||
|
|
"learning_rate": 3.9921597317788065e-05,
|
||
|
|
"loss": 0.3124,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3050593435764313,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 6101.0,
|
||
|
|
"valid_targets_min": 4913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.888,
|
||
|
|
"grad_norm": 0.5349679943298677,
|
||
|
|
"learning_rate": 3.991438008704486e-05,
|
||
|
|
"loss": 0.3373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3825862407684326,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 7490.4,
|
||
|
|
"valid_targets_min": 1967
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.896,
|
||
|
|
"grad_norm": 0.5129679083648333,
|
||
|
|
"learning_rate": 3.990684584471179e-05,
|
||
|
|
"loss": 0.3199,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3344353437423706,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 5056.2,
|
||
|
|
"valid_targets_min": 991
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.904,
|
||
|
|
"grad_norm": 0.5114013214259068,
|
||
|
|
"learning_rate": 3.989899471072441e-05,
|
||
|
|
"loss": 0.3152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32178831100463867,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 5480.6,
|
||
|
|
"valid_targets_min": 2776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.912,
|
||
|
|
"grad_norm": 0.5551757388586439,
|
||
|
|
"learning_rate": 3.9890826810062784e-05,
|
||
|
|
"loss": 0.3115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3461974859237671,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 5539.6,
|
||
|
|
"valid_targets_min": 3173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.92,
|
||
|
|
"grad_norm": 0.48095163281778175,
|
||
|
|
"learning_rate": 3.988234227274949e-05,
|
||
|
|
"loss": 0.3137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3255422115325928,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 6356.3,
|
||
|
|
"valid_targets_min": 2670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.928,
|
||
|
|
"grad_norm": 0.5337169643841331,
|
||
|
|
"learning_rate": 3.987354123384757e-05,
|
||
|
|
"loss": 0.3194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3116692304611206,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 5684.5,
|
||
|
|
"valid_targets_min": 3490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.936,
|
||
|
|
"grad_norm": 0.5548195739252937,
|
||
|
|
"learning_rate": 3.9864423833458364e-05,
|
||
|
|
"loss": 0.3114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3080330789089203,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 4697.8,
|
||
|
|
"valid_targets_min": 1989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.944,
|
||
|
|
"grad_norm": 0.5335506041335158,
|
||
|
|
"learning_rate": 3.9854990216719285e-05,
|
||
|
|
"loss": 0.3142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2945554554462433,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 4857.4,
|
||
|
|
"valid_targets_min": 2493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.952,
|
||
|
|
"grad_norm": 0.4930115107440699,
|
||
|
|
"learning_rate": 3.98452405338015e-05,
|
||
|
|
"loss": 0.3215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29840582609176636,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 5117.4,
|
||
|
|
"valid_targets_min": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.96,
|
||
|
|
"grad_norm": 0.48996673143333463,
|
||
|
|
"learning_rate": 3.983517493990756e-05,
|
||
|
|
"loss": 0.3206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30873602628707886,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 5820.4,
|
||
|
|
"valid_targets_min": 1017
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.968,
|
||
|
|
"grad_norm": 0.5283852318337353,
|
||
|
|
"learning_rate": 3.982479359526892e-05,
|
||
|
|
"loss": 0.3163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3178045153617859,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 5300.6,
|
||
|
|
"valid_targets_min": 1851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.976,
|
||
|
|
"grad_norm": 0.5001051716337229,
|
||
|
|
"learning_rate": 3.981409666514336e-05,
|
||
|
|
"loss": 0.3256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31478673219680786,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 6632.8,
|
||
|
|
"valid_targets_min": 2269
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.984,
|
||
|
|
"grad_norm": 0.6753756767282909,
|
||
|
|
"learning_rate": 3.98030843198124e-05,
|
||
|
|
"loss": 0.3247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31617915630340576,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 5035.5,
|
||
|
|
"valid_targets_min": 768
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.992,
|
||
|
|
"grad_norm": 0.5005628037703251,
|
||
|
|
"learning_rate": 3.979175673457858e-05,
|
||
|
|
"loss": 0.3038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3059810400009155,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 5247.0,
|
||
|
|
"valid_targets_min": 2168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.5101683864070548,
|
||
|
|
"learning_rate": 3.9780114089762616e-05,
|
||
|
|
"loss": 0.3054,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3115279972553253,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 6066.2,
|
||
|
|
"valid_targets_min": 2665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.008,
|
||
|
|
"grad_norm": 0.5061533542766917,
|
||
|
|
"learning_rate": 3.976815657070062e-05,
|
||
|
|
"loss": 0.3196,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32069745659828186,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 5403.9,
|
||
|
|
"valid_targets_min": 2594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.016,
|
||
|
|
"grad_norm": 0.5059001703699166,
|
||
|
|
"learning_rate": 3.975588436774107e-05,
|
||
|
|
"loss": 0.3294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3165131211280823,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 6142.6,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.024,
|
||
|
|
"grad_norm": 0.4533978910070391,
|
||
|
|
"learning_rate": 3.9743297676241826e-05,
|
||
|
|
"loss": 0.3193,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3258289396762848,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 6012.1,
|
||
|
|
"valid_targets_min": 2008
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.032,
|
||
|
|
"grad_norm": 0.5083341694516313,
|
||
|
|
"learning_rate": 3.9730396696566994e-05,
|
||
|
|
"loss": 0.305,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31480103731155396,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 6597.6,
|
||
|
|
"valid_targets_min": 3755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.04,
|
||
|
|
"grad_norm": 0.4935210278014131,
|
||
|
|
"learning_rate": 3.971718163408375e-05,
|
||
|
|
"loss": 0.2977,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2967149019241333,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 5684.5,
|
||
|
|
"valid_targets_min": 708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.048,
|
||
|
|
"grad_norm": 0.565325705266283,
|
||
|
|
"learning_rate": 3.9703652699159093e-05,
|
||
|
|
"loss": 0.3082,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30539369583129883,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 4551.8,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.056,
|
||
|
|
"grad_norm": 0.4905273364614086,
|
||
|
|
"learning_rate": 3.9689810107156425e-05,
|
||
|
|
"loss": 0.3026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29835695028305054,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 5777.5,
|
||
|
|
"valid_targets_min": 2063
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.064,
|
||
|
|
"grad_norm": 0.4661058379526559,
|
||
|
|
"learning_rate": 3.967565407843222e-05,
|
||
|
|
"loss": 0.297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.276365727186203,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 5450.4,
|
||
|
|
"valid_targets_min": 2204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.072,
|
||
|
|
"grad_norm": 0.5603469801480837,
|
||
|
|
"learning_rate": 3.966118483833242e-05,
|
||
|
|
"loss": 0.308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33001142740249634,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 6528.9,
|
||
|
|
"valid_targets_min": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.08,
|
||
|
|
"grad_norm": 0.5197535966327156,
|
||
|
|
"learning_rate": 3.964640261718893e-05,
|
||
|
|
"loss": 0.2989,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2953532934188843,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 4881.6,
|
||
|
|
"valid_targets_min": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.088,
|
||
|
|
"grad_norm": 0.4678919417514522,
|
||
|
|
"learning_rate": 3.963130765031589e-05,
|
||
|
|
"loss": 0.3058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2858159840106964,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 5310.2,
|
||
|
|
"valid_targets_min": 2368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.096,
|
||
|
|
"grad_norm": 0.484112524785576,
|
||
|
|
"learning_rate": 3.961590017800598e-05,
|
||
|
|
"loss": 0.314,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2966598868370056,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 5507.7,
|
||
|
|
"valid_targets_min": 779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.104,
|
||
|
|
"grad_norm": 0.4984597429065734,
|
||
|
|
"learning_rate": 3.960018044552653e-05,
|
||
|
|
"loss": 0.308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29976171255111694,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 5569.8,
|
||
|
|
"valid_targets_min": 2526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.112,
|
||
|
|
"grad_norm": 0.4787188881097212,
|
||
|
|
"learning_rate": 3.9584148703115704e-05,
|
||
|
|
"loss": 0.308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30830392241477966,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 5337.4,
|
||
|
|
"valid_targets_min": 2429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.12,
|
||
|
|
"grad_norm": 0.4902996758578751,
|
||
|
|
"learning_rate": 3.956780520597842e-05,
|
||
|
|
"loss": 0.307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2992423474788666,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 5177.4,
|
||
|
|
"valid_targets_min": 2765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1280000000000001,
|
||
|
|
"grad_norm": 0.4774544301602382,
|
||
|
|
"learning_rate": 3.955115021428236e-05,
|
||
|
|
"loss": 0.3205,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31589141488075256,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 5750.9,
|
||
|
|
"valid_targets_min": 2097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1360000000000001,
|
||
|
|
"grad_norm": 0.4236448807576577,
|
||
|
|
"learning_rate": 3.95341839931538e-05,
|
||
|
|
"loss": 0.3056,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3106244206428528,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 7255.8,
|
||
|
|
"valid_targets_min": 3054
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.144,
|
||
|
|
"grad_norm": 0.4668295458159508,
|
||
|
|
"learning_rate": 3.95169068126734e-05,
|
||
|
|
"loss": 0.305,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30808717012405396,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 6632.2,
|
||
|
|
"valid_targets_min": 2729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.152,
|
||
|
|
"grad_norm": 0.5435292950469758,
|
||
|
|
"learning_rate": 3.949931894787187e-05,
|
||
|
|
"loss": 0.3073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3164767920970917,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 5680.6,
|
||
|
|
"valid_targets_min": 2794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.16,
|
||
|
|
"grad_norm": 0.5281852061014428,
|
||
|
|
"learning_rate": 3.948142067872565e-05,
|
||
|
|
"loss": 0.2976,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2959475517272949,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 5686.9,
|
||
|
|
"valid_targets_min": 2968
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.168,
|
||
|
|
"grad_norm": 0.4983171972870052,
|
||
|
|
"learning_rate": 3.946321229015241e-05,
|
||
|
|
"loss": 0.2993,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3173370361328125,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 5205.5,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.176,
|
||
|
|
"grad_norm": 0.4890718645725748,
|
||
|
|
"learning_rate": 3.944469407200652e-05,
|
||
|
|
"loss": 0.3031,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3072756230831146,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 5731.9,
|
||
|
|
"valid_targets_min": 2480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.184,
|
||
|
|
"grad_norm": 0.47357704723925165,
|
||
|
|
"learning_rate": 3.942586631907444e-05,
|
||
|
|
"loss": 0.3207,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28427958488464355,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 6041.5,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.192,
|
||
|
|
"grad_norm": 0.5453762286495085,
|
||
|
|
"learning_rate": 3.9406729331070054e-05,
|
||
|
|
"loss": 0.3028,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.295629620552063,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 5579.8,
|
||
|
|
"valid_targets_min": 997
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2,
|
||
|
|
"grad_norm": 0.4852696909448581,
|
||
|
|
"learning_rate": 3.938728341262985e-05,
|
||
|
|
"loss": 0.3094,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32111233472824097,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 5610.9,
|
||
|
|
"valid_targets_min": 1678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.208,
|
||
|
|
"grad_norm": 0.4935199675479707,
|
||
|
|
"learning_rate": 3.936752887330812e-05,
|
||
|
|
"loss": 0.3204,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3121909201145172,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 5657.9,
|
||
|
|
"valid_targets_min": 3379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.216,
|
||
|
|
"grad_norm": 0.5449908430487179,
|
||
|
|
"learning_rate": 3.9347466027571975e-05,
|
||
|
|
"loss": 0.3097,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29402291774749756,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 5161.4,
|
||
|
|
"valid_targets_min": 1967
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.224,
|
||
|
|
"grad_norm": 0.45633103438298667,
|
||
|
|
"learning_rate": 3.932709519479639e-05,
|
||
|
|
"loss": 0.3071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.34494248032569885,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 6849.4,
|
||
|
|
"valid_targets_min": 4095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.232,
|
||
|
|
"grad_norm": 0.47019749701597285,
|
||
|
|
"learning_rate": 3.930641669925911e-05,
|
||
|
|
"loss": 0.291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30092108249664307,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 6349.1,
|
||
|
|
"valid_targets_min": 2875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.24,
|
||
|
|
"grad_norm": 0.5201632699890976,
|
||
|
|
"learning_rate": 3.928543087013546e-05,
|
||
|
|
"loss": 0.301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2793954014778137,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 4882.1,
|
||
|
|
"valid_targets_min": 1988
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.248,
|
||
|
|
"grad_norm": 0.49981243099331757,
|
||
|
|
"learning_rate": 3.926413804149315e-05,
|
||
|
|
"loss": 0.2941,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2935693860054016,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 6399.2,
|
||
|
|
"valid_targets_min": 1814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.256,
|
||
|
|
"grad_norm": 0.4697792529332793,
|
||
|
|
"learning_rate": 3.9242538552286894e-05,
|
||
|
|
"loss": 0.2977,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31196436285972595,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 6547.4,
|
||
|
|
"valid_targets_min": 659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.264,
|
||
|
|
"grad_norm": 0.5081110490095486,
|
||
|
|
"learning_rate": 3.9220632746353096e-05,
|
||
|
|
"loss": 0.3212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.34884440898895264,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 5839.6,
|
||
|
|
"valid_targets_min": 2563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.272,
|
||
|
|
"grad_norm": 0.4245750286149305,
|
||
|
|
"learning_rate": 3.91984209724043e-05,
|
||
|
|
"loss": 0.2943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31733453273773193,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 7176.9,
|
||
|
|
"valid_targets_min": 3404
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.28,
|
||
|
|
"grad_norm": 0.4945888003308397,
|
||
|
|
"learning_rate": 3.917590358402369e-05,
|
||
|
|
"loss": 0.2997,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3026392459869385,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 5278.9,
|
||
|
|
"valid_targets_min": 2117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.288,
|
||
|
|
"grad_norm": 0.4743869642323437,
|
||
|
|
"learning_rate": 3.915308093965943e-05,
|
||
|
|
"loss": 0.2947,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2844420075416565,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 5709.9,
|
||
|
|
"valid_targets_min": 2348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.296,
|
||
|
|
"grad_norm": 0.6061193147263803,
|
||
|
|
"learning_rate": 3.9129953402618976e-05,
|
||
|
|
"loss": 0.2931,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30593141913414,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 4334.1,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.304,
|
||
|
|
"grad_norm": 0.4995811617110616,
|
||
|
|
"learning_rate": 3.91065213410633e-05,
|
||
|
|
"loss": 0.2994,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3021504878997803,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 5270.4,
|
||
|
|
"valid_targets_min": 3025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.312,
|
||
|
|
"grad_norm": 0.5677510964504177,
|
||
|
|
"learning_rate": 3.908278512800098e-05,
|
||
|
|
"loss": 0.3055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28321537375450134,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 5837.6,
|
||
|
|
"valid_targets_min": 1886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.32,
|
||
|
|
"grad_norm": 0.4806450055573019,
|
||
|
|
"learning_rate": 3.905874514128235e-05,
|
||
|
|
"loss": 0.298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.285899817943573,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 5459.9,
|
||
|
|
"valid_targets_min": 2177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.328,
|
||
|
|
"grad_norm": 0.4638341935119887,
|
||
|
|
"learning_rate": 3.903440176359338e-05,
|
||
|
|
"loss": 0.2993,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2896142303943634,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 5199.6,
|
||
|
|
"valid_targets_min": 2706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.336,
|
||
|
|
"grad_norm": 0.4829925681558631,
|
||
|
|
"learning_rate": 3.90097553824497e-05,
|
||
|
|
"loss": 0.3107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30030563473701477,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 5174.9,
|
||
|
|
"valid_targets_min": 1798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3439999999999999,
|
||
|
|
"grad_norm": 0.4540116364686032,
|
||
|
|
"learning_rate": 3.8984806390190304e-05,
|
||
|
|
"loss": 0.3087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29696711897850037,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 6698.7,
|
||
|
|
"valid_targets_min": 2721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3519999999999999,
|
||
|
|
"grad_norm": 0.46848218879375525,
|
||
|
|
"learning_rate": 3.895955518397141e-05,
|
||
|
|
"loss": 0.3008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.320040225982666,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 5730.2,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3599999999999999,
|
||
|
|
"grad_norm": 0.46627572996018274,
|
||
|
|
"learning_rate": 3.893400216576011e-05,
|
||
|
|
"loss": 0.3076,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2929132878780365,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 5770.9,
|
||
|
|
"valid_targets_min": 784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3679999999999999,
|
||
|
|
"grad_norm": 0.4997177540619092,
|
||
|
|
"learning_rate": 3.89081477423279e-05,
|
||
|
|
"loss": 0.3122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30305910110473633,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 5442.1,
|
||
|
|
"valid_targets_min": 2508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.376,
|
||
|
|
"grad_norm": 0.46782451496329075,
|
||
|
|
"learning_rate": 3.888199232524434e-05,
|
||
|
|
"loss": 0.3048,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27627885341644287,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 5929.6,
|
||
|
|
"valid_targets_min": 2027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.384,
|
||
|
|
"grad_norm": 0.48097129853488274,
|
||
|
|
"learning_rate": 3.8855536330870354e-05,
|
||
|
|
"loss": 0.2982,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3025239109992981,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 6750.7,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.392,
|
||
|
|
"grad_norm": 0.6100068929323097,
|
||
|
|
"learning_rate": 3.882878018035173e-05,
|
||
|
|
"loss": 0.3036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3252163827419281,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 4455.0,
|
||
|
|
"valid_targets_min": 1173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4,
|
||
|
|
"grad_norm": 0.5148159541342916,
|
||
|
|
"learning_rate": 3.880172429961232e-05,
|
||
|
|
"loss": 0.2937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29258278012275696,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 4651.9,
|
||
|
|
"valid_targets_min": 925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.408,
|
||
|
|
"grad_norm": 0.5381895580511613,
|
||
|
|
"learning_rate": 3.877436911934733e-05,
|
||
|
|
"loss": 0.3109,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30743107199668884,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 5533.6,
|
||
|
|
"valid_targets_min": 904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.416,
|
||
|
|
"grad_norm": 0.4188291102566486,
|
||
|
|
"learning_rate": 3.874671507501641e-05,
|
||
|
|
"loss": 0.2943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2865792512893677,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 5537.3,
|
||
|
|
"valid_targets_min": 2931
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.424,
|
||
|
|
"grad_norm": 0.4908922548894577,
|
||
|
|
"learning_rate": 3.871876260683677e-05,
|
||
|
|
"loss": 0.3022,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.330981582403183,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 5801.6,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.432,
|
||
|
|
"grad_norm": 0.4646370664832961,
|
||
|
|
"learning_rate": 3.869051215977612e-05,
|
||
|
|
"loss": 0.3066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2849675118923187,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 6021.3,
|
||
|
|
"valid_targets_min": 2230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.44,
|
||
|
|
"grad_norm": 0.48222675658351327,
|
||
|
|
"learning_rate": 3.8661964183545634e-05,
|
||
|
|
"loss": 0.2902,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2672026455402374,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 5221.6,
|
||
|
|
"valid_targets_min": 954
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.448,
|
||
|
|
"grad_norm": 0.45000451698113425,
|
||
|
|
"learning_rate": 3.863311913259276e-05,
|
||
|
|
"loss": 0.2883,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2890450656414032,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 5694.0,
|
||
|
|
"valid_targets_min": 1468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.456,
|
||
|
|
"grad_norm": 0.5025170489351635,
|
||
|
|
"learning_rate": 3.860397746609402e-05,
|
||
|
|
"loss": 0.2979,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30842775106430054,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 5658.1,
|
||
|
|
"valid_targets_min": 2233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.464,
|
||
|
|
"grad_norm": 0.49901946092424515,
|
||
|
|
"learning_rate": 3.857453964794764e-05,
|
||
|
|
"loss": 0.2937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28981584310531616,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 4907.5,
|
||
|
|
"valid_targets_min": 945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.472,
|
||
|
|
"grad_norm": 0.49525172496266556,
|
||
|
|
"learning_rate": 3.854480614676624e-05,
|
||
|
|
"loss": 0.2858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2704351544380188,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 4829.2,
|
||
|
|
"valid_targets_min": 2224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.48,
|
||
|
|
"grad_norm": 0.4569247307054098,
|
||
|
|
"learning_rate": 3.851477743586932e-05,
|
||
|
|
"loss": 0.3053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28220322728157043,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 5560.4,
|
||
|
|
"valid_targets_min": 2442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.488,
|
||
|
|
"grad_norm": 0.4870926051405198,
|
||
|
|
"learning_rate": 3.8484453993275746e-05,
|
||
|
|
"loss": 0.2875,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27977991104125977,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 5836.9,
|
||
|
|
"valid_targets_min": 3443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.496,
|
||
|
|
"grad_norm": 0.48160127940354946,
|
||
|
|
"learning_rate": 3.8453836301696134e-05,
|
||
|
|
"loss": 0.3102,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3110281825065613,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 5640.1,
|
||
|
|
"valid_targets_min": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.504,
|
||
|
|
"grad_norm": 0.48779913431255106,
|
||
|
|
"learning_rate": 3.842292484852518e-05,
|
||
|
|
"loss": 0.314,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3401919901371002,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 5965.8,
|
||
|
|
"valid_targets_min": 913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.512,
|
||
|
|
"grad_norm": 0.4287904629804472,
|
||
|
|
"learning_rate": 3.8391720125833875e-05,
|
||
|
|
"loss": 0.3176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.391912579536438,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 6960.0,
|
||
|
|
"valid_targets_min": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.52,
|
||
|
|
"grad_norm": 0.45454070436489985,
|
||
|
|
"learning_rate": 3.83602226303617e-05,
|
||
|
|
"loss": 0.2939,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3041311204433441,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 6276.3,
|
||
|
|
"valid_targets_min": 1965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.528,
|
||
|
|
"grad_norm": 0.4367609607515258,
|
||
|
|
"learning_rate": 3.83284328635087e-05,
|
||
|
|
"loss": 0.3072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31207773089408875,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 6414.3,
|
||
|
|
"valid_targets_min": 2158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.536,
|
||
|
|
"grad_norm": 0.47628978925952564,
|
||
|
|
"learning_rate": 3.829635133132751e-05,
|
||
|
|
"loss": 0.2959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3053891658782959,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 5082.9,
|
||
|
|
"valid_targets_min": 2155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.544,
|
||
|
|
"grad_norm": 0.4578863740375709,
|
||
|
|
"learning_rate": 3.8263978544515304e-05,
|
||
|
|
"loss": 0.2918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29359114170074463,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 6367.7,
|
||
|
|
"valid_targets_min": 3567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.552,
|
||
|
|
"grad_norm": 0.44590971239621957,
|
||
|
|
"learning_rate": 3.823131501840565e-05,
|
||
|
|
"loss": 0.2945,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28205692768096924,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 5617.2,
|
||
|
|
"valid_targets_min": 2831
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.56,
|
||
|
|
"grad_norm": 0.47647296867414674,
|
||
|
|
"learning_rate": 3.819836127296032e-05,
|
||
|
|
"loss": 0.2911,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29584789276123047,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 5086.8,
|
||
|
|
"valid_targets_min": 2383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.568,
|
||
|
|
"grad_norm": 0.44827088421612804,
|
||
|
|
"learning_rate": 3.8165117832761016e-05,
|
||
|
|
"loss": 0.2883,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2906496524810791,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 5322.4,
|
||
|
|
"valid_targets_min": 925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.576,
|
||
|
|
"grad_norm": 0.46473511615349966,
|
||
|
|
"learning_rate": 3.813158522700098e-05,
|
||
|
|
"loss": 0.2833,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3010580837726593,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 6231.4,
|
||
|
|
"valid_targets_min": 2747
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.584,
|
||
|
|
"grad_norm": 0.46146412830588385,
|
||
|
|
"learning_rate": 3.809776398947665e-05,
|
||
|
|
"loss": 0.2904,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2865464389324188,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 5287.1,
|
||
|
|
"valid_targets_min": 2584
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.592,
|
||
|
|
"grad_norm": 0.41928387386535865,
|
||
|
|
"learning_rate": 3.806365465857908e-05,
|
||
|
|
"loss": 0.2869,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2612549960613251,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 6553.7,
|
||
|
|
"valid_targets_min": 2261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6,
|
||
|
|
"grad_norm": 0.4446606668239555,
|
||
|
|
"learning_rate": 3.802925777728541e-05,
|
||
|
|
"loss": 0.297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29210934042930603,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 5958.9,
|
||
|
|
"valid_targets_min": 2292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.608,
|
||
|
|
"grad_norm": 0.4596824212829141,
|
||
|
|
"learning_rate": 3.799457389315023e-05,
|
||
|
|
"loss": 0.2958,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2969397306442261,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 5465.1,
|
||
|
|
"valid_targets_min": 2633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.616,
|
||
|
|
"grad_norm": 0.4642174072728452,
|
||
|
|
"learning_rate": 3.795960355829683e-05,
|
||
|
|
"loss": 0.2937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3383893370628357,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 5533.6,
|
||
|
|
"valid_targets_min": 1543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.624,
|
||
|
|
"grad_norm": 0.41923015410914755,
|
||
|
|
"learning_rate": 3.7924347329408444e-05,
|
||
|
|
"loss": 0.2861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2893693447113037,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 6553.8,
|
||
|
|
"valid_targets_min": 2738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6320000000000001,
|
||
|
|
"grad_norm": 0.474798883715657,
|
||
|
|
"learning_rate": 3.788880576771937e-05,
|
||
|
|
"loss": 0.2955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3074151873588562,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 5287.8,
|
||
|
|
"valid_targets_min": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6400000000000001,
|
||
|
|
"grad_norm": 0.4859351138274312,
|
||
|
|
"learning_rate": 3.785297943900605e-05,
|
||
|
|
"loss": 0.293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2984621822834015,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 4972.1,
|
||
|
|
"valid_targets_min": 3064
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6480000000000001,
|
||
|
|
"grad_norm": 0.45439722658451603,
|
||
|
|
"learning_rate": 3.7816868913578044e-05,
|
||
|
|
"loss": 0.2915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29881051182746887,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 6891.9,
|
||
|
|
"valid_targets_min": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6560000000000001,
|
||
|
|
"grad_norm": 0.4954789496970954,
|
||
|
|
"learning_rate": 3.778047476626897e-05,
|
||
|
|
"loss": 0.2886,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27837109565734863,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 4826.8,
|
||
|
|
"valid_targets_min": 2124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6640000000000001,
|
||
|
|
"grad_norm": 0.4486524659460757,
|
||
|
|
"learning_rate": 3.7743797576427335e-05,
|
||
|
|
"loss": 0.298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2992919683456421,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 5997.5,
|
||
|
|
"valid_targets_min": 1923
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6720000000000002,
|
||
|
|
"grad_norm": 0.4744725016895598,
|
||
|
|
"learning_rate": 3.770683792790733e-05,
|
||
|
|
"loss": 0.3074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35754770040512085,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 5574.3,
|
||
|
|
"valid_targets_min": 2306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6800000000000002,
|
||
|
|
"grad_norm": 0.4619485199478231,
|
||
|
|
"learning_rate": 3.766959640905954e-05,
|
||
|
|
"loss": 0.294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26582252979278564,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 4876.6,
|
||
|
|
"valid_targets_min": 2432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.688,
|
||
|
|
"grad_norm": 0.46452806236395455,
|
||
|
|
"learning_rate": 3.763207361272153e-05,
|
||
|
|
"loss": 0.2832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2735568583011627,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 4596.1,
|
||
|
|
"valid_targets_min": 862
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.696,
|
||
|
|
"grad_norm": 0.4613970271174457,
|
||
|
|
"learning_rate": 3.759427013620849e-05,
|
||
|
|
"loss": 0.2938,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30159851908683777,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 5337.3,
|
||
|
|
"valid_targets_min": 2797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.704,
|
||
|
|
"grad_norm": 0.45254038624818077,
|
||
|
|
"learning_rate": 3.755618658130366e-05,
|
||
|
|
"loss": 0.301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.332949697971344,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 6322.7,
|
||
|
|
"valid_targets_min": 2395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.712,
|
||
|
|
"grad_norm": 0.46132268024374845,
|
||
|
|
"learning_rate": 3.751782355424877e-05,
|
||
|
|
"loss": 0.3057,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3081086277961731,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 5333.1,
|
||
|
|
"valid_targets_min": 676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.72,
|
||
|
|
"grad_norm": 0.647810915100628,
|
||
|
|
"learning_rate": 3.7479181665734395e-05,
|
||
|
|
"loss": 0.2928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2798285484313965,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 4865.9,
|
||
|
|
"valid_targets_min": 768
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.728,
|
||
|
|
"grad_norm": 0.4595256488279494,
|
||
|
|
"learning_rate": 3.7440261530890213e-05,
|
||
|
|
"loss": 0.3039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30216994881629944,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 5566.8,
|
||
|
|
"valid_targets_min": 3165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.736,
|
||
|
|
"grad_norm": 0.5173055386818666,
|
||
|
|
"learning_rate": 3.740106376927527e-05,
|
||
|
|
"loss": 0.2991,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26667511463165283,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 4258.4,
|
||
|
|
"valid_targets_min": 2453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.744,
|
||
|
|
"grad_norm": 0.44729287065917317,
|
||
|
|
"learning_rate": 3.7361589004868035e-05,
|
||
|
|
"loss": 0.3005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29991859197616577,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 5005.5,
|
||
|
|
"valid_targets_min": 1317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.752,
|
||
|
|
"grad_norm": 0.4502413369795436,
|
||
|
|
"learning_rate": 3.7321837866056535e-05,
|
||
|
|
"loss": 0.3066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.311065137386322,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 5624.9,
|
||
|
|
"valid_targets_min": 2808
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.76,
|
||
|
|
"grad_norm": 0.44388049018525816,
|
||
|
|
"learning_rate": 3.728181098562831e-05,
|
||
|
|
"loss": 0.3084,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32409796118736267,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 5841.8,
|
||
|
|
"valid_targets_min": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.768,
|
||
|
|
"grad_norm": 0.4946267811455118,
|
||
|
|
"learning_rate": 3.7241509000760355e-05,
|
||
|
|
"loss": 0.3005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3057519793510437,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 5293.1,
|
||
|
|
"valid_targets_min": 2960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.776,
|
||
|
|
"grad_norm": 0.48440289350790505,
|
||
|
|
"learning_rate": 3.720093255300899e-05,
|
||
|
|
"loss": 0.2956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29010993242263794,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 5239.8,
|
||
|
|
"valid_targets_min": 1797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.784,
|
||
|
|
"grad_norm": 0.42075171272953676,
|
||
|
|
"learning_rate": 3.7160082288299645e-05,
|
||
|
|
"loss": 0.2915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29798623919487,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 5845.2,
|
||
|
|
"valid_targets_min": 2606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.792,
|
||
|
|
"grad_norm": 0.44876314392122135,
|
||
|
|
"learning_rate": 3.7118958856916534e-05,
|
||
|
|
"loss": 0.2955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.317965030670166,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 5691.3,
|
||
|
|
"valid_targets_min": 729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8,
|
||
|
|
"grad_norm": 0.4562553068597693,
|
||
|
|
"learning_rate": 3.707756291349237e-05,
|
||
|
|
"loss": 0.304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3572045564651489,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 6273.2,
|
||
|
|
"valid_targets_min": 3036
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.808,
|
||
|
|
"grad_norm": 0.4654698361735082,
|
||
|
|
"learning_rate": 3.703589511699787e-05,
|
||
|
|
"loss": 0.2872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2775658369064331,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 5154.9,
|
||
|
|
"valid_targets_min": 1883
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8159999999999998,
|
||
|
|
"grad_norm": 0.4585691012158252,
|
||
|
|
"learning_rate": 3.6993956130731355e-05,
|
||
|
|
"loss": 0.2907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2992740571498871,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 5435.6,
|
||
|
|
"valid_targets_min": 2525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8239999999999998,
|
||
|
|
"grad_norm": 0.423001138385012,
|
||
|
|
"learning_rate": 3.6951746622308106e-05,
|
||
|
|
"loss": 0.2952,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28858619928359985,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 5687.1,
|
||
|
|
"valid_targets_min": 926
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8319999999999999,
|
||
|
|
"grad_norm": 0.43450074990970605,
|
||
|
|
"learning_rate": 3.69092672636498e-05,
|
||
|
|
"loss": 0.2977,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3227563500404358,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 7171.9,
|
||
|
|
"valid_targets_min": 4053
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8399999999999999,
|
||
|
|
"grad_norm": 0.45663335612023054,
|
||
|
|
"learning_rate": 3.686651873097375e-05,
|
||
|
|
"loss": 0.2884,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3199521601200104,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 6205.1,
|
||
|
|
"valid_targets_min": 1888
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8479999999999999,
|
||
|
|
"grad_norm": 0.40345881129581174,
|
||
|
|
"learning_rate": 3.682350170478223e-05,
|
||
|
|
"loss": 0.2798,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2745388150215149,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 6815.2,
|
||
|
|
"valid_targets_min": 2696
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8559999999999999,
|
||
|
|
"grad_norm": 0.4739803045161771,
|
||
|
|
"learning_rate": 3.678021686985153e-05,
|
||
|
|
"loss": 0.2906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3329041302204132,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 6181.8,
|
||
|
|
"valid_targets_min": 2589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8639999999999999,
|
||
|
|
"grad_norm": 0.4119410013072989,
|
||
|
|
"learning_rate": 3.6736664915221144e-05,
|
||
|
|
"loss": 0.2884,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29115116596221924,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 6576.3,
|
||
|
|
"valid_targets_min": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8719999999999999,
|
||
|
|
"grad_norm": 0.4383779875712353,
|
||
|
|
"learning_rate": 3.669284653418278e-05,
|
||
|
|
"loss": 0.2998,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30039533972740173,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 6318.1,
|
||
|
|
"valid_targets_min": 3353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.88,
|
||
|
|
"grad_norm": 0.4478554027571998,
|
||
|
|
"learning_rate": 3.6648762424269306e-05,
|
||
|
|
"loss": 0.2869,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2735443115234375,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 5188.6,
|
||
|
|
"valid_targets_min": 2463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.888,
|
||
|
|
"grad_norm": 0.4967663577542887,
|
||
|
|
"learning_rate": 3.660441328724365e-05,
|
||
|
|
"loss": 0.2988,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2868739366531372,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 6199.9,
|
||
|
|
"valid_targets_min": 2365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.896,
|
||
|
|
"grad_norm": 0.4544842757439779,
|
||
|
|
"learning_rate": 3.655979982908764e-05,
|
||
|
|
"loss": 0.2906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2722131609916687,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 4829.0,
|
||
|
|
"valid_targets_min": 1607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.904,
|
||
|
|
"grad_norm": 0.4312917127023269,
|
||
|
|
"learning_rate": 3.6514922759990756e-05,
|
||
|
|
"loss": 0.2871,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30703186988830566,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 5894.9,
|
||
|
|
"valid_targets_min": 2382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.912,
|
||
|
|
"grad_norm": 0.4773885555314306,
|
||
|
|
"learning_rate": 3.646978279433883e-05,
|
||
|
|
"loss": 0.2924,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.275803804397583,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 5051.6,
|
||
|
|
"valid_targets_min": 2287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.92,
|
||
|
|
"grad_norm": 0.4441897580352912,
|
||
|
|
"learning_rate": 3.6424380650702685e-05,
|
||
|
|
"loss": 0.2955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2826846241950989,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 5370.1,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.928,
|
||
|
|
"grad_norm": 0.4612436316301898,
|
||
|
|
"learning_rate": 3.637871705182667e-05,
|
||
|
|
"loss": 0.2878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2725675702095032,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 4786.4,
|
||
|
|
"valid_targets_min": 2658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.936,
|
||
|
|
"grad_norm": 0.5513342663258486,
|
||
|
|
"learning_rate": 3.633279272461717e-05,
|
||
|
|
"loss": 0.2921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3400229513645172,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 5965.6,
|
||
|
|
"valid_targets_min": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.944,
|
||
|
|
"grad_norm": 0.4419992875343551,
|
||
|
|
"learning_rate": 3.628660840013102e-05,
|
||
|
|
"loss": 0.2779,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2913501262664795,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 5585.3,
|
||
|
|
"valid_targets_min": 2383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.952,
|
||
|
|
"grad_norm": 0.4328857845662596,
|
||
|
|
"learning_rate": 3.624016481356392e-05,
|
||
|
|
"loss": 0.2881,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29528385400772095,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 5912.7,
|
||
|
|
"valid_targets_min": 3592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.96,
|
||
|
|
"grad_norm": 0.4729340584130962,
|
||
|
|
"learning_rate": 3.619346270423866e-05,
|
||
|
|
"loss": 0.2989,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3194947838783264,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 5515.3,
|
||
|
|
"valid_targets_min": 2746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.968,
|
||
|
|
"grad_norm": 0.42437072854473223,
|
||
|
|
"learning_rate": 3.6146502815593384e-05,
|
||
|
|
"loss": 0.2885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2815873622894287,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 5601.3,
|
||
|
|
"valid_targets_min": 2488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.976,
|
||
|
|
"grad_norm": 0.41289542578032434,
|
||
|
|
"learning_rate": 3.609928589516977e-05,
|
||
|
|
"loss": 0.3137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2931348979473114,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 6282.1,
|
||
|
|
"valid_targets_min": 863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.984,
|
||
|
|
"grad_norm": 0.5321386208096575,
|
||
|
|
"learning_rate": 3.6051812694601114e-05,
|
||
|
|
"loss": 0.2893,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28520435094833374,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 6663.6,
|
||
|
|
"valid_targets_min": 2058
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.992,
|
||
|
|
"grad_norm": 0.482697618696399,
|
||
|
|
"learning_rate": 3.6004083969600346e-05,
|
||
|
|
"loss": 0.2972,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2983943223953247,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 4540.4,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.49192629172974606,
|
||
|
|
"learning_rate": 3.595610047994804e-05,
|
||
|
|
"loss": 0.2908,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2858894169330597,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 5028.2,
|
||
|
|
"valid_targets_min": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.008,
|
||
|
|
"grad_norm": 0.4359872718221418,
|
||
|
|
"learning_rate": 3.5907862989480285e-05,
|
||
|
|
"loss": 0.2748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2697839140892029,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 7249.1,
|
||
|
|
"valid_targets_min": 2496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.016,
|
||
|
|
"grad_norm": 0.45859795145980614,
|
||
|
|
"learning_rate": 3.585937226607656e-05,
|
||
|
|
"loss": 0.2655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.279720664024353,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 5313.3,
|
||
|
|
"valid_targets_min": 2647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.024,
|
||
|
|
"grad_norm": 0.4505438704272417,
|
||
|
|
"learning_rate": 3.5810629081647476e-05,
|
||
|
|
"loss": 0.2721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2788507044315338,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 6312.9,
|
||
|
|
"valid_targets_min": 2771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.032,
|
||
|
|
"grad_norm": 0.4691711148608988,
|
||
|
|
"learning_rate": 3.576163421212249e-05,
|
||
|
|
"loss": 0.2664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2736594080924988,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 4661.1,
|
||
|
|
"valid_targets_min": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.04,
|
||
|
|
"grad_norm": 0.5037167340395163,
|
||
|
|
"learning_rate": 3.5712388437437576e-05,
|
||
|
|
"loss": 0.2772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2533131241798401,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 5276.2,
|
||
|
|
"valid_targets_min": 3433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.048,
|
||
|
|
"grad_norm": 0.42546930787191156,
|
||
|
|
"learning_rate": 3.566289254152283e-05,
|
||
|
|
"loss": 0.2784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2628275752067566,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 6090.3,
|
||
|
|
"valid_targets_min": 3340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.056,
|
||
|
|
"grad_norm": 0.42930908716917526,
|
||
|
|
"learning_rate": 3.56131473122899e-05,
|
||
|
|
"loss": 0.2816,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27324312925338745,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 5684.6,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.064,
|
||
|
|
"grad_norm": 0.49385851729689484,
|
||
|
|
"learning_rate": 3.556315354161955e-05,
|
||
|
|
"loss": 0.2653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26746314764022827,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 5008.8,
|
||
|
|
"valid_targets_min": 2139
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.072,
|
||
|
|
"grad_norm": 0.4391947070222829,
|
||
|
|
"learning_rate": 3.551291202534899e-05,
|
||
|
|
"loss": 0.2718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27265650033950806,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 5607.2,
|
||
|
|
"valid_targets_min": 2190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.08,
|
||
|
|
"grad_norm": 0.4582254368398922,
|
||
|
|
"learning_rate": 3.546242356325922e-05,
|
||
|
|
"loss": 0.2898,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2865563631057739,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 5633.8,
|
||
|
|
"valid_targets_min": 687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.088,
|
||
|
|
"grad_norm": 0.4293498674993555,
|
||
|
|
"learning_rate": 3.5411688959062323e-05,
|
||
|
|
"loss": 0.2797,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2655894458293915,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 5344.7,
|
||
|
|
"valid_targets_min": 2540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.096,
|
||
|
|
"grad_norm": 0.4332699663002504,
|
||
|
|
"learning_rate": 3.5360709020388625e-05,
|
||
|
|
"loss": 0.2908,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2839583158493042,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 6193.4,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.104,
|
||
|
|
"grad_norm": 0.4796757683247767,
|
||
|
|
"learning_rate": 3.530948455877388e-05,
|
||
|
|
"loss": 0.2852,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2717803120613098,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 5138.0,
|
||
|
|
"valid_targets_min": 2010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.112,
|
||
|
|
"grad_norm": 0.474723248917543,
|
||
|
|
"learning_rate": 3.525801638964634e-05,
|
||
|
|
"loss": 0.2862,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3030173182487488,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 5644.9,
|
||
|
|
"valid_targets_min": 1939
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.12,
|
||
|
|
"grad_norm": 0.41894104802521787,
|
||
|
|
"learning_rate": 3.520630533231376e-05,
|
||
|
|
"loss": 0.276,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30031317472457886,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 6236.9,
|
||
|
|
"valid_targets_min": 2499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.128,
|
||
|
|
"grad_norm": 0.44950239662521674,
|
||
|
|
"learning_rate": 3.5154352209950376e-05,
|
||
|
|
"loss": 0.2759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29977887868881226,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 6185.9,
|
||
|
|
"valid_targets_min": 2168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.136,
|
||
|
|
"grad_norm": 0.43075680464817284,
|
||
|
|
"learning_rate": 3.510215784958376e-05,
|
||
|
|
"loss": 0.2802,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25842052698135376,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 5626.4,
|
||
|
|
"valid_targets_min": 2212
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.144,
|
||
|
|
"grad_norm": 0.4015925556150186,
|
||
|
|
"learning_rate": 3.5049723082081755e-05,
|
||
|
|
"loss": 0.2776,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2656909227371216,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 7547.5,
|
||
|
|
"valid_targets_min": 4034
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.152,
|
||
|
|
"grad_norm": 0.43393949690284467,
|
||
|
|
"learning_rate": 3.49970487421391e-05,
|
||
|
|
"loss": 0.303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3003223240375519,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 6083.6,
|
||
|
|
"valid_targets_min": 1981
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.16,
|
||
|
|
"grad_norm": 0.43827694971056175,
|
||
|
|
"learning_rate": 3.494413566826427e-05,
|
||
|
|
"loss": 0.2736,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26340213418006897,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 6010.8,
|
||
|
|
"valid_targets_min": 1962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.168,
|
||
|
|
"grad_norm": 0.40727563091955427,
|
||
|
|
"learning_rate": 3.489098470276608e-05,
|
||
|
|
"loss": 0.2734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2752530574798584,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 6446.7,
|
||
|
|
"valid_targets_min": 2515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.176,
|
||
|
|
"grad_norm": 0.4075018940370257,
|
||
|
|
"learning_rate": 3.483759669174024e-05,
|
||
|
|
"loss": 0.2759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2670140266418457,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 6044.2,
|
||
|
|
"valid_targets_min": 2033
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.184,
|
||
|
|
"grad_norm": 0.4458150382743038,
|
||
|
|
"learning_rate": 3.478397248505598e-05,
|
||
|
|
"loss": 0.2777,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28183412551879883,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 5179.1,
|
||
|
|
"valid_targets_min": 2292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.192,
|
||
|
|
"grad_norm": 0.441118210464986,
|
||
|
|
"learning_rate": 3.473011293634241e-05,
|
||
|
|
"loss": 0.2705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2754057049751282,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 5369.0,
|
||
|
|
"valid_targets_min": 582
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2,
|
||
|
|
"grad_norm": 0.4614185627987562,
|
||
|
|
"learning_rate": 3.467601890297502e-05,
|
||
|
|
"loss": 0.2788,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30023840069770813,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 6044.8,
|
||
|
|
"valid_targets_min": 2461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.208,
|
||
|
|
"grad_norm": 0.4493461757147211,
|
||
|
|
"learning_rate": 3.4621691246061976e-05,
|
||
|
|
"loss": 0.2769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2876693606376648,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 6722.2,
|
||
|
|
"valid_targets_min": 2520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.216,
|
||
|
|
"grad_norm": 0.5466147214995866,
|
||
|
|
"learning_rate": 3.456713083043046e-05,
|
||
|
|
"loss": 0.2863,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2818297743797302,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 6499.2,
|
||
|
|
"valid_targets_min": 2419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.224,
|
||
|
|
"grad_norm": 0.41989719260316044,
|
||
|
|
"learning_rate": 3.451233852461285e-05,
|
||
|
|
"loss": 0.2907,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30823421478271484,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 6820.2,
|
||
|
|
"valid_targets_min": 3289
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.232,
|
||
|
|
"grad_norm": 0.43771545385689653,
|
||
|
|
"learning_rate": 3.4457315200832935e-05,
|
||
|
|
"loss": 0.2816,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2757190465927124,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 5641.8,
|
||
|
|
"valid_targets_min": 2220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.24,
|
||
|
|
"grad_norm": 0.4799367692033165,
|
||
|
|
"learning_rate": 3.440206173499201e-05,
|
||
|
|
"loss": 0.2618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28247034549713135,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 5592.2,
|
||
|
|
"valid_targets_min": 3292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.248,
|
||
|
|
"grad_norm": 0.43043323612593953,
|
||
|
|
"learning_rate": 3.4346579006654945e-05,
|
||
|
|
"loss": 0.2615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2743549048900604,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 5858.2,
|
||
|
|
"valid_targets_min": 2022
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2560000000000002,
|
||
|
|
"grad_norm": 0.4205959013545583,
|
||
|
|
"learning_rate": 3.4290867899036166e-05,
|
||
|
|
"loss": 0.282,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30858612060546875,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 6465.2,
|
||
|
|
"valid_targets_min": 3118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2640000000000002,
|
||
|
|
"grad_norm": 0.3702037241290151,
|
||
|
|
"learning_rate": 3.4234929298985614e-05,
|
||
|
|
"loss": 0.277,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27199870347976685,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 6840.3,
|
||
|
|
"valid_targets_min": 3274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2720000000000002,
|
||
|
|
"grad_norm": 0.49042099459027266,
|
||
|
|
"learning_rate": 3.417876409697463e-05,
|
||
|
|
"loss": 0.281,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2543753981590271,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 4505.9,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2800000000000002,
|
||
|
|
"grad_norm": 0.4676067529924004,
|
||
|
|
"learning_rate": 3.412237318708175e-05,
|
||
|
|
"loss": 0.2908,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3017064929008484,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 5715.6,
|
||
|
|
"valid_targets_min": 2442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.288,
|
||
|
|
"grad_norm": 0.44794340952190315,
|
||
|
|
"learning_rate": 3.4065757466978504e-05,
|
||
|
|
"loss": 0.2776,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2632843852043152,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 5181.3,
|
||
|
|
"valid_targets_min": 737
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.296,
|
||
|
|
"grad_norm": 0.4515048985651608,
|
||
|
|
"learning_rate": 3.400891783791511e-05,
|
||
|
|
"loss": 0.2746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26894891262054443,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 5134.5,
|
||
|
|
"valid_targets_min": 2738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.304,
|
||
|
|
"grad_norm": 0.4210263389946454,
|
||
|
|
"learning_rate": 3.395185520470614e-05,
|
||
|
|
"loss": 0.277,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2625139355659485,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 5573.9,
|
||
|
|
"valid_targets_min": 2412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.312,
|
||
|
|
"grad_norm": 0.45646353517684174,
|
||
|
|
"learning_rate": 3.38945704757161e-05,
|
||
|
|
"loss": 0.293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29955101013183594,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 4880.6,
|
||
|
|
"valid_targets_min": 2246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.32,
|
||
|
|
"grad_norm": 0.4648922907575776,
|
||
|
|
"learning_rate": 3.383706456284498e-05,
|
||
|
|
"loss": 0.2728,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2535097897052765,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 4900.8,
|
||
|
|
"valid_targets_min": 2258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.328,
|
||
|
|
"grad_norm": 0.414919522096783,
|
||
|
|
"learning_rate": 3.377933838151374e-05,
|
||
|
|
"loss": 0.2793,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27614641189575195,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 5782.2,
|
||
|
|
"valid_targets_min": 2233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.336,
|
||
|
|
"grad_norm": 0.4721401237719625,
|
||
|
|
"learning_rate": 3.3721392850649714e-05,
|
||
|
|
"loss": 0.2854,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.279009073972702,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 5091.2,
|
||
|
|
"valid_targets_min": 2956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.344,
|
||
|
|
"grad_norm": 0.3980236596511246,
|
||
|
|
"learning_rate": 3.3663228892672034e-05,
|
||
|
|
"loss": 0.2733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30130547285079956,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 6822.2,
|
||
|
|
"valid_targets_min": 1767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.352,
|
||
|
|
"grad_norm": 0.4411737648188492,
|
||
|
|
"learning_rate": 3.36048474334769e-05,
|
||
|
|
"loss": 0.2769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28683120012283325,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 5142.1,
|
||
|
|
"valid_targets_min": 921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.36,
|
||
|
|
"grad_norm": 0.40418223383565033,
|
||
|
|
"learning_rate": 3.3546249402422834e-05,
|
||
|
|
"loss": 0.2906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27696824073791504,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 6867.9,
|
||
|
|
"valid_targets_min": 2408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.368,
|
||
|
|
"grad_norm": 0.4328314194872318,
|
||
|
|
"learning_rate": 3.3487435732315944e-05,
|
||
|
|
"loss": 0.2785,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26666712760925293,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 5044.9,
|
||
|
|
"valid_targets_min": 1312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.376,
|
||
|
|
"grad_norm": 0.4188243659370168,
|
||
|
|
"learning_rate": 3.342840735939501e-05,
|
||
|
|
"loss": 0.2811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27887147665023804,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 6185.2,
|
||
|
|
"valid_targets_min": 3188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.384,
|
||
|
|
"grad_norm": 0.4525280463154427,
|
||
|
|
"learning_rate": 3.33691652233166e-05,
|
||
|
|
"loss": 0.2751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2639387249946594,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 5238.1,
|
||
|
|
"valid_targets_min": 2297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.392,
|
||
|
|
"grad_norm": 0.40754755046085145,
|
||
|
|
"learning_rate": 3.330971026714016e-05,
|
||
|
|
"loss": 0.2783,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27780595421791077,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 5899.4,
|
||
|
|
"valid_targets_min": 961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4,
|
||
|
|
"grad_norm": 0.45449778937356167,
|
||
|
|
"learning_rate": 3.325004343731292e-05,
|
||
|
|
"loss": 0.2942,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2731771469116211,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 5282.1,
|
||
|
|
"valid_targets_min": 2451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.408,
|
||
|
|
"grad_norm": 0.41936217467238024,
|
||
|
|
"learning_rate": 3.3190165683654885e-05,
|
||
|
|
"loss": 0.2884,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2833578586578369,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 5853.5,
|
||
|
|
"valid_targets_min": 2398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.416,
|
||
|
|
"grad_norm": 0.47309565281866045,
|
||
|
|
"learning_rate": 3.31300779593437e-05,
|
||
|
|
"loss": 0.2729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2796747386455536,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 5078.4,
|
||
|
|
"valid_targets_min": 1543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.424,
|
||
|
|
"grad_norm": 0.4177225202562232,
|
||
|
|
"learning_rate": 3.306978122089948e-05,
|
||
|
|
"loss": 0.2656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24066470563411713,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 5414.7,
|
||
|
|
"valid_targets_min": 2838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.432,
|
||
|
|
"grad_norm": 0.39007343307685954,
|
||
|
|
"learning_rate": 3.300927642816957e-05,
|
||
|
|
"loss": 0.2864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2647990882396698,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 7182.4,
|
||
|
|
"valid_targets_min": 3833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.44,
|
||
|
|
"grad_norm": 0.4766268290914442,
|
||
|
|
"learning_rate": 3.294856454431328e-05,
|
||
|
|
"loss": 0.2731,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2947763502597809,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 5058.1,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.448,
|
||
|
|
"grad_norm": 0.4749676704997854,
|
||
|
|
"learning_rate": 3.288764653578653e-05,
|
||
|
|
"loss": 0.2829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2593019902706146,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 5531.9,
|
||
|
|
"valid_targets_min": 3211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.456,
|
||
|
|
"grad_norm": 0.43087832321512626,
|
||
|
|
"learning_rate": 3.2826523372326516e-05,
|
||
|
|
"loss": 0.2867,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2759775221347809,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 5525.2,
|
||
|
|
"valid_targets_min": 2348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.464,
|
||
|
|
"grad_norm": 0.4144418282536644,
|
||
|
|
"learning_rate": 3.276519602693621e-05,
|
||
|
|
"loss": 0.2744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2653999924659729,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 5352.9,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.472,
|
||
|
|
"grad_norm": 0.42435435381093956,
|
||
|
|
"learning_rate": 3.270366547586892e-05,
|
||
|
|
"loss": 0.2794,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2846945524215698,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 5556.9,
|
||
|
|
"valid_targets_min": 2818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.48,
|
||
|
|
"grad_norm": 0.46730653220343593,
|
||
|
|
"learning_rate": 3.2641932698612715e-05,
|
||
|
|
"loss": 0.2878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27474409341812134,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 4989.6,
|
||
|
|
"valid_targets_min": 3049
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.488,
|
||
|
|
"grad_norm": 0.4484239721050301,
|
||
|
|
"learning_rate": 3.2579998677874855e-05,
|
||
|
|
"loss": 0.2707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2688925862312317,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 5152.9,
|
||
|
|
"valid_targets_min": 744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.496,
|
||
|
|
"grad_norm": 0.45786906944884076,
|
||
|
|
"learning_rate": 3.251786439956614e-05,
|
||
|
|
"loss": 0.2778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26745644211769104,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 4496.0,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.504,
|
||
|
|
"grad_norm": 0.5706239065761335,
|
||
|
|
"learning_rate": 3.2455530852785206e-05,
|
||
|
|
"loss": 0.2745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29573023319244385,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 3546.1,
|
||
|
|
"valid_targets_min": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.512,
|
||
|
|
"grad_norm": 0.4660476242711952,
|
||
|
|
"learning_rate": 3.239299902980281e-05,
|
||
|
|
"loss": 0.2949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2543278634548187,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 4862.9,
|
||
|
|
"valid_targets_min": 2734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.52,
|
||
|
|
"grad_norm": 0.42408248225968304,
|
||
|
|
"learning_rate": 3.2330269926046e-05,
|
||
|
|
"loss": 0.2732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2651727795600891,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 5785.1,
|
||
|
|
"valid_targets_min": 1938
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.528,
|
||
|
|
"grad_norm": 0.4129023020912163,
|
||
|
|
"learning_rate": 3.2267344540082284e-05,
|
||
|
|
"loss": 0.2811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28441861271858215,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 5624.2,
|
||
|
|
"valid_targets_min": 1104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.536,
|
||
|
|
"grad_norm": 0.41229447790684026,
|
||
|
|
"learning_rate": 3.220422387360373e-05,
|
||
|
|
"loss": 0.282,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2966400980949402,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 6064.5,
|
||
|
|
"valid_targets_min": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.544,
|
||
|
|
"grad_norm": 0.3940473278014604,
|
||
|
|
"learning_rate": 3.2140908931411026e-05,
|
||
|
|
"loss": 0.2811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2629774510860443,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 6014.9,
|
||
|
|
"valid_targets_min": 2710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.552,
|
||
|
|
"grad_norm": 0.48235415015990013,
|
||
|
|
"learning_rate": 3.207740072139748e-05,
|
||
|
|
"loss": 0.297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27599039673805237,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 4336.2,
|
||
|
|
"valid_targets_min": 676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.56,
|
||
|
|
"grad_norm": 0.43812989181014855,
|
||
|
|
"learning_rate": 3.2013700254532996e-05,
|
||
|
|
"loss": 0.2843,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2840218245983124,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 6128.1,
|
||
|
|
"valid_targets_min": 2306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.568,
|
||
|
|
"grad_norm": 0.4421391095546585,
|
||
|
|
"learning_rate": 3.194980854484794e-05,
|
||
|
|
"loss": 0.2824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2730617821216583,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 5236.0,
|
||
|
|
"valid_targets_min": 2238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.576,
|
||
|
|
"grad_norm": 0.40993966192632497,
|
||
|
|
"learning_rate": 3.188572660941702e-05,
|
||
|
|
"loss": 0.2737,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2702597379684448,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 5520.4,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.584,
|
||
|
|
"grad_norm": 0.449106850254283,
|
||
|
|
"learning_rate": 3.182145546834311e-05,
|
||
|
|
"loss": 0.2827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2685029208660126,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 5474.4,
|
||
|
|
"valid_targets_min": 1011
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.592,
|
||
|
|
"grad_norm": 0.43609694783076547,
|
||
|
|
"learning_rate": 3.1756996144740994e-05,
|
||
|
|
"loss": 0.2727,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26621997356414795,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 5704.4,
|
||
|
|
"valid_targets_min": 2305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6,
|
||
|
|
"grad_norm": 0.437600515095227,
|
||
|
|
"learning_rate": 3.1692349664721074e-05,
|
||
|
|
"loss": 0.2845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2866860032081604,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 6387.4,
|
||
|
|
"valid_targets_min": 2588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.608,
|
||
|
|
"grad_norm": 0.4758070327306289,
|
||
|
|
"learning_rate": 3.1627517057373046e-05,
|
||
|
|
"loss": 0.2754,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27016139030456543,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 5040.3,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.616,
|
||
|
|
"grad_norm": 0.5050945646090564,
|
||
|
|
"learning_rate": 3.156249935474953e-05,
|
||
|
|
"loss": 0.2629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27052950859069824,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 5326.6,
|
||
|
|
"valid_targets_min": 2368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.624,
|
||
|
|
"grad_norm": 0.39592620562061526,
|
||
|
|
"learning_rate": 3.1497297591849614e-05,
|
||
|
|
"loss": 0.2736,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26756832003593445,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 6438.1,
|
||
|
|
"valid_targets_min": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.632,
|
||
|
|
"grad_norm": 0.47399414052636385,
|
||
|
|
"learning_rate": 3.143191280660238e-05,
|
||
|
|
"loss": 0.2877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3013014495372772,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 5066.2,
|
||
|
|
"valid_targets_min": 2808
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.64,
|
||
|
|
"grad_norm": 0.4498456912539592,
|
||
|
|
"learning_rate": 3.1366346039850424e-05,
|
||
|
|
"loss": 0.2769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3045828938484192,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 5521.9,
|
||
|
|
"valid_targets_min": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.648,
|
||
|
|
"grad_norm": 0.495309465433173,
|
||
|
|
"learning_rate": 3.130059833533323e-05,
|
||
|
|
"loss": 0.2817,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30020833015441895,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 4797.9,
|
||
|
|
"valid_targets_min": 564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.656,
|
||
|
|
"grad_norm": 0.4297894353894199,
|
||
|
|
"learning_rate": 3.123467073967059e-05,
|
||
|
|
"loss": 0.2768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2691328823566437,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 6277.9,
|
||
|
|
"valid_targets_min": 3682
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.664,
|
||
|
|
"grad_norm": 0.45151420932528674,
|
||
|
|
"learning_rate": 3.116856430234594e-05,
|
||
|
|
"loss": 0.2905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2929932773113251,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 6091.2,
|
||
|
|
"valid_targets_min": 1280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.672,
|
||
|
|
"grad_norm": 0.4255273252520274,
|
||
|
|
"learning_rate": 3.110228007568963e-05,
|
||
|
|
"loss": 0.2851,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2611987590789795,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 5407.6,
|
||
|
|
"valid_targets_min": 2936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.68,
|
||
|
|
"grad_norm": 0.41326123258834,
|
||
|
|
"learning_rate": 3.103581911486221e-05,
|
||
|
|
"loss": 0.2676,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2713870704174042,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 5890.7,
|
||
|
|
"valid_targets_min": 3319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6879999999999997,
|
||
|
|
"grad_norm": 0.42906521181729274,
|
||
|
|
"learning_rate": 3.0969182477837604e-05,
|
||
|
|
"loss": 0.2905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2987547516822815,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 6719.4,
|
||
|
|
"valid_targets_min": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6959999999999997,
|
||
|
|
"grad_norm": 0.4651035568357889,
|
||
|
|
"learning_rate": 3.090237122538628e-05,
|
||
|
|
"loss": 0.2808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2748391032218933,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 4811.9,
|
||
|
|
"valid_targets_min": 1835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7039999999999997,
|
||
|
|
"grad_norm": 0.4526858770407024,
|
||
|
|
"learning_rate": 3.0835386421058345e-05,
|
||
|
|
"loss": 0.2717,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2757079303264618,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 5114.1,
|
||
|
|
"valid_targets_min": 1825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7119999999999997,
|
||
|
|
"grad_norm": 0.4303883323426744,
|
||
|
|
"learning_rate": 3.0768229131166664e-05,
|
||
|
|
"loss": 0.2719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27044957876205444,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 5952.2,
|
||
|
|
"valid_targets_min": 2348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7199999999999998,
|
||
|
|
"grad_norm": 0.421293283882041,
|
||
|
|
"learning_rate": 3.070090042476983e-05,
|
||
|
|
"loss": 0.2794,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2537495195865631,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 5275.4,
|
||
|
|
"valid_targets_min": 2820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7279999999999998,
|
||
|
|
"grad_norm": 0.42070620075383014,
|
||
|
|
"learning_rate": 3.063340137365517e-05,
|
||
|
|
"loss": 0.2727,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26784974336624146,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 6155.7,
|
||
|
|
"valid_targets_min": 2833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7359999999999998,
|
||
|
|
"grad_norm": 0.46160330069058947,
|
||
|
|
"learning_rate": 3.0565733052321674e-05,
|
||
|
|
"loss": 0.2939,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3010793924331665,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 5182.6,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7439999999999998,
|
||
|
|
"grad_norm": 0.47565115114958284,
|
||
|
|
"learning_rate": 3.0497896537962924e-05,
|
||
|
|
"loss": 0.2747,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29553931951522827,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 4856.3,
|
||
|
|
"valid_targets_min": 2348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.752,
|
||
|
|
"grad_norm": 0.47758063053303274,
|
||
|
|
"learning_rate": 3.042989291044991e-05,
|
||
|
|
"loss": 0.2883,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2923269271850586,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 4834.2,
|
||
|
|
"valid_targets_min": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.76,
|
||
|
|
"grad_norm": 0.44821402221279694,
|
||
|
|
"learning_rate": 3.036172325231383e-05,
|
||
|
|
"loss": 0.275,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27335962653160095,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 5101.4,
|
||
|
|
"valid_targets_min": 2237
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.768,
|
||
|
|
"grad_norm": 0.40899132950092204,
|
||
|
|
"learning_rate": 3.0293388648728908e-05,
|
||
|
|
"loss": 0.2851,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26575198769569397,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 5682.9,
|
||
|
|
"valid_targets_min": 3293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.776,
|
||
|
|
"grad_norm": 0.4748210406966679,
|
||
|
|
"learning_rate": 3.022489018749508e-05,
|
||
|
|
"loss": 0.2786,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28260117769241333,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 5148.7,
|
||
|
|
"valid_targets_min": 659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.784,
|
||
|
|
"grad_norm": 0.4349828443150829,
|
||
|
|
"learning_rate": 3.015622895902068e-05,
|
||
|
|
"loss": 0.2893,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29926276206970215,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 5951.7,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.792,
|
||
|
|
"grad_norm": 0.4101951619425804,
|
||
|
|
"learning_rate": 3.008740605630508e-05,
|
||
|
|
"loss": 0.28,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27278026938438416,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 6255.8,
|
||
|
|
"valid_targets_min": 2383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8,
|
||
|
|
"grad_norm": 0.43886743945375883,
|
||
|
|
"learning_rate": 3.0018422574921337e-05,
|
||
|
|
"loss": 0.2743,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27373838424682617,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 6037.8,
|
||
|
|
"valid_targets_min": 2287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.808,
|
||
|
|
"grad_norm": 0.46273862132614424,
|
||
|
|
"learning_rate": 2.9949279612998673e-05,
|
||
|
|
"loss": 0.276,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2563420534133911,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 5141.6,
|
||
|
|
"valid_targets_min": 2334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.816,
|
||
|
|
"grad_norm": 0.42084337553867707,
|
||
|
|
"learning_rate": 2.9879978271205064e-05,
|
||
|
|
"loss": 0.2709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2631978392601013,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 5481.5,
|
||
|
|
"valid_targets_min": 2358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.824,
|
||
|
|
"grad_norm": 0.4226323080765043,
|
||
|
|
"learning_rate": 2.9810519652729692e-05,
|
||
|
|
"loss": 0.2898,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27294427156448364,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 5862.9,
|
||
|
|
"valid_targets_min": 2158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.832,
|
||
|
|
"grad_norm": 0.4133165678616725,
|
||
|
|
"learning_rate": 2.9740904863265378e-05,
|
||
|
|
"loss": 0.2695,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25264087319374084,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 5787.2,
|
||
|
|
"valid_targets_min": 2246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.84,
|
||
|
|
"grad_norm": 0.44849937220188063,
|
||
|
|
"learning_rate": 2.967113501099097e-05,
|
||
|
|
"loss": 0.2835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32316309213638306,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 5906.6,
|
||
|
|
"valid_targets_min": 4056
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.848,
|
||
|
|
"grad_norm": 0.42817162546087406,
|
||
|
|
"learning_rate": 2.9601211206553745e-05,
|
||
|
|
"loss": 0.2744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26556289196014404,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 5367.5,
|
||
|
|
"valid_targets_min": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.856,
|
||
|
|
"grad_norm": 0.38768874404245973,
|
||
|
|
"learning_rate": 2.9531134563051686e-05,
|
||
|
|
"loss": 0.2718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2648174464702606,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 6421.1,
|
||
|
|
"valid_targets_min": 2438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.864,
|
||
|
|
"grad_norm": 0.4047385059000395,
|
||
|
|
"learning_rate": 2.946090619601579e-05,
|
||
|
|
"loss": 0.2771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2702750265598297,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 6062.8,
|
||
|
|
"valid_targets_min": 2828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.872,
|
||
|
|
"grad_norm": 0.4481184534104384,
|
||
|
|
"learning_rate": 2.9390527223392292e-05,
|
||
|
|
"loss": 0.2799,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30734002590179443,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 5348.5,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.88,
|
||
|
|
"grad_norm": 0.4358783108651049,
|
||
|
|
"learning_rate": 2.931999876552488e-05,
|
||
|
|
"loss": 0.2855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29541853070259094,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 5955.2,
|
||
|
|
"valid_targets_min": 3117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.888,
|
||
|
|
"grad_norm": 0.3874182891708397,
|
||
|
|
"learning_rate": 2.9249321945136854e-05,
|
||
|
|
"loss": 0.2679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2845751643180847,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 7245.1,
|
||
|
|
"valid_targets_min": 2606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.896,
|
||
|
|
"grad_norm": 0.42410801990986086,
|
||
|
|
"learning_rate": 2.9178497887313257e-05,
|
||
|
|
"loss": 0.2869,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27056288719177246,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 5659.6,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.904,
|
||
|
|
"grad_norm": 0.46595265901950295,
|
||
|
|
"learning_rate": 2.9107527719482968e-05,
|
||
|
|
"loss": 0.2775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27986976504325867,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 5373.6,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.912,
|
||
|
|
"grad_norm": 0.4402020755344616,
|
||
|
|
"learning_rate": 2.9036412571400747e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2562943696975708,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 5127.8,
|
||
|
|
"valid_targets_min": 2872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.92,
|
||
|
|
"grad_norm": 0.3805031455186279,
|
||
|
|
"learning_rate": 2.8965153575129255e-05,
|
||
|
|
"loss": 0.2837,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2894074320793152,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 7028.5,
|
||
|
|
"valid_targets_min": 3875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.928,
|
||
|
|
"grad_norm": 0.4460246697987472,
|
||
|
|
"learning_rate": 2.8893751865021044e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25859567523002625,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 5250.1,
|
||
|
|
"valid_targets_min": 1151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.936,
|
||
|
|
"grad_norm": 0.40198594148376793,
|
||
|
|
"learning_rate": 2.8822208577700473e-05,
|
||
|
|
"loss": 0.2815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24994345009326935,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 5900.0,
|
||
|
|
"valid_targets_min": 2878
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.944,
|
||
|
|
"grad_norm": 0.45652338078801363,
|
||
|
|
"learning_rate": 2.8750524852045642e-05,
|
||
|
|
"loss": 0.2785,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3019225001335144,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 5770.6,
|
||
|
|
"valid_targets_min": 793
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.952,
|
||
|
|
"grad_norm": 0.3891949461136039,
|
||
|
|
"learning_rate": 2.867870182917024e-05,
|
||
|
|
"loss": 0.2851,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2574838697910309,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 5976.9,
|
||
|
|
"valid_targets_min": 2672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.96,
|
||
|
|
"grad_norm": 0.4372813649278471,
|
||
|
|
"learning_rate": 2.8606740652405394e-05,
|
||
|
|
"loss": 0.289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29001131653785706,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 5360.3,
|
||
|
|
"valid_targets_min": 3093
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.968,
|
||
|
|
"grad_norm": 0.4165914393492165,
|
||
|
|
"learning_rate": 2.853464246728147e-05,
|
||
|
|
"loss": 0.2702,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2655784785747528,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 5410.2,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.976,
|
||
|
|
"grad_norm": 0.4008129134672245,
|
||
|
|
"learning_rate": 2.846240842150984e-05,
|
||
|
|
"loss": 0.2687,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29392343759536743,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 6621.4,
|
||
|
|
"valid_targets_min": 2007
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.984,
|
||
|
|
"grad_norm": 0.4063502231247828,
|
||
|
|
"learning_rate": 2.839003966496458e-05,
|
||
|
|
"loss": 0.2795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2830163538455963,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 7332.2,
|
||
|
|
"valid_targets_min": 2647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.992,
|
||
|
|
"grad_norm": 0.41454228214125255,
|
||
|
|
"learning_rate": 2.8317537349664215e-05,
|
||
|
|
"loss": 0.2824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.300517737865448,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 6810.3,
|
||
|
|
"valid_targets_min": 1725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.3811043034757187,
|
||
|
|
"learning_rate": 2.824490262975334e-05,
|
||
|
|
"loss": 0.2822,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2567134201526642,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 6309.3,
|
||
|
|
"valid_targets_min": 2837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.008,
|
||
|
|
"grad_norm": 0.44986998163213937,
|
||
|
|
"learning_rate": 2.817213666148427e-05,
|
||
|
|
"loss": 0.2598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2562277913093567,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 4919.7,
|
||
|
|
"valid_targets_min": 2287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.016,
|
||
|
|
"grad_norm": 0.39188395180907326,
|
||
|
|
"learning_rate": 2.809924060319862e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24832743406295776,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 5902.5,
|
||
|
|
"valid_targets_min": 2527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.024,
|
||
|
|
"grad_norm": 0.44868780761237514,
|
||
|
|
"learning_rate": 2.802621561530888e-05,
|
||
|
|
"loss": 0.2699,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2716829776763916,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 5865.4,
|
||
|
|
"valid_targets_min": 1532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.032,
|
||
|
|
"grad_norm": 0.49232768631488905,
|
||
|
|
"learning_rate": 2.7953062860279937e-05,
|
||
|
|
"loss": 0.2645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.254425048828125,
|
||
|
|
"step": 1895,
|
||
|
|
"valid_targets_mean": 4952.2,
|
||
|
|
"valid_targets_min": 2359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.04,
|
||
|
|
"grad_norm": 0.39705870685465516,
|
||
|
|
"learning_rate": 2.7879783502610557e-05,
|
||
|
|
"loss": 0.2598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26486480236053467,
|
||
|
|
"step": 1900,
|
||
|
|
"valid_targets_mean": 5991.8,
|
||
|
|
"valid_targets_min": 2506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.048,
|
||
|
|
"grad_norm": 0.4103910836926923,
|
||
|
|
"learning_rate": 2.7806378708814875e-05,
|
||
|
|
"loss": 0.2574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2603754699230194,
|
||
|
|
"step": 1905,
|
||
|
|
"valid_targets_mean": 5866.4,
|
||
|
|
"valid_targets_min": 3445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.056,
|
||
|
|
"grad_norm": 0.4404796718903863,
|
||
|
|
"learning_rate": 2.773284964740379e-05,
|
||
|
|
"loss": 0.2547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25497716665267944,
|
||
|
|
"step": 1910,
|
||
|
|
"valid_targets_mean": 5420.9,
|
||
|
|
"valid_targets_min": 2347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.064,
|
||
|
|
"grad_norm": 0.4595279784055635,
|
||
|
|
"learning_rate": 2.7659197488866403e-05,
|
||
|
|
"loss": 0.2733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2736179828643799,
|
||
|
|
"step": 1915,
|
||
|
|
"valid_targets_mean": 5122.2,
|
||
|
|
"valid_targets_min": 977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.072,
|
||
|
|
"grad_norm": 0.47456111645753335,
|
||
|
|
"learning_rate": 2.7585423405651347e-05,
|
||
|
|
"loss": 0.2661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2576856017112732,
|
||
|
|
"step": 1920,
|
||
|
|
"valid_targets_mean": 4942.1,
|
||
|
|
"valid_targets_min": 2950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.08,
|
||
|
|
"grad_norm": 0.43777645624232037,
|
||
|
|
"learning_rate": 2.7511528572148153e-05,
|
||
|
|
"loss": 0.2695,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27957212924957275,
|
||
|
|
"step": 1925,
|
||
|
|
"valid_targets_mean": 5275.7,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.088,
|
||
|
|
"grad_norm": 0.4522466576604798,
|
||
|
|
"learning_rate": 2.7437514164668536e-05,
|
||
|
|
"loss": 0.2604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2531649172306061,
|
||
|
|
"step": 1930,
|
||
|
|
"valid_targets_mean": 4698.0,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.096,
|
||
|
|
"grad_norm": 0.4424402462760102,
|
||
|
|
"learning_rate": 2.7363381361427692e-05,
|
||
|
|
"loss": 0.2726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2742995023727417,
|
||
|
|
"step": 1935,
|
||
|
|
"valid_targets_mean": 5695.1,
|
||
|
|
"valid_targets_min": 2581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.104,
|
||
|
|
"grad_norm": 0.4157664621751824,
|
||
|
|
"learning_rate": 2.72891313425255e-05,
|
||
|
|
"loss": 0.2495,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25712645053863525,
|
||
|
|
"step": 1940,
|
||
|
|
"valid_targets_mean": 5666.0,
|
||
|
|
"valid_targets_min": 2670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.112,
|
||
|
|
"grad_norm": 0.4448020926912599,
|
||
|
|
"learning_rate": 2.7214765289927777e-05,
|
||
|
|
"loss": 0.2643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2533559501171112,
|
||
|
|
"step": 1945,
|
||
|
|
"valid_targets_mean": 4905.4,
|
||
|
|
"valid_targets_min": 2204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.12,
|
||
|
|
"grad_norm": 0.4416757344594968,
|
||
|
|
"learning_rate": 2.714028438744746e-05,
|
||
|
|
"loss": 0.2643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23446297645568848,
|
||
|
|
"step": 1950,
|
||
|
|
"valid_targets_mean": 4820.4,
|
||
|
|
"valid_targets_min": 3010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.128,
|
||
|
|
"grad_norm": 0.99132804143127,
|
||
|
|
"learning_rate": 2.706568982072573e-05,
|
||
|
|
"loss": 0.2653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25379735231399536,
|
||
|
|
"step": 1955,
|
||
|
|
"valid_targets_mean": 4584.4,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.136,
|
||
|
|
"grad_norm": 0.37723732371353097,
|
||
|
|
"learning_rate": 2.6990982777213174e-05,
|
||
|
|
"loss": 0.2677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2723168730735779,
|
||
|
|
"step": 1960,
|
||
|
|
"valid_targets_mean": 6596.4,
|
||
|
|
"valid_targets_min": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.144,
|
||
|
|
"grad_norm": 0.4271097743973345,
|
||
|
|
"learning_rate": 2.691616444615085e-05,
|
||
|
|
"loss": 0.2768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26200491189956665,
|
||
|
|
"step": 1965,
|
||
|
|
"valid_targets_mean": 6096.9,
|
||
|
|
"valid_targets_min": 3143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.152,
|
||
|
|
"grad_norm": 0.41531856969368103,
|
||
|
|
"learning_rate": 2.6841236018551402e-05,
|
||
|
|
"loss": 0.2715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.260453999042511,
|
||
|
|
"step": 1970,
|
||
|
|
"valid_targets_mean": 5445.1,
|
||
|
|
"valid_targets_min": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.16,
|
||
|
|
"grad_norm": 0.4417409056500301,
|
||
|
|
"learning_rate": 2.6766198687180028e-05,
|
||
|
|
"loss": 0.2673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2590559124946594,
|
||
|
|
"step": 1975,
|
||
|
|
"valid_targets_mean": 5636.7,
|
||
|
|
"valid_targets_min": 611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.168,
|
||
|
|
"grad_norm": 0.4422829407193221,
|
||
|
|
"learning_rate": 2.6691053646535564e-05,
|
||
|
|
"loss": 0.2658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2656911313533783,
|
||
|
|
"step": 1980,
|
||
|
|
"valid_targets_mean": 5018.6,
|
||
|
|
"valid_targets_min": 619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.176,
|
||
|
|
"grad_norm": 0.4237887881247236,
|
||
|
|
"learning_rate": 2.6615802092831446e-05,
|
||
|
|
"loss": 0.2636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2780739963054657,
|
||
|
|
"step": 1985,
|
||
|
|
"valid_targets_mean": 5302.7,
|
||
|
|
"valid_targets_min": 2266
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.184,
|
||
|
|
"grad_norm": 0.42380386232725864,
|
||
|
|
"learning_rate": 2.6540445223976637e-05,
|
||
|
|
"loss": 0.2624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2620670199394226,
|
||
|
|
"step": 1990,
|
||
|
|
"valid_targets_mean": 6350.1,
|
||
|
|
"valid_targets_min": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.192,
|
||
|
|
"grad_norm": 0.47180334207154845,
|
||
|
|
"learning_rate": 2.6464984239556602e-05,
|
||
|
|
"loss": 0.2499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2522035539150238,
|
||
|
|
"step": 1995,
|
||
|
|
"valid_targets_mean": 5105.7,
|
||
|
|
"valid_targets_min": 707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2,
|
||
|
|
"grad_norm": 0.40590968448904624,
|
||
|
|
"learning_rate": 2.63894203408142e-05,
|
||
|
|
"loss": 0.2641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2857327163219452,
|
||
|
|
"step": 2000,
|
||
|
|
"valid_targets_mean": 6472.3,
|
||
|
|
"valid_targets_min": 824
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.208,
|
||
|
|
"grad_norm": 0.4364196490684295,
|
||
|
|
"learning_rate": 2.6313754730630528e-05,
|
||
|
|
"loss": 0.2707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3018920421600342,
|
||
|
|
"step": 2005,
|
||
|
|
"valid_targets_mean": 6931.6,
|
||
|
|
"valid_targets_min": 3554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.216,
|
||
|
|
"grad_norm": 0.4380123000076932,
|
||
|
|
"learning_rate": 2.623798861350582e-05,
|
||
|
|
"loss": 0.2711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27944761514663696,
|
||
|
|
"step": 2010,
|
||
|
|
"valid_targets_mean": 5875.2,
|
||
|
|
"valid_targets_min": 1748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.224,
|
||
|
|
"grad_norm": 0.4370825009728502,
|
||
|
|
"learning_rate": 2.6162123195540247e-05,
|
||
|
|
"loss": 0.2648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28861603140830994,
|
||
|
|
"step": 2015,
|
||
|
|
"valid_targets_mean": 5691.7,
|
||
|
|
"valid_targets_min": 2845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.232,
|
||
|
|
"grad_norm": 0.485452151135628,
|
||
|
|
"learning_rate": 2.6086159684414726e-05,
|
||
|
|
"loss": 0.2627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2795363664627075,
|
||
|
|
"step": 2020,
|
||
|
|
"valid_targets_mean": 4541.8,
|
||
|
|
"valid_targets_min": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.24,
|
||
|
|
"grad_norm": 0.48611407649627225,
|
||
|
|
"learning_rate": 2.6010099289371694e-05,
|
||
|
|
"loss": 0.279,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25999119877815247,
|
||
|
|
"step": 2025,
|
||
|
|
"valid_targets_mean": 4361.3,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.248,
|
||
|
|
"grad_norm": 0.43314259033107916,
|
||
|
|
"learning_rate": 2.5933943221195844e-05,
|
||
|
|
"loss": 0.2704,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27597910165786743,
|
||
|
|
"step": 2030,
|
||
|
|
"valid_targets_mean": 5998.9,
|
||
|
|
"valid_targets_min": 3510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2560000000000002,
|
||
|
|
"grad_norm": 0.4467889320108032,
|
||
|
|
"learning_rate": 2.5857692692194884e-05,
|
||
|
|
"loss": 0.2609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27943307161331177,
|
||
|
|
"step": 2035,
|
||
|
|
"valid_targets_mean": 6833.2,
|
||
|
|
"valid_targets_min": 3418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2640000000000002,
|
||
|
|
"grad_norm": 0.444733214195372,
|
||
|
|
"learning_rate": 2.5781348916180195e-05,
|
||
|
|
"loss": 0.2662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2452739030122757,
|
||
|
|
"step": 2040,
|
||
|
|
"valid_targets_mean": 5335.4,
|
||
|
|
"valid_targets_min": 2970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2720000000000002,
|
||
|
|
"grad_norm": 0.4473237438727197,
|
||
|
|
"learning_rate": 2.570491310844755e-05,
|
||
|
|
"loss": 0.2627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26792657375335693,
|
||
|
|
"step": 2045,
|
||
|
|
"valid_targets_mean": 5598.2,
|
||
|
|
"valid_targets_min": 3002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2800000000000002,
|
||
|
|
"grad_norm": 0.44916463593865047,
|
||
|
|
"learning_rate": 2.562838648575774e-05,
|
||
|
|
"loss": 0.265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25192153453826904,
|
||
|
|
"step": 2050,
|
||
|
|
"valid_targets_mean": 5738.1,
|
||
|
|
"valid_targets_min": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.288,
|
||
|
|
"grad_norm": 0.37869541775803,
|
||
|
|
"learning_rate": 2.5551770266317224e-05,
|
||
|
|
"loss": 0.266,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2549699544906616,
|
||
|
|
"step": 2055,
|
||
|
|
"valid_targets_mean": 6927.1,
|
||
|
|
"valid_targets_min": 3188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.296,
|
||
|
|
"grad_norm": 0.45381896265008714,
|
||
|
|
"learning_rate": 2.5475065669758713e-05,
|
||
|
|
"loss": 0.2802,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2710186839103699,
|
||
|
|
"step": 2060,
|
||
|
|
"valid_targets_mean": 5502.5,
|
||
|
|
"valid_targets_min": 721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.304,
|
||
|
|
"grad_norm": 0.476208174837823,
|
||
|
|
"learning_rate": 2.5398273917121786e-05,
|
||
|
|
"loss": 0.2658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2721608281135559,
|
||
|
|
"step": 2065,
|
||
|
|
"valid_targets_mean": 5169.7,
|
||
|
|
"valid_targets_min": 2469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.312,
|
||
|
|
"grad_norm": 0.4512021420566415,
|
||
|
|
"learning_rate": 2.532139623083342e-05,
|
||
|
|
"loss": 0.2787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.280364990234375,
|
||
|
|
"step": 2070,
|
||
|
|
"valid_targets_mean": 5583.2,
|
||
|
|
"valid_targets_min": 759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.32,
|
||
|
|
"grad_norm": 0.43311411478631495,
|
||
|
|
"learning_rate": 2.5244433834688552e-05,
|
||
|
|
"loss": 0.26,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24803069233894348,
|
||
|
|
"step": 2075,
|
||
|
|
"valid_targets_mean": 5230.8,
|
||
|
|
"valid_targets_min": 2395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.328,
|
||
|
|
"grad_norm": 0.4236542010190115,
|
||
|
|
"learning_rate": 2.5167387953830602e-05,
|
||
|
|
"loss": 0.2785,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26770997047424316,
|
||
|
|
"step": 2080,
|
||
|
|
"valid_targets_mean": 5747.2,
|
||
|
|
"valid_targets_min": 1906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.336,
|
||
|
|
"grad_norm": 0.41233687112752104,
|
||
|
|
"learning_rate": 2.5090259814731946e-05,
|
||
|
|
"loss": 0.252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23465529084205627,
|
||
|
|
"step": 2085,
|
||
|
|
"valid_targets_mean": 6191.6,
|
||
|
|
"valid_targets_min": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.344,
|
||
|
|
"grad_norm": 0.4301152484815432,
|
||
|
|
"learning_rate": 2.5013050645174414e-05,
|
||
|
|
"loss": 0.2573,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2639310359954834,
|
||
|
|
"step": 2090,
|
||
|
|
"valid_targets_mean": 5565.9,
|
||
|
|
"valid_targets_min": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.352,
|
||
|
|
"grad_norm": 0.42805683293361596,
|
||
|
|
"learning_rate": 2.4935761674229735e-05,
|
||
|
|
"loss": 0.2741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28847432136535645,
|
||
|
|
"step": 2095,
|
||
|
|
"valid_targets_mean": 6180.6,
|
||
|
|
"valid_targets_min": 2325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.36,
|
||
|
|
"grad_norm": 0.4108722186615793,
|
||
|
|
"learning_rate": 2.4858394132239982e-05,
|
||
|
|
"loss": 0.2631,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26170119643211365,
|
||
|
|
"step": 2100,
|
||
|
|
"valid_targets_mean": 6346.6,
|
||
|
|
"valid_targets_min": 3315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.368,
|
||
|
|
"grad_norm": 0.4438984053068301,
|
||
|
|
"learning_rate": 2.4780949250797964e-05,
|
||
|
|
"loss": 0.2569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2657065987586975,
|
||
|
|
"step": 2105,
|
||
|
|
"valid_targets_mean": 5510.9,
|
||
|
|
"valid_targets_min": 2010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.376,
|
||
|
|
"grad_norm": 0.40828469834181624,
|
||
|
|
"learning_rate": 2.4703428262727656e-05,
|
||
|
|
"loss": 0.2742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2982710003852844,
|
||
|
|
"step": 2110,
|
||
|
|
"valid_targets_mean": 6507.9,
|
||
|
|
"valid_targets_min": 2926
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.384,
|
||
|
|
"grad_norm": 0.43104351910647665,
|
||
|
|
"learning_rate": 2.4625832402064525e-05,
|
||
|
|
"loss": 0.2623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3116433024406433,
|
||
|
|
"step": 2115,
|
||
|
|
"valid_targets_mean": 5877.9,
|
||
|
|
"valid_targets_min": 636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.392,
|
||
|
|
"grad_norm": 0.42418155412882275,
|
||
|
|
"learning_rate": 2.454816290403595e-05,
|
||
|
|
"loss": 0.2608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2540300786495209,
|
||
|
|
"step": 2120,
|
||
|
|
"valid_targets_mean": 5527.6,
|
||
|
|
"valid_targets_min": 2077
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4,
|
||
|
|
"grad_norm": 0.39935339866930863,
|
||
|
|
"learning_rate": 2.4470421005041492e-05,
|
||
|
|
"loss": 0.2671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25366467237472534,
|
||
|
|
"step": 2125,
|
||
|
|
"valid_targets_mean": 5777.4,
|
||
|
|
"valid_targets_min": 2126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.408,
|
||
|
|
"grad_norm": 0.46642524230649085,
|
||
|
|
"learning_rate": 2.4392607942633263e-05,
|
||
|
|
"loss": 0.2692,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2766075134277344,
|
||
|
|
"step": 2130,
|
||
|
|
"valid_targets_mean": 5309.3,
|
||
|
|
"valid_targets_min": 2492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.416,
|
||
|
|
"grad_norm": 0.40542150642600944,
|
||
|
|
"learning_rate": 2.43147249554962e-05,
|
||
|
|
"loss": 0.2638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2503218352794647,
|
||
|
|
"step": 2135,
|
||
|
|
"valid_targets_mean": 5945.4,
|
||
|
|
"valid_targets_min": 2787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.424,
|
||
|
|
"grad_norm": 0.5003293603533124,
|
||
|
|
"learning_rate": 2.423677328342835e-05,
|
||
|
|
"loss": 0.2762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.269273042678833,
|
||
|
|
"step": 2140,
|
||
|
|
"valid_targets_mean": 4656.9,
|
||
|
|
"valid_targets_min": 2190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.432,
|
||
|
|
"grad_norm": 0.43945562577333586,
|
||
|
|
"learning_rate": 2.415875416732113e-05,
|
||
|
|
"loss": 0.2669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2691774368286133,
|
||
|
|
"step": 2145,
|
||
|
|
"valid_targets_mean": 5574.9,
|
||
|
|
"valid_targets_min": 2402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.44,
|
||
|
|
"grad_norm": 0.46811613846101446,
|
||
|
|
"learning_rate": 2.4080668849139603e-05,
|
||
|
|
"loss": 0.2695,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26551342010498047,
|
||
|
|
"step": 2150,
|
||
|
|
"valid_targets_mean": 5372.2,
|
||
|
|
"valid_targets_min": 991
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.448,
|
||
|
|
"grad_norm": 0.4077015317433029,
|
||
|
|
"learning_rate": 2.4002518571902665e-05,
|
||
|
|
"loss": 0.2612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25210684537887573,
|
||
|
|
"step": 2155,
|
||
|
|
"valid_targets_mean": 5778.7,
|
||
|
|
"valid_targets_min": 2306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.456,
|
||
|
|
"grad_norm": 0.440624451883909,
|
||
|
|
"learning_rate": 2.392430457966328e-05,
|
||
|
|
"loss": 0.2542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24545451998710632,
|
||
|
|
"step": 2160,
|
||
|
|
"valid_targets_mean": 5699.8,
|
||
|
|
"valid_targets_min": 2457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.464,
|
||
|
|
"grad_norm": 0.4644597860891612,
|
||
|
|
"learning_rate": 2.3846028117488686e-05,
|
||
|
|
"loss": 0.2611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2686898410320282,
|
||
|
|
"step": 2165,
|
||
|
|
"valid_targets_mean": 5256.7,
|
||
|
|
"valid_targets_min": 901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.472,
|
||
|
|
"grad_norm": 0.45287734791016193,
|
||
|
|
"learning_rate": 2.3767690431440533e-05,
|
||
|
|
"loss": 0.2599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28908854722976685,
|
||
|
|
"step": 2170,
|
||
|
|
"valid_targets_mean": 6981.8,
|
||
|
|
"valid_targets_min": 708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.48,
|
||
|
|
"grad_norm": 0.41268646628823424,
|
||
|
|
"learning_rate": 2.368929276855512e-05,
|
||
|
|
"loss": 0.2588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26175349950790405,
|
||
|
|
"step": 2175,
|
||
|
|
"valid_targets_mean": 5828.8,
|
||
|
|
"valid_targets_min": 2328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.488,
|
||
|
|
"grad_norm": 0.40243049575345696,
|
||
|
|
"learning_rate": 2.361083637682347e-05,
|
||
|
|
"loss": 0.2627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26664072275161743,
|
||
|
|
"step": 2180,
|
||
|
|
"valid_targets_mean": 6444.5,
|
||
|
|
"valid_targets_min": 2079
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.496,
|
||
|
|
"grad_norm": 0.4208581290888113,
|
||
|
|
"learning_rate": 2.3532322505171502e-05,
|
||
|
|
"loss": 0.2629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26101818680763245,
|
||
|
|
"step": 2185,
|
||
|
|
"valid_targets_mean": 5354.2,
|
||
|
|
"valid_targets_min": 2479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.504,
|
||
|
|
"grad_norm": 0.3953533628663449,
|
||
|
|
"learning_rate": 2.3453752403440147e-05,
|
||
|
|
"loss": 0.2612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26703381538391113,
|
||
|
|
"step": 2190,
|
||
|
|
"valid_targets_mean": 5831.3,
|
||
|
|
"valid_targets_min": 2203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.512,
|
||
|
|
"grad_norm": 0.38434465715826854,
|
||
|
|
"learning_rate": 2.337512732236545e-05,
|
||
|
|
"loss": 0.256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23783396184444427,
|
||
|
|
"step": 2195,
|
||
|
|
"valid_targets_mean": 6418.1,
|
||
|
|
"valid_targets_min": 1104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.52,
|
||
|
|
"grad_norm": 0.43338676646075414,
|
||
|
|
"learning_rate": 2.3296448513558628e-05,
|
||
|
|
"loss": 0.2682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26993754506111145,
|
||
|
|
"step": 2200,
|
||
|
|
"valid_targets_mean": 5462.9,
|
||
|
|
"valid_targets_min": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.528,
|
||
|
|
"grad_norm": 0.4496777907267995,
|
||
|
|
"learning_rate": 2.321771722948622e-05,
|
||
|
|
"loss": 0.2721,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2622802257537842,
|
||
|
|
"step": 2205,
|
||
|
|
"valid_targets_mean": 6097.9,
|
||
|
|
"valid_targets_min": 3581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.536,
|
||
|
|
"grad_norm": 0.41566876544597353,
|
||
|
|
"learning_rate": 2.3138934723450074e-05,
|
||
|
|
"loss": 0.264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2877008616924286,
|
||
|
|
"step": 2210,
|
||
|
|
"valid_targets_mean": 6912.1,
|
||
|
|
"valid_targets_min": 2531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.544,
|
||
|
|
"grad_norm": 0.47881534922065505,
|
||
|
|
"learning_rate": 2.306010224956744e-05,
|
||
|
|
"loss": 0.273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31302931904792786,
|
||
|
|
"step": 2215,
|
||
|
|
"valid_targets_mean": 5172.6,
|
||
|
|
"valid_targets_min": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.552,
|
||
|
|
"grad_norm": 0.4090791008823442,
|
||
|
|
"learning_rate": 2.2981221062750986e-05,
|
||
|
|
"loss": 0.2568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23748889565467834,
|
||
|
|
"step": 2220,
|
||
|
|
"valid_targets_mean": 5436.2,
|
||
|
|
"valid_targets_min": 3544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.56,
|
||
|
|
"grad_norm": 0.37997502199670286,
|
||
|
|
"learning_rate": 2.290229241868882e-05,
|
||
|
|
"loss": 0.273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.251059353351593,
|
||
|
|
"step": 2225,
|
||
|
|
"valid_targets_mean": 5980.6,
|
||
|
|
"valid_targets_min": 687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.568,
|
||
|
|
"grad_norm": 0.3986292880302235,
|
||
|
|
"learning_rate": 2.282331757382454e-05,
|
||
|
|
"loss": 0.2697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28471672534942627,
|
||
|
|
"step": 2230,
|
||
|
|
"valid_targets_mean": 6370.6,
|
||
|
|
"valid_targets_min": 1941
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.576,
|
||
|
|
"grad_norm": 0.431878132975706,
|
||
|
|
"learning_rate": 2.2744297785337155e-05,
|
||
|
|
"loss": 0.2654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2553999125957489,
|
||
|
|
"step": 2235,
|
||
|
|
"valid_targets_mean": 5513.6,
|
||
|
|
"valid_targets_min": 2046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.584,
|
||
|
|
"grad_norm": 0.4115529559985401,
|
||
|
|
"learning_rate": 2.2665234311121155e-05,
|
||
|
|
"loss": 0.2659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26266321539878845,
|
||
|
|
"step": 2240,
|
||
|
|
"valid_targets_mean": 5916.8,
|
||
|
|
"valid_targets_min": 2845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.592,
|
||
|
|
"grad_norm": 0.45407655506541134,
|
||
|
|
"learning_rate": 2.258612840976645e-05,
|
||
|
|
"loss": 0.2632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24577778577804565,
|
||
|
|
"step": 2245,
|
||
|
|
"valid_targets_mean": 5146.2,
|
||
|
|
"valid_targets_min": 2756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6,
|
||
|
|
"grad_norm": 0.41467217796114125,
|
||
|
|
"learning_rate": 2.2506981340538315e-05,
|
||
|
|
"loss": 0.2624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26159006357192993,
|
||
|
|
"step": 2250,
|
||
|
|
"valid_targets_mean": 5252.8,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.608,
|
||
|
|
"grad_norm": 0.3919567632052263,
|
||
|
|
"learning_rate": 2.2427794363357384e-05,
|
||
|
|
"loss": 0.259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2614225745201111,
|
||
|
|
"step": 2255,
|
||
|
|
"valid_targets_mean": 6612.1,
|
||
|
|
"valid_targets_min": 2208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.616,
|
||
|
|
"grad_norm": 0.43278714802298296,
|
||
|
|
"learning_rate": 2.2348568738779566e-05,
|
||
|
|
"loss": 0.2742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29214614629745483,
|
||
|
|
"step": 2260,
|
||
|
|
"valid_targets_mean": 6142.4,
|
||
|
|
"valid_targets_min": 1847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.624,
|
||
|
|
"grad_norm": 0.4110332083414153,
|
||
|
|
"learning_rate": 2.2269305727975993e-05,
|
||
|
|
"loss": 0.2726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29854828119277954,
|
||
|
|
"step": 2265,
|
||
|
|
"valid_targets_mean": 7177.7,
|
||
|
|
"valid_targets_min": 3515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.632,
|
||
|
|
"grad_norm": 0.44547325805290383,
|
||
|
|
"learning_rate": 2.2190006592712927e-05,
|
||
|
|
"loss": 0.2638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26258304715156555,
|
||
|
|
"step": 2270,
|
||
|
|
"valid_targets_mean": 5474.2,
|
||
|
|
"valid_targets_min": 2692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.64,
|
||
|
|
"grad_norm": 0.4614610116421694,
|
||
|
|
"learning_rate": 2.2110672595331698e-05,
|
||
|
|
"loss": 0.2519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2816447913646698,
|
||
|
|
"step": 2275,
|
||
|
|
"valid_targets_mean": 4608.2,
|
||
|
|
"valid_targets_min": 384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.648,
|
||
|
|
"grad_norm": 0.41303927586822076,
|
||
|
|
"learning_rate": 2.2031304998728587e-05,
|
||
|
|
"loss": 0.2677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.261924684047699,
|
||
|
|
"step": 2280,
|
||
|
|
"valid_targets_mean": 5297.2,
|
||
|
|
"valid_targets_min": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.656,
|
||
|
|
"grad_norm": 0.42755844253142716,
|
||
|
|
"learning_rate": 2.1951905066334737e-05,
|
||
|
|
"loss": 0.27,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.275720477104187,
|
||
|
|
"step": 2285,
|
||
|
|
"valid_targets_mean": 5888.2,
|
||
|
|
"valid_targets_min": 2348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.664,
|
||
|
|
"grad_norm": 0.39946995165209875,
|
||
|
|
"learning_rate": 2.1872474062096046e-05,
|
||
|
|
"loss": 0.265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27406832575798035,
|
||
|
|
"step": 2290,
|
||
|
|
"valid_targets_mean": 6055.6,
|
||
|
|
"valid_targets_min": 2541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.672,
|
||
|
|
"grad_norm": 0.4286567596359356,
|
||
|
|
"learning_rate": 2.179301325045301e-05,
|
||
|
|
"loss": 0.2675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26409912109375,
|
||
|
|
"step": 2295,
|
||
|
|
"valid_targets_mean": 5746.8,
|
||
|
|
"valid_targets_min": 1493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.68,
|
||
|
|
"grad_norm": 0.6041261914809137,
|
||
|
|
"learning_rate": 2.1713523896320647e-05,
|
||
|
|
"loss": 0.2722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26296675205230713,
|
||
|
|
"step": 2300,
|
||
|
|
"valid_targets_mean": 5095.0,
|
||
|
|
"valid_targets_min": 798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6879999999999997,
|
||
|
|
"grad_norm": 0.4267338339744416,
|
||
|
|
"learning_rate": 2.163400726506832e-05,
|
||
|
|
"loss": 0.2595,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24727872014045715,
|
||
|
|
"step": 2305,
|
||
|
|
"valid_targets_mean": 5302.2,
|
||
|
|
"valid_targets_min": 2620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6959999999999997,
|
||
|
|
"grad_norm": 0.41218171564224837,
|
||
|
|
"learning_rate": 2.155446462249961e-05,
|
||
|
|
"loss": 0.2666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26557302474975586,
|
||
|
|
"step": 2310,
|
||
|
|
"valid_targets_mean": 6288.5,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7039999999999997,
|
||
|
|
"grad_norm": 0.45281061679532264,
|
||
|
|
"learning_rate": 2.147489723483217e-05,
|
||
|
|
"loss": 0.2648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2626858949661255,
|
||
|
|
"step": 2315,
|
||
|
|
"valid_targets_mean": 5368.7,
|
||
|
|
"valid_targets_min": 1957
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7119999999999997,
|
||
|
|
"grad_norm": 0.48248707648966427,
|
||
|
|
"learning_rate": 2.139530636867757e-05,
|
||
|
|
"loss": 0.2679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2763807475566864,
|
||
|
|
"step": 2320,
|
||
|
|
"valid_targets_mean": 5099.9,
|
||
|
|
"valid_targets_min": 932
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7199999999999998,
|
||
|
|
"grad_norm": 0.40742401489841945,
|
||
|
|
"learning_rate": 2.1315693291021114e-05,
|
||
|
|
"loss": 0.2535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25393787026405334,
|
||
|
|
"step": 2325,
|
||
|
|
"valid_targets_mean": 5533.6,
|
||
|
|
"valid_targets_min": 2550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7279999999999998,
|
||
|
|
"grad_norm": 0.4100080302945935,
|
||
|
|
"learning_rate": 2.1236059269201686e-05,
|
||
|
|
"loss": 0.2568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2520829439163208,
|
||
|
|
"step": 2330,
|
||
|
|
"valid_targets_mean": 6048.5,
|
||
|
|
"valid_targets_min": 2362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7359999999999998,
|
||
|
|
"grad_norm": 0.45314645842815454,
|
||
|
|
"learning_rate": 2.1156405570891584e-05,
|
||
|
|
"loss": 0.2709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27272647619247437,
|
||
|
|
"step": 2335,
|
||
|
|
"valid_targets_mean": 5500.8,
|
||
|
|
"valid_targets_min": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7439999999999998,
|
||
|
|
"grad_norm": 0.4026491355138221,
|
||
|
|
"learning_rate": 2.1076733464076322e-05,
|
||
|
|
"loss": 0.2571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24665513634681702,
|
||
|
|
"step": 2340,
|
||
|
|
"valid_targets_mean": 5994.2,
|
||
|
|
"valid_targets_min": 960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.752,
|
||
|
|
"grad_norm": 0.4702466547059018,
|
||
|
|
"learning_rate": 2.0997044217034462e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26227515935897827,
|
||
|
|
"step": 2345,
|
||
|
|
"valid_targets_mean": 5172.7,
|
||
|
|
"valid_targets_min": 2105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.76,
|
||
|
|
"grad_norm": 0.4039545878119713,
|
||
|
|
"learning_rate": 2.0917339098317405e-05,
|
||
|
|
"loss": 0.2711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2739185094833374,
|
||
|
|
"step": 2350,
|
||
|
|
"valid_targets_mean": 6585.1,
|
||
|
|
"valid_targets_min": 2045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.768,
|
||
|
|
"grad_norm": 0.4611875541775836,
|
||
|
|
"learning_rate": 2.083761937672922e-05,
|
||
|
|
"loss": 0.2754,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2752639055252075,
|
||
|
|
"step": 2355,
|
||
|
|
"valid_targets_mean": 5468.2,
|
||
|
|
"valid_targets_min": 802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.776,
|
||
|
|
"grad_norm": 0.47139731269484614,
|
||
|
|
"learning_rate": 2.0757886321306433e-05,
|
||
|
|
"loss": 0.2608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2621690630912781,
|
||
|
|
"step": 2360,
|
||
|
|
"valid_targets_mean": 5031.0,
|
||
|
|
"valid_targets_min": 1183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.784,
|
||
|
|
"grad_norm": 0.4249388855321063,
|
||
|
|
"learning_rate": 2.0678141201297827e-05,
|
||
|
|
"loss": 0.259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22892312705516815,
|
||
|
|
"step": 2365,
|
||
|
|
"valid_targets_mean": 5808.9,
|
||
|
|
"valid_targets_min": 2364
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.792,
|
||
|
|
"grad_norm": 0.4520229219931925,
|
||
|
|
"learning_rate": 2.059838528614423e-05,
|
||
|
|
"loss": 0.2609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26507341861724854,
|
||
|
|
"step": 2370,
|
||
|
|
"valid_targets_mean": 5221.9,
|
||
|
|
"valid_targets_min": 2056
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8,
|
||
|
|
"grad_norm": 0.4200067243880517,
|
||
|
|
"learning_rate": 2.0518619845458322e-05,
|
||
|
|
"loss": 0.2673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2991790771484375,
|
||
|
|
"step": 2375,
|
||
|
|
"valid_targets_mean": 6177.3,
|
||
|
|
"valid_targets_min": 2232
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.808,
|
||
|
|
"grad_norm": 0.38938230112936845,
|
||
|
|
"learning_rate": 2.0438846149004426e-05,
|
||
|
|
"loss": 0.2625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29206550121307373,
|
||
|
|
"step": 2380,
|
||
|
|
"valid_targets_mean": 6879.7,
|
||
|
|
"valid_targets_min": 3899
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.816,
|
||
|
|
"grad_norm": 0.45082171070921734,
|
||
|
|
"learning_rate": 2.0359065466678268e-05,
|
||
|
|
"loss": 0.2631,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27746134996414185,
|
||
|
|
"step": 2385,
|
||
|
|
"valid_targets_mean": 5233.9,
|
||
|
|
"valid_targets_min": 1939
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.824,
|
||
|
|
"grad_norm": 0.45232849088890803,
|
||
|
|
"learning_rate": 2.0279279068486795e-05,
|
||
|
|
"loss": 0.2646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26361560821533203,
|
||
|
|
"step": 2390,
|
||
|
|
"valid_targets_mean": 5877.0,
|
||
|
|
"valid_targets_min": 2719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.832,
|
||
|
|
"grad_norm": 0.459025083974539,
|
||
|
|
"learning_rate": 2.019948822452794e-05,
|
||
|
|
"loss": 0.2825,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2750234007835388,
|
||
|
|
"step": 2395,
|
||
|
|
"valid_targets_mean": 5298.2,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.84,
|
||
|
|
"grad_norm": 0.4196361041060983,
|
||
|
|
"learning_rate": 2.0119694204970393e-05,
|
||
|
|
"loss": 0.2714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2892104387283325,
|
||
|
|
"step": 2400,
|
||
|
|
"valid_targets_mean": 6700.9,
|
||
|
|
"valid_targets_min": 2552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.848,
|
||
|
|
"grad_norm": 0.4133422781214903,
|
||
|
|
"learning_rate": 2.0039898280033414e-05,
|
||
|
|
"loss": 0.2583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2697451412677765,
|
||
|
|
"step": 2405,
|
||
|
|
"valid_targets_mean": 5783.1,
|
||
|
|
"valid_targets_min": 1798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.856,
|
||
|
|
"grad_norm": 0.42185605892875344,
|
||
|
|
"learning_rate": 1.9960101719966592e-05,
|
||
|
|
"loss": 0.2775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2788695693016052,
|
||
|
|
"step": 2410,
|
||
|
|
"valid_targets_mean": 5466.9,
|
||
|
|
"valid_targets_min": 2780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.864,
|
||
|
|
"grad_norm": 0.4065091466238856,
|
||
|
|
"learning_rate": 1.9880305795029617e-05,
|
||
|
|
"loss": 0.2723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26106393337249756,
|
||
|
|
"step": 2415,
|
||
|
|
"valid_targets_mean": 5957.0,
|
||
|
|
"valid_targets_min": 2787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.872,
|
||
|
|
"grad_norm": 0.4350518885133558,
|
||
|
|
"learning_rate": 1.980051177547207e-05,
|
||
|
|
"loss": 0.2662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2566600441932678,
|
||
|
|
"step": 2420,
|
||
|
|
"valid_targets_mean": 5157.2,
|
||
|
|
"valid_targets_min": 2272
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.88,
|
||
|
|
"grad_norm": 0.43647694712355944,
|
||
|
|
"learning_rate": 1.9720720931513212e-05,
|
||
|
|
"loss": 0.274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27034640312194824,
|
||
|
|
"step": 2425,
|
||
|
|
"valid_targets_mean": 5680.7,
|
||
|
|
"valid_targets_min": 2868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.888,
|
||
|
|
"grad_norm": 0.4465441645571561,
|
||
|
|
"learning_rate": 1.9640934533321735e-05,
|
||
|
|
"loss": 0.2579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2533532977104187,
|
||
|
|
"step": 2430,
|
||
|
|
"valid_targets_mean": 4983.7,
|
||
|
|
"valid_targets_min": 1607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.896,
|
||
|
|
"grad_norm": 0.4948292300701761,
|
||
|
|
"learning_rate": 1.9561153850995577e-05,
|
||
|
|
"loss": 0.252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2283918261528015,
|
||
|
|
"step": 2435,
|
||
|
|
"valid_targets_mean": 4227.5,
|
||
|
|
"valid_targets_min": 2344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.904,
|
||
|
|
"grad_norm": 0.4484712258176857,
|
||
|
|
"learning_rate": 1.948138015454168e-05,
|
||
|
|
"loss": 0.2624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28464293479919434,
|
||
|
|
"step": 2440,
|
||
|
|
"valid_targets_mean": 5670.1,
|
||
|
|
"valid_targets_min": 2449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.912,
|
||
|
|
"grad_norm": 0.43934564876974813,
|
||
|
|
"learning_rate": 1.9401614713855775e-05,
|
||
|
|
"loss": 0.2725,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26345354318618774,
|
||
|
|
"step": 2445,
|
||
|
|
"valid_targets_mean": 5403.1,
|
||
|
|
"valid_targets_min": 707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.92,
|
||
|
|
"grad_norm": 0.416395054814685,
|
||
|
|
"learning_rate": 1.932185879870218e-05,
|
||
|
|
"loss": 0.2643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25871995091438293,
|
||
|
|
"step": 2450,
|
||
|
|
"valid_targets_mean": 5303.6,
|
||
|
|
"valid_targets_min": 2395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.928,
|
||
|
|
"grad_norm": 0.43694368425623087,
|
||
|
|
"learning_rate": 1.924211367869357e-05,
|
||
|
|
"loss": 0.2601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24475713074207306,
|
||
|
|
"step": 2455,
|
||
|
|
"valid_targets_mean": 5055.1,
|
||
|
|
"valid_targets_min": 2806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.936,
|
||
|
|
"grad_norm": 0.4243501892440525,
|
||
|
|
"learning_rate": 1.9162380623270783e-05,
|
||
|
|
"loss": 0.2538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22306443750858307,
|
||
|
|
"step": 2460,
|
||
|
|
"valid_targets_mean": 5135.1,
|
||
|
|
"valid_targets_min": 2248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.944,
|
||
|
|
"grad_norm": 0.44283286109138925,
|
||
|
|
"learning_rate": 1.90826609016826e-05,
|
||
|
|
"loss": 0.2682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2637837529182434,
|
||
|
|
"step": 2465,
|
||
|
|
"valid_targets_mean": 5476.3,
|
||
|
|
"valid_targets_min": 759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.952,
|
||
|
|
"grad_norm": 0.42470717124634133,
|
||
|
|
"learning_rate": 1.9002955782965548e-05,
|
||
|
|
"loss": 0.2623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27605631947517395,
|
||
|
|
"step": 2470,
|
||
|
|
"valid_targets_mean": 5417.3,
|
||
|
|
"valid_targets_min": 2605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.96,
|
||
|
|
"grad_norm": 0.3803343612948792,
|
||
|
|
"learning_rate": 1.8923266535923688e-05,
|
||
|
|
"loss": 0.2859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2890893220901489,
|
||
|
|
"step": 2475,
|
||
|
|
"valid_targets_mean": 7136.2,
|
||
|
|
"valid_targets_min": 3811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.968,
|
||
|
|
"grad_norm": 0.4130261601006259,
|
||
|
|
"learning_rate": 1.8843594429108426e-05,
|
||
|
|
"loss": 0.2671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2711412012577057,
|
||
|
|
"step": 2480,
|
||
|
|
"valid_targets_mean": 6824.2,
|
||
|
|
"valid_targets_min": 2246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.976,
|
||
|
|
"grad_norm": 0.4468048565381207,
|
||
|
|
"learning_rate": 1.8763940730798324e-05,
|
||
|
|
"loss": 0.2589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2685108482837677,
|
||
|
|
"step": 2485,
|
||
|
|
"valid_targets_mean": 4759.8,
|
||
|
|
"valid_targets_min": 763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.984,
|
||
|
|
"grad_norm": 0.45251787058208964,
|
||
|
|
"learning_rate": 1.8684306708978896e-05,
|
||
|
|
"loss": 0.2665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2934056222438812,
|
||
|
|
"step": 2490,
|
||
|
|
"valid_targets_mean": 4659.1,
|
||
|
|
"valid_targets_min": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.992,
|
||
|
|
"grad_norm": 0.43509351569725757,
|
||
|
|
"learning_rate": 1.8604693631322433e-05,
|
||
|
|
"loss": 0.2632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24598343670368195,
|
||
|
|
"step": 2495,
|
||
|
|
"valid_targets_mean": 4712.8,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0,
|
||
|
|
"grad_norm": 0.4139875035737289,
|
||
|
|
"learning_rate": 1.852510276516783e-05,
|
||
|
|
"loss": 0.2661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22778859734535217,
|
||
|
|
"step": 2500,
|
||
|
|
"valid_targets_mean": 5029.6,
|
||
|
|
"valid_targets_min": 2780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.008,
|
||
|
|
"grad_norm": 0.4026517254214821,
|
||
|
|
"learning_rate": 1.8445535377500393e-05,
|
||
|
|
"loss": 0.2503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2527252435684204,
|
||
|
|
"step": 2505,
|
||
|
|
"valid_targets_mean": 5420.4,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.016,
|
||
|
|
"grad_norm": 0.527046696353138,
|
||
|
|
"learning_rate": 1.8365992734931686e-05,
|
||
|
|
"loss": 0.251,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23123949766159058,
|
||
|
|
"step": 2510,
|
||
|
|
"valid_targets_mean": 4485.6,
|
||
|
|
"valid_targets_min": 932
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.024,
|
||
|
|
"grad_norm": 0.4502829884248935,
|
||
|
|
"learning_rate": 1.8286476103679356e-05,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25222235918045044,
|
||
|
|
"step": 2515,
|
||
|
|
"valid_targets_mean": 5331.8,
|
||
|
|
"valid_targets_min": 2508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.032,
|
||
|
|
"grad_norm": 0.40196394658018636,
|
||
|
|
"learning_rate": 1.8206986749546992e-05,
|
||
|
|
"loss": 0.2577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2326112687587738,
|
||
|
|
"step": 2520,
|
||
|
|
"valid_targets_mean": 5491.2,
|
||
|
|
"valid_targets_min": 2650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.04,
|
||
|
|
"grad_norm": 0.4419760841326031,
|
||
|
|
"learning_rate": 1.8127525937903957e-05,
|
||
|
|
"loss": 0.254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25284963846206665,
|
||
|
|
"step": 2525,
|
||
|
|
"valid_targets_mean": 5374.3,
|
||
|
|
"valid_targets_min": 724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.048,
|
||
|
|
"grad_norm": 0.563487988263502,
|
||
|
|
"learning_rate": 1.8048094933665262e-05,
|
||
|
|
"loss": 0.2538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23597420752048492,
|
||
|
|
"step": 2530,
|
||
|
|
"valid_targets_mean": 4752.8,
|
||
|
|
"valid_targets_min": 2266
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.056,
|
||
|
|
"grad_norm": 0.4207542474689084,
|
||
|
|
"learning_rate": 1.7968695001271416e-05,
|
||
|
|
"loss": 0.2571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25209924578666687,
|
||
|
|
"step": 2535,
|
||
|
|
"valid_targets_mean": 5711.2,
|
||
|
|
"valid_targets_min": 2319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.064,
|
||
|
|
"grad_norm": 0.4164797883736113,
|
||
|
|
"learning_rate": 1.7889327404668316e-05,
|
||
|
|
"loss": 0.2503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26291561126708984,
|
||
|
|
"step": 2540,
|
||
|
|
"valid_targets_mean": 5958.5,
|
||
|
|
"valid_targets_min": 3351
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.072,
|
||
|
|
"grad_norm": 0.4280721543502966,
|
||
|
|
"learning_rate": 1.7809993407287083e-05,
|
||
|
|
"loss": 0.2507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2563862204551697,
|
||
|
|
"step": 2545,
|
||
|
|
"valid_targets_mean": 5596.1,
|
||
|
|
"valid_targets_min": 2016
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.08,
|
||
|
|
"grad_norm": 0.41964916244227835,
|
||
|
|
"learning_rate": 1.7730694272024018e-05,
|
||
|
|
"loss": 0.2504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2623998522758484,
|
||
|
|
"step": 2550,
|
||
|
|
"valid_targets_mean": 5987.8,
|
||
|
|
"valid_targets_min": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.088,
|
||
|
|
"grad_norm": 0.4050228919933895,
|
||
|
|
"learning_rate": 1.765143126122044e-05,
|
||
|
|
"loss": 0.2499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23614199459552765,
|
||
|
|
"step": 2555,
|
||
|
|
"valid_targets_mean": 5699.4,
|
||
|
|
"valid_targets_min": 2592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.096,
|
||
|
|
"grad_norm": 0.3910248661150996,
|
||
|
|
"learning_rate": 1.7572205636642622e-05,
|
||
|
|
"loss": 0.2549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25823622941970825,
|
||
|
|
"step": 2560,
|
||
|
|
"valid_targets_mean": 7367.4,
|
||
|
|
"valid_targets_min": 3276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.104,
|
||
|
|
"grad_norm": 0.42236355425116934,
|
||
|
|
"learning_rate": 1.749301865946169e-05,
|
||
|
|
"loss": 0.2569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2622085511684418,
|
||
|
|
"step": 2565,
|
||
|
|
"valid_targets_mean": 5688.9,
|
||
|
|
"valid_targets_min": 2657
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.112,
|
||
|
|
"grad_norm": 0.42509311490729107,
|
||
|
|
"learning_rate": 1.7413871590233557e-05,
|
||
|
|
"loss": 0.2616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2588081359863281,
|
||
|
|
"step": 2570,
|
||
|
|
"valid_targets_mean": 6006.1,
|
||
|
|
"valid_targets_min": 1061
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.12,
|
||
|
|
"grad_norm": 0.4057780074446429,
|
||
|
|
"learning_rate": 1.7334765688878848e-05,
|
||
|
|
"loss": 0.2404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23342590034008026,
|
||
|
|
"step": 2575,
|
||
|
|
"valid_targets_mean": 5994.2,
|
||
|
|
"valid_targets_min": 3506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.128,
|
||
|
|
"grad_norm": 0.4581070105994256,
|
||
|
|
"learning_rate": 1.7255702214662852e-05,
|
||
|
|
"loss": 0.2535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26349925994873047,
|
||
|
|
"step": 2580,
|
||
|
|
"valid_targets_mean": 5232.7,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.136,
|
||
|
|
"grad_norm": 0.4166464687342281,
|
||
|
|
"learning_rate": 1.7176682426175468e-05,
|
||
|
|
"loss": 0.2624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.251516729593277,
|
||
|
|
"step": 2585,
|
||
|
|
"valid_targets_mean": 5601.5,
|
||
|
|
"valid_targets_min": 2340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.144,
|
||
|
|
"grad_norm": 0.4657055090249881,
|
||
|
|
"learning_rate": 1.709770758131118e-05,
|
||
|
|
"loss": 0.2564,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23418261110782623,
|
||
|
|
"step": 2590,
|
||
|
|
"valid_targets_mean": 4537.5,
|
||
|
|
"valid_targets_min": 2258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.152,
|
||
|
|
"grad_norm": 0.4116673541052104,
|
||
|
|
"learning_rate": 1.7018778937249017e-05,
|
||
|
|
"loss": 0.2559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24612781405448914,
|
||
|
|
"step": 2595,
|
||
|
|
"valid_targets_mean": 6193.2,
|
||
|
|
"valid_targets_min": 2229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.16,
|
||
|
|
"grad_norm": 0.4340572439834897,
|
||
|
|
"learning_rate": 1.6939897750432562e-05,
|
||
|
|
"loss": 0.2425,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2452816367149353,
|
||
|
|
"step": 2600,
|
||
|
|
"valid_targets_mean": 5402.6,
|
||
|
|
"valid_targets_min": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.168,
|
||
|
|
"grad_norm": 0.43025994341938056,
|
||
|
|
"learning_rate": 1.6861065276549933e-05,
|
||
|
|
"loss": 0.2555,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2600667476654053,
|
||
|
|
"step": 2605,
|
||
|
|
"valid_targets_mean": 5861.5,
|
||
|
|
"valid_targets_min": 2402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.176,
|
||
|
|
"grad_norm": 0.42608335179914697,
|
||
|
|
"learning_rate": 1.6782282770513788e-05,
|
||
|
|
"loss": 0.271,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23782122135162354,
|
||
|
|
"step": 2610,
|
||
|
|
"valid_targets_mean": 5192.6,
|
||
|
|
"valid_targets_min": 2508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.184,
|
||
|
|
"grad_norm": 0.4277951887838342,
|
||
|
|
"learning_rate": 1.6703551486441382e-05,
|
||
|
|
"loss": 0.2563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25356531143188477,
|
||
|
|
"step": 2615,
|
||
|
|
"valid_targets_mean": 5669.9,
|
||
|
|
"valid_targets_min": 1543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.192,
|
||
|
|
"grad_norm": 0.4609230061476829,
|
||
|
|
"learning_rate": 1.6624872677634565e-05,
|
||
|
|
"loss": 0.25,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24406729638576508,
|
||
|
|
"step": 2620,
|
||
|
|
"valid_targets_mean": 4672.6,
|
||
|
|
"valid_targets_min": 2794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2,
|
||
|
|
"grad_norm": 0.40355339238632393,
|
||
|
|
"learning_rate": 1.654624759655986e-05,
|
||
|
|
"loss": 0.2668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29306623339653015,
|
||
|
|
"step": 2625,
|
||
|
|
"valid_targets_mean": 6442.0,
|
||
|
|
"valid_targets_min": 2237
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.208,
|
||
|
|
"grad_norm": 0.425607995518686,
|
||
|
|
"learning_rate": 1.64676774948285e-05,
|
||
|
|
"loss": 0.2379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23136329650878906,
|
||
|
|
"step": 2630,
|
||
|
|
"valid_targets_mean": 5687.0,
|
||
|
|
"valid_targets_min": 2907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.216,
|
||
|
|
"grad_norm": 0.42610729575271533,
|
||
|
|
"learning_rate": 1.6389163623176536e-05,
|
||
|
|
"loss": 0.2576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25710445642471313,
|
||
|
|
"step": 2635,
|
||
|
|
"valid_targets_mean": 5690.4,
|
||
|
|
"valid_targets_min": 2275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.224,
|
||
|
|
"grad_norm": 0.4502455592506156,
|
||
|
|
"learning_rate": 1.6310707231444884e-05,
|
||
|
|
"loss": 0.2547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25009778141975403,
|
||
|
|
"step": 2640,
|
||
|
|
"valid_targets_mean": 5083.6,
|
||
|
|
"valid_targets_min": 2693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.232,
|
||
|
|
"grad_norm": 0.42929174971662437,
|
||
|
|
"learning_rate": 1.623230956855947e-05,
|
||
|
|
"loss": 0.2393,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24527814984321594,
|
||
|
|
"step": 2645,
|
||
|
|
"valid_targets_mean": 5330.5,
|
||
|
|
"valid_targets_min": 2359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.24,
|
||
|
|
"grad_norm": 0.38173395631546464,
|
||
|
|
"learning_rate": 1.6153971882511324e-05,
|
||
|
|
"loss": 0.2542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2393120527267456,
|
||
|
|
"step": 2650,
|
||
|
|
"valid_targets_mean": 6225.8,
|
||
|
|
"valid_targets_min": 3329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.248,
|
||
|
|
"grad_norm": 0.3933433949338803,
|
||
|
|
"learning_rate": 1.6075695420336724e-05,
|
||
|
|
"loss": 0.2545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2711977958679199,
|
||
|
|
"step": 2655,
|
||
|
|
"valid_targets_mean": 6330.6,
|
||
|
|
"valid_targets_min": 2363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.256,
|
||
|
|
"grad_norm": 0.4121284121686269,
|
||
|
|
"learning_rate": 1.5997481428097338e-05,
|
||
|
|
"loss": 0.2453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24723577499389648,
|
||
|
|
"step": 2660,
|
||
|
|
"valid_targets_mean": 5960.3,
|
||
|
|
"valid_targets_min": 3191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.264,
|
||
|
|
"grad_norm": 0.3882119749671396,
|
||
|
|
"learning_rate": 1.5919331150860396e-05,
|
||
|
|
"loss": 0.2587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2718477249145508,
|
||
|
|
"step": 2665,
|
||
|
|
"valid_targets_mean": 6472.1,
|
||
|
|
"valid_targets_min": 3292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.272,
|
||
|
|
"grad_norm": 0.4100674855225304,
|
||
|
|
"learning_rate": 1.5841245832678873e-05,
|
||
|
|
"loss": 0.2581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23369711637496948,
|
||
|
|
"step": 2670,
|
||
|
|
"valid_targets_mean": 5683.4,
|
||
|
|
"valid_targets_min": 763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.28,
|
||
|
|
"grad_norm": 0.41838795378288424,
|
||
|
|
"learning_rate": 1.576322671657166e-05,
|
||
|
|
"loss": 0.2576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2701454758644104,
|
||
|
|
"step": 2675,
|
||
|
|
"valid_targets_mean": 6447.2,
|
||
|
|
"valid_targets_min": 2868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.288,
|
||
|
|
"grad_norm": 0.4400893801670801,
|
||
|
|
"learning_rate": 1.5685275044503804e-05,
|
||
|
|
"loss": 0.2529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24263077974319458,
|
||
|
|
"step": 2680,
|
||
|
|
"valid_targets_mean": 5147.9,
|
||
|
|
"valid_targets_min": 2684
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.296,
|
||
|
|
"grad_norm": 0.46173199977270163,
|
||
|
|
"learning_rate": 1.560739205736674e-05,
|
||
|
|
"loss": 0.255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26507413387298584,
|
||
|
|
"step": 2685,
|
||
|
|
"valid_targets_mean": 5198.1,
|
||
|
|
"valid_targets_min": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.304,
|
||
|
|
"grad_norm": 0.45496647831386733,
|
||
|
|
"learning_rate": 1.552957899495851e-05,
|
||
|
|
"loss": 0.2552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2521948218345642,
|
||
|
|
"step": 2690,
|
||
|
|
"valid_targets_mean": 4976.4,
|
||
|
|
"valid_targets_min": 1184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.312,
|
||
|
|
"grad_norm": 0.4633484681626059,
|
||
|
|
"learning_rate": 1.5451837095964054e-05,
|
||
|
|
"loss": 0.2524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26329267024993896,
|
||
|
|
"step": 2695,
|
||
|
|
"valid_targets_mean": 5536.6,
|
||
|
|
"valid_targets_min": 802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.32,
|
||
|
|
"grad_norm": 0.3777915678980122,
|
||
|
|
"learning_rate": 1.5374167597935478e-05,
|
||
|
|
"loss": 0.2502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2505547106266022,
|
||
|
|
"step": 2700,
|
||
|
|
"valid_targets_mean": 7020.2,
|
||
|
|
"valid_targets_min": 3721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.328,
|
||
|
|
"grad_norm": 0.432513339772364,
|
||
|
|
"learning_rate": 1.5296571737272354e-05,
|
||
|
|
"loss": 0.2597,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26320308446884155,
|
||
|
|
"step": 2705,
|
||
|
|
"valid_targets_mean": 5831.6,
|
||
|
|
"valid_targets_min": 2076
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.336,
|
||
|
|
"grad_norm": 0.4259936130726487,
|
||
|
|
"learning_rate": 1.5219050749202037e-05,
|
||
|
|
"loss": 0.2498,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26931965351104736,
|
||
|
|
"step": 2710,
|
||
|
|
"valid_targets_mean": 5925.8,
|
||
|
|
"valid_targets_min": 3057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.344,
|
||
|
|
"grad_norm": 0.43484781101199177,
|
||
|
|
"learning_rate": 1.5141605867760021e-05,
|
||
|
|
"loss": 0.2551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2746845483779907,
|
||
|
|
"step": 2715,
|
||
|
|
"valid_targets_mean": 5499.1,
|
||
|
|
"valid_targets_min": 879
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.352,
|
||
|
|
"grad_norm": 0.4240116184683575,
|
||
|
|
"learning_rate": 1.5064238325770267e-05,
|
||
|
|
"loss": 0.2701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27649781107902527,
|
||
|
|
"step": 2720,
|
||
|
|
"valid_targets_mean": 6327.1,
|
||
|
|
"valid_targets_min": 3023
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.36,
|
||
|
|
"grad_norm": 0.46160595397956594,
|
||
|
|
"learning_rate": 1.498694935482559e-05,
|
||
|
|
"loss": 0.2461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23835043609142303,
|
||
|
|
"step": 2725,
|
||
|
|
"valid_targets_mean": 4995.6,
|
||
|
|
"valid_targets_min": 384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.368,
|
||
|
|
"grad_norm": 0.42449896176713736,
|
||
|
|
"learning_rate": 1.4909740185268056e-05,
|
||
|
|
"loss": 0.2693,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3132569193840027,
|
||
|
|
"step": 2730,
|
||
|
|
"valid_targets_mean": 7493.7,
|
||
|
|
"valid_targets_min": 2898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.376,
|
||
|
|
"grad_norm": 0.4634859796301222,
|
||
|
|
"learning_rate": 1.4832612046169408e-05,
|
||
|
|
"loss": 0.2787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26285839080810547,
|
||
|
|
"step": 2735,
|
||
|
|
"valid_targets_mean": 4890.0,
|
||
|
|
"valid_targets_min": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.384,
|
||
|
|
"grad_norm": 0.5118855317119525,
|
||
|
|
"learning_rate": 1.4755566165311455e-05,
|
||
|
|
"loss": 0.2453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2320340871810913,
|
||
|
|
"step": 2740,
|
||
|
|
"valid_targets_mean": 4532.7,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.392,
|
||
|
|
"grad_norm": 0.3720350693877654,
|
||
|
|
"learning_rate": 1.4678603769166591e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26782023906707764,
|
||
|
|
"step": 2745,
|
||
|
|
"valid_targets_mean": 7465.8,
|
||
|
|
"valid_targets_min": 2616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4,
|
||
|
|
"grad_norm": 0.38971835604206173,
|
||
|
|
"learning_rate": 1.4601726082878226e-05,
|
||
|
|
"loss": 0.2621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2792567014694214,
|
||
|
|
"step": 2750,
|
||
|
|
"valid_targets_mean": 6849.9,
|
||
|
|
"valid_targets_min": 2097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.408,
|
||
|
|
"grad_norm": 0.40344159503778143,
|
||
|
|
"learning_rate": 1.4524934330241292e-05,
|
||
|
|
"loss": 0.2614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2851219177246094,
|
||
|
|
"step": 2755,
|
||
|
|
"valid_targets_mean": 6309.6,
|
||
|
|
"valid_targets_min": 2719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.416,
|
||
|
|
"grad_norm": 0.3938933697532101,
|
||
|
|
"learning_rate": 1.4448229733682784e-05,
|
||
|
|
"loss": 0.257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23094943165779114,
|
||
|
|
"step": 2760,
|
||
|
|
"valid_targets_mean": 5509.4,
|
||
|
|
"valid_targets_min": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.424,
|
||
|
|
"grad_norm": 0.41239113282985423,
|
||
|
|
"learning_rate": 1.4371613514242264e-05,
|
||
|
|
"loss": 0.2478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2564051151275635,
|
||
|
|
"step": 2765,
|
||
|
|
"valid_targets_mean": 6004.9,
|
||
|
|
"valid_targets_min": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.432,
|
||
|
|
"grad_norm": 0.3935389062042307,
|
||
|
|
"learning_rate": 1.4295086891552457e-05,
|
||
|
|
"loss": 0.2584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2657475769519806,
|
||
|
|
"step": 2770,
|
||
|
|
"valid_targets_mean": 6504.3,
|
||
|
|
"valid_targets_min": 3996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.44,
|
||
|
|
"grad_norm": 0.445989307618548,
|
||
|
|
"learning_rate": 1.4218651083819811e-05,
|
||
|
|
"loss": 0.2456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23502030968666077,
|
||
|
|
"step": 2775,
|
||
|
|
"valid_targets_mean": 5178.3,
|
||
|
|
"valid_targets_min": 2092
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.448,
|
||
|
|
"grad_norm": 0.4197509448438314,
|
||
|
|
"learning_rate": 1.4142307307805125e-05,
|
||
|
|
"loss": 0.263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24606627225875854,
|
||
|
|
"step": 2780,
|
||
|
|
"valid_targets_mean": 5779.6,
|
||
|
|
"valid_targets_min": 2508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.456,
|
||
|
|
"grad_norm": 0.4511017310825799,
|
||
|
|
"learning_rate": 1.406605677880416e-05,
|
||
|
|
"loss": 0.2522,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24840837717056274,
|
||
|
|
"step": 2785,
|
||
|
|
"valid_targets_mean": 5349.4,
|
||
|
|
"valid_targets_min": 2473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.464,
|
||
|
|
"grad_norm": 0.45567039640007745,
|
||
|
|
"learning_rate": 1.3989900710628313e-05,
|
||
|
|
"loss": 0.2514,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2618921995162964,
|
||
|
|
"step": 2790,
|
||
|
|
"valid_targets_mean": 5036.2,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4719999999999995,
|
||
|
|
"grad_norm": 0.43962860716408275,
|
||
|
|
"learning_rate": 1.3913840315585279e-05,
|
||
|
|
"loss": 0.2618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27140137553215027,
|
||
|
|
"step": 2795,
|
||
|
|
"valid_targets_mean": 5845.9,
|
||
|
|
"valid_targets_min": 2457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.48,
|
||
|
|
"grad_norm": 0.43611294833451114,
|
||
|
|
"learning_rate": 1.3837876804459765e-05,
|
||
|
|
"loss": 0.2486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25768083333969116,
|
||
|
|
"step": 2800,
|
||
|
|
"valid_targets_mean": 5575.6,
|
||
|
|
"valid_targets_min": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4879999999999995,
|
||
|
|
"grad_norm": 0.42803383496080244,
|
||
|
|
"learning_rate": 1.3762011386494191e-05,
|
||
|
|
"loss": 0.2619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2686399519443512,
|
||
|
|
"step": 2805,
|
||
|
|
"valid_targets_mean": 5792.1,
|
||
|
|
"valid_targets_min": 921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.496,
|
||
|
|
"grad_norm": 0.4719245633331461,
|
||
|
|
"learning_rate": 1.3686245269369485e-05,
|
||
|
|
"loss": 0.2663,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30958646535873413,
|
||
|
|
"step": 2810,
|
||
|
|
"valid_targets_mean": 6427.6,
|
||
|
|
"valid_targets_min": 1017
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.504,
|
||
|
|
"grad_norm": 0.4237505355734542,
|
||
|
|
"learning_rate": 1.3610579659185809e-05,
|
||
|
|
"loss": 0.2528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26016536355018616,
|
||
|
|
"step": 2815,
|
||
|
|
"valid_targets_mean": 5658.1,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5120000000000005,
|
||
|
|
"grad_norm": 0.38024984937490686,
|
||
|
|
"learning_rate": 1.35350157604434e-05,
|
||
|
|
"loss": 0.2582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2725783586502075,
|
||
|
|
"step": 2820,
|
||
|
|
"valid_targets_mean": 6973.2,
|
||
|
|
"valid_targets_min": 2168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.52,
|
||
|
|
"grad_norm": 0.41952130341304616,
|
||
|
|
"learning_rate": 1.345955477602337e-05,
|
||
|
|
"loss": 0.2608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26130425930023193,
|
||
|
|
"step": 2825,
|
||
|
|
"valid_targets_mean": 6372.4,
|
||
|
|
"valid_targets_min": 1173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5280000000000005,
|
||
|
|
"grad_norm": 0.4528337053842432,
|
||
|
|
"learning_rate": 1.3384197907168561e-05,
|
||
|
|
"loss": 0.2473,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25075584650039673,
|
||
|
|
"step": 2830,
|
||
|
|
"valid_targets_mean": 5724.8,
|
||
|
|
"valid_targets_min": 2776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.536,
|
||
|
|
"grad_norm": 0.41288983796665957,
|
||
|
|
"learning_rate": 1.3308946353464438e-05,
|
||
|
|
"loss": 0.2501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24849629402160645,
|
||
|
|
"step": 2835,
|
||
|
|
"valid_targets_mean": 5834.9,
|
||
|
|
"valid_targets_min": 2456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5440000000000005,
|
||
|
|
"grad_norm": 0.4109103515660612,
|
||
|
|
"learning_rate": 1.3233801312819979e-05,
|
||
|
|
"loss": 0.2506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24398134648799896,
|
||
|
|
"step": 2840,
|
||
|
|
"valid_targets_mean": 5745.9,
|
||
|
|
"valid_targets_min": 3589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.552,
|
||
|
|
"grad_norm": 0.41889125877984157,
|
||
|
|
"learning_rate": 1.3158763981448606e-05,
|
||
|
|
"loss": 0.2537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2618812620639801,
|
||
|
|
"step": 2845,
|
||
|
|
"valid_targets_mean": 6272.4,
|
||
|
|
"valid_targets_min": 1666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5600000000000005,
|
||
|
|
"grad_norm": 0.4550626345785704,
|
||
|
|
"learning_rate": 1.3083835553849148e-05,
|
||
|
|
"loss": 0.2524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2640746831893921,
|
||
|
|
"step": 2850,
|
||
|
|
"valid_targets_mean": 5556.9,
|
||
|
|
"valid_targets_min": 1918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.568,
|
||
|
|
"grad_norm": 0.4316185089835862,
|
||
|
|
"learning_rate": 1.3009017222786828e-05,
|
||
|
|
"loss": 0.2521,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2707984745502472,
|
||
|
|
"step": 2855,
|
||
|
|
"valid_targets_mean": 5878.1,
|
||
|
|
"valid_targets_min": 2964
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.576,
|
||
|
|
"grad_norm": 0.3506904633597355,
|
||
|
|
"learning_rate": 1.2934310179274269e-05,
|
||
|
|
"loss": 0.2591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24263477325439453,
|
||
|
|
"step": 2860,
|
||
|
|
"valid_targets_mean": 7210.3,
|
||
|
|
"valid_targets_min": 2190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.584,
|
||
|
|
"grad_norm": 0.3830616811150377,
|
||
|
|
"learning_rate": 1.2859715612552541e-05,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22636298835277557,
|
||
|
|
"step": 2865,
|
||
|
|
"valid_targets_mean": 6734.7,
|
||
|
|
"valid_targets_min": 2555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.592,
|
||
|
|
"grad_norm": 0.4555586317701132,
|
||
|
|
"learning_rate": 1.278523471007223e-05,
|
||
|
|
"loss": 0.2563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2616588771343231,
|
||
|
|
"step": 2870,
|
||
|
|
"valid_targets_mean": 5836.5,
|
||
|
|
"valid_targets_min": 3143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6,
|
||
|
|
"grad_norm": 0.45645747710035556,
|
||
|
|
"learning_rate": 1.271086865747451e-05,
|
||
|
|
"loss": 0.2582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24527783691883087,
|
||
|
|
"step": 2875,
|
||
|
|
"valid_targets_mean": 5121.6,
|
||
|
|
"valid_targets_min": 2841
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.608,
|
||
|
|
"grad_norm": 0.466642440212726,
|
||
|
|
"learning_rate": 1.2636618638572316e-05,
|
||
|
|
"loss": 0.2537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2527911067008972,
|
||
|
|
"step": 2880,
|
||
|
|
"valid_targets_mean": 5101.9,
|
||
|
|
"valid_targets_min": 997
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.616,
|
||
|
|
"grad_norm": 0.46937573930033066,
|
||
|
|
"learning_rate": 1.2562485835331466e-05,
|
||
|
|
"loss": 0.2629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2864949405193329,
|
||
|
|
"step": 2885,
|
||
|
|
"valid_targets_mean": 5126.4,
|
||
|
|
"valid_targets_min": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.624,
|
||
|
|
"grad_norm": 0.46822865956269294,
|
||
|
|
"learning_rate": 1.2488471427851852e-05,
|
||
|
|
"loss": 0.2577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28379178047180176,
|
||
|
|
"step": 2890,
|
||
|
|
"valid_targets_mean": 6053.8,
|
||
|
|
"valid_targets_min": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.632,
|
||
|
|
"grad_norm": 0.45941682547920754,
|
||
|
|
"learning_rate": 1.241457659434866e-05,
|
||
|
|
"loss": 0.2504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27936357259750366,
|
||
|
|
"step": 2895,
|
||
|
|
"valid_targets_mean": 5776.1,
|
||
|
|
"valid_targets_min": 3188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.64,
|
||
|
|
"grad_norm": 0.4385032906332573,
|
||
|
|
"learning_rate": 1.2340802511133605e-05,
|
||
|
|
"loss": 0.2561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2690129578113556,
|
||
|
|
"step": 2900,
|
||
|
|
"valid_targets_mean": 5771.6,
|
||
|
|
"valid_targets_min": 1872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.648,
|
||
|
|
"grad_norm": 0.3719833844478648,
|
||
|
|
"learning_rate": 1.2267150352596216e-05,
|
||
|
|
"loss": 0.2501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24423128366470337,
|
||
|
|
"step": 2905,
|
||
|
|
"valid_targets_mean": 6587.7,
|
||
|
|
"valid_targets_min": 2041
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.656,
|
||
|
|
"grad_norm": 0.49249405276783137,
|
||
|
|
"learning_rate": 1.2193621291185132e-05,
|
||
|
|
"loss": 0.2542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2529935836791992,
|
||
|
|
"step": 2910,
|
||
|
|
"valid_targets_mean": 5225.8,
|
||
|
|
"valid_targets_min": 1416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.664,
|
||
|
|
"grad_norm": 0.41781331593173815,
|
||
|
|
"learning_rate": 1.2120216497389446e-05,
|
||
|
|
"loss": 0.2553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2483152151107788,
|
||
|
|
"step": 2915,
|
||
|
|
"valid_targets_mean": 5744.6,
|
||
|
|
"valid_targets_min": 2086
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.672,
|
||
|
|
"grad_norm": 0.4047966801272431,
|
||
|
|
"learning_rate": 1.2046937139720068e-05,
|
||
|
|
"loss": 0.2572,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2477341890335083,
|
||
|
|
"step": 2920,
|
||
|
|
"valid_targets_mean": 5689.0,
|
||
|
|
"valid_targets_min": 759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.68,
|
||
|
|
"grad_norm": 0.37498929916376766,
|
||
|
|
"learning_rate": 1.1973784384691121e-05,
|
||
|
|
"loss": 0.2499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2430843859910965,
|
||
|
|
"step": 2925,
|
||
|
|
"valid_targets_mean": 7156.9,
|
||
|
|
"valid_targets_min": 2993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.688,
|
||
|
|
"grad_norm": 0.41388324850358565,
|
||
|
|
"learning_rate": 1.1900759396801382e-05,
|
||
|
|
"loss": 0.2424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2551068067550659,
|
||
|
|
"step": 2930,
|
||
|
|
"valid_targets_mean": 6077.2,
|
||
|
|
"valid_targets_min": 3013
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.696,
|
||
|
|
"grad_norm": 0.36861725664383904,
|
||
|
|
"learning_rate": 1.1827863338515741e-05,
|
||
|
|
"loss": 0.2545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2571861445903778,
|
||
|
|
"step": 2935,
|
||
|
|
"valid_targets_mean": 6625.1,
|
||
|
|
"valid_targets_min": 824
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.704,
|
||
|
|
"grad_norm": 0.48378743134760654,
|
||
|
|
"learning_rate": 1.1755097370246669e-05,
|
||
|
|
"loss": 0.2532,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2558562457561493,
|
||
|
|
"step": 2940,
|
||
|
|
"valid_targets_mean": 5220.9,
|
||
|
|
"valid_targets_min": 2269
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.712,
|
||
|
|
"grad_norm": 0.4406782311453092,
|
||
|
|
"learning_rate": 1.1682462650335791e-05,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25980138778686523,
|
||
|
|
"step": 2945,
|
||
|
|
"valid_targets_mean": 5291.1,
|
||
|
|
"valid_targets_min": 2112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.72,
|
||
|
|
"grad_norm": 0.4219789914017073,
|
||
|
|
"learning_rate": 1.1609960335035423e-05,
|
||
|
|
"loss": 0.2576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28451454639434814,
|
||
|
|
"step": 2950,
|
||
|
|
"valid_targets_mean": 6052.9,
|
||
|
|
"valid_targets_min": 2838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.728,
|
||
|
|
"grad_norm": 0.4542496598947857,
|
||
|
|
"learning_rate": 1.1537591578490165e-05,
|
||
|
|
"loss": 0.2624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24413950741291046,
|
||
|
|
"step": 2955,
|
||
|
|
"valid_targets_mean": 4976.8,
|
||
|
|
"valid_targets_min": 735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.736,
|
||
|
|
"grad_norm": 0.43241503805052256,
|
||
|
|
"learning_rate": 1.146535753271853e-05,
|
||
|
|
"loss": 0.2507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23115313053131104,
|
||
|
|
"step": 2960,
|
||
|
|
"valid_targets_mean": 4997.9,
|
||
|
|
"valid_targets_min": 2360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.744,
|
||
|
|
"grad_norm": 0.44686143386709815,
|
||
|
|
"learning_rate": 1.139325934759461e-05,
|
||
|
|
"loss": 0.2551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26531267166137695,
|
||
|
|
"step": 2965,
|
||
|
|
"valid_targets_mean": 5456.7,
|
||
|
|
"valid_targets_min": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.752,
|
||
|
|
"grad_norm": 0.4012430319952668,
|
||
|
|
"learning_rate": 1.1321298170829768e-05,
|
||
|
|
"loss": 0.2521,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2565727233886719,
|
||
|
|
"step": 2970,
|
||
|
|
"valid_targets_mean": 6016.4,
|
||
|
|
"valid_targets_min": 786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.76,
|
||
|
|
"grad_norm": 0.40873161812607367,
|
||
|
|
"learning_rate": 1.1249475147954363e-05,
|
||
|
|
"loss": 0.2512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28063255548477173,
|
||
|
|
"step": 2975,
|
||
|
|
"valid_targets_mean": 6314.0,
|
||
|
|
"valid_targets_min": 2924
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.768,
|
||
|
|
"grad_norm": 0.3764269524348353,
|
||
|
|
"learning_rate": 1.1177791422299528e-05,
|
||
|
|
"loss": 0.2479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2501315474510193,
|
||
|
|
"step": 2980,
|
||
|
|
"valid_targets_mean": 6623.1,
|
||
|
|
"valid_targets_min": 4202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.776,
|
||
|
|
"grad_norm": 0.4295656083509184,
|
||
|
|
"learning_rate": 1.1106248134978959e-05,
|
||
|
|
"loss": 0.2463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2510390281677246,
|
||
|
|
"step": 2985,
|
||
|
|
"valid_targets_mean": 5810.9,
|
||
|
|
"valid_targets_min": 2502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.784,
|
||
|
|
"grad_norm": 0.4286687447325965,
|
||
|
|
"learning_rate": 1.1034846424870744e-05,
|
||
|
|
"loss": 0.2569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2772992253303528,
|
||
|
|
"step": 2990,
|
||
|
|
"valid_targets_mean": 5971.2,
|
||
|
|
"valid_targets_min": 2590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.792,
|
||
|
|
"grad_norm": 0.374726299286535,
|
||
|
|
"learning_rate": 1.0963587428599256e-05,
|
||
|
|
"loss": 0.2568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24093443155288696,
|
||
|
|
"step": 2995,
|
||
|
|
"valid_targets_mean": 6393.6,
|
||
|
|
"valid_targets_min": 2733
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8,
|
||
|
|
"grad_norm": 0.39305947450528317,
|
||
|
|
"learning_rate": 1.089247228051704e-05,
|
||
|
|
"loss": 0.2542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26492711901664734,
|
||
|
|
"step": 3000,
|
||
|
|
"valid_targets_mean": 6464.1,
|
||
|
|
"valid_targets_min": 902
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.808,
|
||
|
|
"grad_norm": 0.43173986170046935,
|
||
|
|
"learning_rate": 1.0821502112686753e-05,
|
||
|
|
"loss": 0.2579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26849010586738586,
|
||
|
|
"step": 3005,
|
||
|
|
"valid_targets_mean": 6025.8,
|
||
|
|
"valid_targets_min": 876
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.816,
|
||
|
|
"grad_norm": 0.553114591599834,
|
||
|
|
"learning_rate": 1.0750678054863158e-05,
|
||
|
|
"loss": 0.254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25507980585098267,
|
||
|
|
"step": 3010,
|
||
|
|
"valid_targets_mean": 5294.4,
|
||
|
|
"valid_targets_min": 2993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.824,
|
||
|
|
"grad_norm": 0.4183913969010833,
|
||
|
|
"learning_rate": 1.0680001234475127e-05,
|
||
|
|
"loss": 0.263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23742127418518066,
|
||
|
|
"step": 3015,
|
||
|
|
"valid_targets_mean": 5359.8,
|
||
|
|
"valid_targets_min": 2555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.832,
|
||
|
|
"grad_norm": 0.41631043723949235,
|
||
|
|
"learning_rate": 1.0609472776607715e-05,
|
||
|
|
"loss": 0.26,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26865795254707336,
|
||
|
|
"step": 3020,
|
||
|
|
"valid_targets_mean": 6159.6,
|
||
|
|
"valid_targets_min": 2405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.84,
|
||
|
|
"grad_norm": 0.4078093943828265,
|
||
|
|
"learning_rate": 1.0539093803984217e-05,
|
||
|
|
"loss": 0.2463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24415555596351624,
|
||
|
|
"step": 3025,
|
||
|
|
"valid_targets_mean": 5986.4,
|
||
|
|
"valid_targets_min": 2063
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.848,
|
||
|
|
"grad_norm": 0.40603508493805524,
|
||
|
|
"learning_rate": 1.046886543694832e-05,
|
||
|
|
"loss": 0.2499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2623901069164276,
|
||
|
|
"step": 3030,
|
||
|
|
"valid_targets_mean": 6127.6,
|
||
|
|
"valid_targets_min": 2081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.856,
|
||
|
|
"grad_norm": 0.49406303870996265,
|
||
|
|
"learning_rate": 1.0398788793446263e-05,
|
||
|
|
"loss": 0.2541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2604304552078247,
|
||
|
|
"step": 3035,
|
||
|
|
"valid_targets_mean": 4751.2,
|
||
|
|
"valid_targets_min": 779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.864,
|
||
|
|
"grad_norm": 0.44990796885772155,
|
||
|
|
"learning_rate": 1.0328864989009037e-05,
|
||
|
|
"loss": 0.2561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26390713453292847,
|
||
|
|
"step": 3040,
|
||
|
|
"valid_targets_mean": 5145.4,
|
||
|
|
"valid_targets_min": 2084
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.872,
|
||
|
|
"grad_norm": 0.42810640807420824,
|
||
|
|
"learning_rate": 1.0259095136734634e-05,
|
||
|
|
"loss": 0.2638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.291616827249527,
|
||
|
|
"step": 3045,
|
||
|
|
"valid_targets_mean": 6583.1,
|
||
|
|
"valid_targets_min": 1114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.88,
|
||
|
|
"grad_norm": 0.4597136382960901,
|
||
|
|
"learning_rate": 1.0189480347270311e-05,
|
||
|
|
"loss": 0.2376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22865967452526093,
|
||
|
|
"step": 3050,
|
||
|
|
"valid_targets_mean": 4451.9,
|
||
|
|
"valid_targets_min": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.888,
|
||
|
|
"grad_norm": 0.4458559434583576,
|
||
|
|
"learning_rate": 1.0120021728794938e-05,
|
||
|
|
"loss": 0.2672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27014094591140747,
|
||
|
|
"step": 3055,
|
||
|
|
"valid_targets_mean": 6275.8,
|
||
|
|
"valid_targets_min": 2201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.896,
|
||
|
|
"grad_norm": 0.4071136411439628,
|
||
|
|
"learning_rate": 1.0050720387001334e-05,
|
||
|
|
"loss": 0.2545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24810561537742615,
|
||
|
|
"step": 3060,
|
||
|
|
"valid_targets_mean": 6389.0,
|
||
|
|
"valid_targets_min": 2924
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.904,
|
||
|
|
"grad_norm": 0.4038730028965521,
|
||
|
|
"learning_rate": 9.981577425078672e-06,
|
||
|
|
"loss": 0.246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24336443841457367,
|
||
|
|
"step": 3065,
|
||
|
|
"valid_targets_mean": 5670.3,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.912,
|
||
|
|
"grad_norm": 0.43517093885534663,
|
||
|
|
"learning_rate": 9.912593943694924e-06,
|
||
|
|
"loss": 0.2516,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2760591506958008,
|
||
|
|
"step": 3070,
|
||
|
|
"valid_targets_mean": 5472.9,
|
||
|
|
"valid_targets_min": 783
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.92,
|
||
|
|
"grad_norm": 0.4227928570500053,
|
||
|
|
"learning_rate": 9.843771040979328e-06,
|
||
|
|
"loss": 0.2549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25480031967163086,
|
||
|
|
"step": 3075,
|
||
|
|
"valid_targets_mean": 6020.2,
|
||
|
|
"valid_targets_min": 2221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.928,
|
||
|
|
"grad_norm": 0.4684683751640332,
|
||
|
|
"learning_rate": 9.775109812504922e-06,
|
||
|
|
"loss": 0.249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2481108158826828,
|
||
|
|
"step": 3080,
|
||
|
|
"valid_targets_mean": 4860.8,
|
||
|
|
"valid_targets_min": 2502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.936,
|
||
|
|
"grad_norm": 0.447379036311016,
|
||
|
|
"learning_rate": 9.706611351271088e-06,
|
||
|
|
"loss": 0.254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2520700693130493,
|
||
|
|
"step": 3085,
|
||
|
|
"valid_targets_mean": 5130.7,
|
||
|
|
"valid_targets_min": 2504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.944,
|
||
|
|
"grad_norm": 0.4383325246904605,
|
||
|
|
"learning_rate": 9.638276747686169e-06,
|
||
|
|
"loss": 0.255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24543023109436035,
|
||
|
|
"step": 3090,
|
||
|
|
"valid_targets_mean": 5125.6,
|
||
|
|
"valid_targets_min": 2175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.952,
|
||
|
|
"grad_norm": 0.4352567562200437,
|
||
|
|
"learning_rate": 9.570107089550091e-06,
|
||
|
|
"loss": 0.256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2787247896194458,
|
||
|
|
"step": 3095,
|
||
|
|
"valid_targets_mean": 4917.8,
|
||
|
|
"valid_targets_min": 2301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.96,
|
||
|
|
"grad_norm": 0.4604774036965585,
|
||
|
|
"learning_rate": 9.502103462037074e-06,
|
||
|
|
"loss": 0.2581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2525831460952759,
|
||
|
|
"step": 3100,
|
||
|
|
"valid_targets_mean": 5261.8,
|
||
|
|
"valid_targets_min": 1041
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.968,
|
||
|
|
"grad_norm": 0.37947111921277665,
|
||
|
|
"learning_rate": 9.434266947678326e-06,
|
||
|
|
"loss": 0.263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2499474585056305,
|
||
|
|
"step": 3105,
|
||
|
|
"valid_targets_mean": 7018.6,
|
||
|
|
"valid_targets_min": 3738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.976,
|
||
|
|
"grad_norm": 0.4605718602465518,
|
||
|
|
"learning_rate": 9.366598626344836e-06,
|
||
|
|
"loss": 0.2494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23395417630672455,
|
||
|
|
"step": 3110,
|
||
|
|
"valid_targets_mean": 4318.6,
|
||
|
|
"valid_targets_min": 918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.984,
|
||
|
|
"grad_norm": 0.408558726874829,
|
||
|
|
"learning_rate": 9.299099575230172e-06,
|
||
|
|
"loss": 0.2729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.292435884475708,
|
||
|
|
"step": 3115,
|
||
|
|
"valid_targets_mean": 6323.2,
|
||
|
|
"valid_targets_min": 2397
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.992,
|
||
|
|
"grad_norm": 0.484634939648324,
|
||
|
|
"learning_rate": 9.231770868833334e-06,
|
||
|
|
"loss": 0.271,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2561487555503845,
|
||
|
|
"step": 3120,
|
||
|
|
"valid_targets_mean": 5338.1,
|
||
|
|
"valid_targets_min": 2595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.45839751766225767,
|
||
|
|
"learning_rate": 9.164613578941652e-06,
|
||
|
|
"loss": 0.2534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28931480646133423,
|
||
|
|
"step": 3125,
|
||
|
|
"valid_targets_mean": 4836.5,
|
||
|
|
"valid_targets_min": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.008,
|
||
|
|
"grad_norm": 0.4182833320275383,
|
||
|
|
"learning_rate": 9.097628774613732e-06,
|
||
|
|
"loss": 0.2529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2569211721420288,
|
||
|
|
"step": 3130,
|
||
|
|
"valid_targets_mean": 5781.9,
|
||
|
|
"valid_targets_min": 2451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.016,
|
||
|
|
"grad_norm": 0.4209600786624088,
|
||
|
|
"learning_rate": 9.030817522162403e-06,
|
||
|
|
"loss": 0.235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2324075996875763,
|
||
|
|
"step": 3135,
|
||
|
|
"valid_targets_mean": 5339.5,
|
||
|
|
"valid_targets_min": 793
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.024,
|
||
|
|
"grad_norm": 0.44851688512328064,
|
||
|
|
"learning_rate": 8.964180885137797e-06,
|
||
|
|
"loss": 0.244,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2408837378025055,
|
||
|
|
"step": 3140,
|
||
|
|
"valid_targets_mean": 5425.8,
|
||
|
|
"valid_targets_min": 2716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.032,
|
||
|
|
"grad_norm": 0.484323206155298,
|
||
|
|
"learning_rate": 8.897719924310375e-06,
|
||
|
|
"loss": 0.2426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2470928132534027,
|
||
|
|
"step": 3145,
|
||
|
|
"valid_targets_mean": 4851.0,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.04,
|
||
|
|
"grad_norm": 0.4623710689089007,
|
||
|
|
"learning_rate": 8.831435697654068e-06,
|
||
|
|
"loss": 0.2455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25896331667900085,
|
||
|
|
"step": 3150,
|
||
|
|
"valid_targets_mean": 5875.5,
|
||
|
|
"valid_targets_min": 1335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.048,
|
||
|
|
"grad_norm": 0.4119943346269266,
|
||
|
|
"learning_rate": 8.765329260329413e-06,
|
||
|
|
"loss": 0.2373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26245784759521484,
|
||
|
|
"step": 3155,
|
||
|
|
"valid_targets_mean": 6056.2,
|
||
|
|
"valid_targets_min": 2070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.056,
|
||
|
|
"grad_norm": 0.4107422408390761,
|
||
|
|
"learning_rate": 8.699401664666774e-06,
|
||
|
|
"loss": 0.2444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24972708523273468,
|
||
|
|
"step": 3160,
|
||
|
|
"valid_targets_mean": 6400.5,
|
||
|
|
"valid_targets_min": 784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.064,
|
||
|
|
"grad_norm": 0.34537901545202443,
|
||
|
|
"learning_rate": 8.633653960149579e-06,
|
||
|
|
"loss": 0.2463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23936009407043457,
|
||
|
|
"step": 3165,
|
||
|
|
"valid_targets_mean": 7169.5,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.072,
|
||
|
|
"grad_norm": 0.44020015097063847,
|
||
|
|
"learning_rate": 8.56808719339762e-06,
|
||
|
|
"loss": 0.243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26007118821144104,
|
||
|
|
"step": 3170,
|
||
|
|
"valid_targets_mean": 5437.1,
|
||
|
|
"valid_targets_min": 2662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.08,
|
||
|
|
"grad_norm": 0.3670993210107219,
|
||
|
|
"learning_rate": 8.502702408150391e-06,
|
||
|
|
"loss": 0.2349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2242787927389145,
|
||
|
|
"step": 3175,
|
||
|
|
"valid_targets_mean": 6277.2,
|
||
|
|
"valid_targets_min": 2027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.088,
|
||
|
|
"grad_norm": 0.4378221792589208,
|
||
|
|
"learning_rate": 8.43750064525047e-06,
|
||
|
|
"loss": 0.2314,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22399312257766724,
|
||
|
|
"step": 3180,
|
||
|
|
"valid_targets_mean": 5389.4,
|
||
|
|
"valid_targets_min": 2389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.096,
|
||
|
|
"grad_norm": 0.4892555740387963,
|
||
|
|
"learning_rate": 8.372482942626952e-06,
|
||
|
|
"loss": 0.2345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2311638444662094,
|
||
|
|
"step": 3185,
|
||
|
|
"valid_targets_mean": 4608.6,
|
||
|
|
"valid_targets_min": 1941
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.104,
|
||
|
|
"grad_norm": 0.4389530297089188,
|
||
|
|
"learning_rate": 8.307650335278927e-06,
|
||
|
|
"loss": 0.252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2438417673110962,
|
||
|
|
"step": 3190,
|
||
|
|
"valid_targets_mean": 5221.5,
|
||
|
|
"valid_targets_min": 2204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.112,
|
||
|
|
"grad_norm": 0.4447312980181925,
|
||
|
|
"learning_rate": 8.243003855259015e-06,
|
||
|
|
"loss": 0.2389,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23452109098434448,
|
||
|
|
"step": 3195,
|
||
|
|
"valid_targets_mean": 4981.2,
|
||
|
|
"valid_targets_min": 2463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.12,
|
||
|
|
"grad_norm": 0.43191387980796925,
|
||
|
|
"learning_rate": 8.178544531656897e-06,
|
||
|
|
"loss": 0.2511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2466181516647339,
|
||
|
|
"step": 3200,
|
||
|
|
"valid_targets_mean": 6129.1,
|
||
|
|
"valid_targets_min": 3720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.128,
|
||
|
|
"grad_norm": 0.402538872085775,
|
||
|
|
"learning_rate": 8.11427339058299e-06,
|
||
|
|
"loss": 0.256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25876384973526,
|
||
|
|
"step": 3205,
|
||
|
|
"valid_targets_mean": 6184.6,
|
||
|
|
"valid_targets_min": 1409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.136,
|
||
|
|
"grad_norm": 0.4860220056729548,
|
||
|
|
"learning_rate": 8.050191455152072e-06,
|
||
|
|
"loss": 0.2544,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2506711184978485,
|
||
|
|
"step": 3210,
|
||
|
|
"valid_targets_mean": 4799.5,
|
||
|
|
"valid_targets_min": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.144,
|
||
|
|
"grad_norm": 0.48057095956799106,
|
||
|
|
"learning_rate": 7.986299745467013e-06,
|
||
|
|
"loss": 0.2541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25175127387046814,
|
||
|
|
"step": 3215,
|
||
|
|
"valid_targets_mean": 5248.0,
|
||
|
|
"valid_targets_min": 2555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.152,
|
||
|
|
"grad_norm": 0.45862189357281363,
|
||
|
|
"learning_rate": 7.922599278602524e-06,
|
||
|
|
"loss": 0.2489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20885488390922546,
|
||
|
|
"step": 3220,
|
||
|
|
"valid_targets_mean": 4904.6,
|
||
|
|
"valid_targets_min": 2321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.16,
|
||
|
|
"grad_norm": 0.41829998931400625,
|
||
|
|
"learning_rate": 7.859091068588987e-06,
|
||
|
|
"loss": 0.2354,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23560893535614014,
|
||
|
|
"step": 3225,
|
||
|
|
"valid_targets_mean": 5652.8,
|
||
|
|
"valid_targets_min": 2067
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.168,
|
||
|
|
"grad_norm": 0.44356890901584595,
|
||
|
|
"learning_rate": 7.795776126396284e-06,
|
||
|
|
"loss": 0.2478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2722322344779968,
|
||
|
|
"step": 3230,
|
||
|
|
"valid_targets_mean": 5946.8,
|
||
|
|
"valid_targets_min": 2419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.176,
|
||
|
|
"grad_norm": 0.4752087181203412,
|
||
|
|
"learning_rate": 7.732655459917726e-06,
|
||
|
|
"loss": 0.245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.251334011554718,
|
||
|
|
"step": 3235,
|
||
|
|
"valid_targets_mean": 6202.1,
|
||
|
|
"valid_targets_min": 2143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.184,
|
||
|
|
"grad_norm": 0.47330365224350573,
|
||
|
|
"learning_rate": 7.669730073954005e-06,
|
||
|
|
"loss": 0.2541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25445556640625,
|
||
|
|
"step": 3240,
|
||
|
|
"valid_targets_mean": 4800.8,
|
||
|
|
"valid_targets_min": 1979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.192,
|
||
|
|
"grad_norm": 0.4241113042867172,
|
||
|
|
"learning_rate": 7.607000970197194e-06,
|
||
|
|
"loss": 0.2691,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26085883378982544,
|
||
|
|
"step": 3245,
|
||
|
|
"valid_targets_mean": 6422.4,
|
||
|
|
"valid_targets_min": 3359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2,
|
||
|
|
"grad_norm": 0.4082829197550291,
|
||
|
|
"learning_rate": 7.544469147214797e-06,
|
||
|
|
"loss": 0.2435,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23965823650360107,
|
||
|
|
"step": 3250,
|
||
|
|
"valid_targets_mean": 5750.3,
|
||
|
|
"valid_targets_min": 2589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.208,
|
||
|
|
"grad_norm": 0.4212938893338529,
|
||
|
|
"learning_rate": 7.482135600433868e-06,
|
||
|
|
"loss": 0.2612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2490275353193283,
|
||
|
|
"step": 3255,
|
||
|
|
"valid_targets_mean": 5702.0,
|
||
|
|
"valid_targets_min": 1607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.216,
|
||
|
|
"grad_norm": 0.4533089451122526,
|
||
|
|
"learning_rate": 7.420001322125156e-06,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26186293363571167,
|
||
|
|
"step": 3260,
|
||
|
|
"valid_targets_mean": 5979.9,
|
||
|
|
"valid_targets_min": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.224,
|
||
|
|
"grad_norm": 0.43280208998225644,
|
||
|
|
"learning_rate": 7.3580673013872946e-06,
|
||
|
|
"loss": 0.2438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2600148320198059,
|
||
|
|
"step": 3265,
|
||
|
|
"valid_targets_mean": 5992.2,
|
||
|
|
"valid_targets_min": 2637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.232,
|
||
|
|
"grad_norm": 0.4285996875181488,
|
||
|
|
"learning_rate": 7.2963345241310904e-06,
|
||
|
|
"loss": 0.2487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24552065134048462,
|
||
|
|
"step": 3270,
|
||
|
|
"valid_targets_mean": 5952.8,
|
||
|
|
"valid_targets_min": 2993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.24,
|
||
|
|
"grad_norm": 0.4396851695342125,
|
||
|
|
"learning_rate": 7.234803973063797e-06,
|
||
|
|
"loss": 0.2539,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2468017339706421,
|
||
|
|
"step": 3275,
|
||
|
|
"valid_targets_mean": 5339.3,
|
||
|
|
"valid_targets_min": 2045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.248,
|
||
|
|
"grad_norm": 0.46885707206768734,
|
||
|
|
"learning_rate": 7.173476627673492e-06,
|
||
|
|
"loss": 0.2379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23570626974105835,
|
||
|
|
"step": 3280,
|
||
|
|
"valid_targets_mean": 4740.2,
|
||
|
|
"valid_targets_min": 895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.256,
|
||
|
|
"grad_norm": 0.47683292662936794,
|
||
|
|
"learning_rate": 7.112353464213477e-06,
|
||
|
|
"loss": 0.2469,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23079991340637207,
|
||
|
|
"step": 3285,
|
||
|
|
"valid_targets_mean": 4879.6,
|
||
|
|
"valid_targets_min": 781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.264,
|
||
|
|
"grad_norm": 0.40296099845402256,
|
||
|
|
"learning_rate": 7.051435455686735e-06,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24319109320640564,
|
||
|
|
"step": 3290,
|
||
|
|
"valid_targets_mean": 6255.1,
|
||
|
|
"valid_targets_min": 2817
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.272,
|
||
|
|
"grad_norm": 0.4469612736416828,
|
||
|
|
"learning_rate": 6.990723571830438e-06,
|
||
|
|
"loss": 0.2474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24624331295490265,
|
||
|
|
"step": 3295,
|
||
|
|
"valid_targets_mean": 5637.4,
|
||
|
|
"valid_targets_min": 1883
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.28,
|
||
|
|
"grad_norm": 0.39280133547463153,
|
||
|
|
"learning_rate": 6.93021877910052e-06,
|
||
|
|
"loss": 0.2482,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2591555118560791,
|
||
|
|
"step": 3300,
|
||
|
|
"valid_targets_mean": 6686.6,
|
||
|
|
"valid_targets_min": 2720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.288,
|
||
|
|
"grad_norm": 0.4258546217009564,
|
||
|
|
"learning_rate": 6.8699220406562985e-06,
|
||
|
|
"loss": 0.2512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24424447119235992,
|
||
|
|
"step": 3305,
|
||
|
|
"valid_targets_mean": 6241.3,
|
||
|
|
"valid_targets_min": 3838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.296,
|
||
|
|
"grad_norm": 0.4385890742807229,
|
||
|
|
"learning_rate": 6.809834316345117e-06,
|
||
|
|
"loss": 0.2504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24113088846206665,
|
||
|
|
"step": 3310,
|
||
|
|
"valid_targets_mean": 5023.6,
|
||
|
|
"valid_targets_min": 3535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.304,
|
||
|
|
"grad_norm": 0.43825408254424714,
|
||
|
|
"learning_rate": 6.749956562687083e-06,
|
||
|
|
"loss": 0.2609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31159403920173645,
|
||
|
|
"step": 3315,
|
||
|
|
"valid_targets_mean": 6687.9,
|
||
|
|
"valid_targets_min": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.312,
|
||
|
|
"grad_norm": 0.4433569916218975,
|
||
|
|
"learning_rate": 6.690289732859841e-06,
|
||
|
|
"loss": 0.2543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28443658351898193,
|
||
|
|
"step": 3320,
|
||
|
|
"valid_targets_mean": 6012.2,
|
||
|
|
"valid_targets_min": 2471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.32,
|
||
|
|
"grad_norm": 0.3949199572235783,
|
||
|
|
"learning_rate": 6.630834776683403e-06,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26501333713531494,
|
||
|
|
"step": 3325,
|
||
|
|
"valid_targets_mean": 6522.0,
|
||
|
|
"valid_targets_min": 1666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.328,
|
||
|
|
"grad_norm": 0.4111723604366614,
|
||
|
|
"learning_rate": 6.571592640605e-06,
|
||
|
|
"loss": 0.2519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23842376470565796,
|
||
|
|
"step": 3330,
|
||
|
|
"valid_targets_mean": 6088.6,
|
||
|
|
"valid_targets_min": 3261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.336,
|
||
|
|
"grad_norm": 0.4018640109418417,
|
||
|
|
"learning_rate": 6.512564267684061e-06,
|
||
|
|
"loss": 0.2407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24280548095703125,
|
||
|
|
"step": 3335,
|
||
|
|
"valid_targets_mean": 6272.4,
|
||
|
|
"valid_targets_min": 783
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.344,
|
||
|
|
"grad_norm": 0.399544016875131,
|
||
|
|
"learning_rate": 6.453750597577167e-06,
|
||
|
|
"loss": 0.2817,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.284124493598938,
|
||
|
|
"step": 3340,
|
||
|
|
"valid_targets_mean": 6603.8,
|
||
|
|
"valid_targets_min": 2776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.352,
|
||
|
|
"grad_norm": 0.39745798572137775,
|
||
|
|
"learning_rate": 6.395152566523106e-06,
|
||
|
|
"loss": 0.2547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24532105028629303,
|
||
|
|
"step": 3345,
|
||
|
|
"valid_targets_mean": 5812.2,
|
||
|
|
"valid_targets_min": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.36,
|
||
|
|
"grad_norm": 0.46784958899372814,
|
||
|
|
"learning_rate": 6.336771107327966e-06,
|
||
|
|
"loss": 0.2526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2436250001192093,
|
||
|
|
"step": 3350,
|
||
|
|
"valid_targets_mean": 5234.5,
|
||
|
|
"valid_targets_min": 619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.368,
|
||
|
|
"grad_norm": 0.45098215369867023,
|
||
|
|
"learning_rate": 6.278607149350289e-06,
|
||
|
|
"loss": 0.245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25613322854042053,
|
||
|
|
"step": 3355,
|
||
|
|
"valid_targets_mean": 5455.2,
|
||
|
|
"valid_targets_min": 2360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.376,
|
||
|
|
"grad_norm": 0.43942856625352117,
|
||
|
|
"learning_rate": 6.220661618486268e-06,
|
||
|
|
"loss": 0.2489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24000336229801178,
|
||
|
|
"step": 3360,
|
||
|
|
"valid_targets_mean": 5367.2,
|
||
|
|
"valid_targets_min": 1183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.384,
|
||
|
|
"grad_norm": 0.4523399589569011,
|
||
|
|
"learning_rate": 6.162935437155024e-06,
|
||
|
|
"loss": 0.2509,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24709078669548035,
|
||
|
|
"step": 3365,
|
||
|
|
"valid_targets_mean": 5741.4,
|
||
|
|
"valid_targets_min": 2570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.392,
|
||
|
|
"grad_norm": 0.4708562307010389,
|
||
|
|
"learning_rate": 6.105429524283901e-06,
|
||
|
|
"loss": 0.2462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23900192975997925,
|
||
|
|
"step": 3370,
|
||
|
|
"valid_targets_mean": 4828.1,
|
||
|
|
"valid_targets_min": 1988
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4,
|
||
|
|
"grad_norm": 0.381020756607991,
|
||
|
|
"learning_rate": 6.04814479529386e-06,
|
||
|
|
"loss": 0.2412,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2541239261627197,
|
||
|
|
"step": 3375,
|
||
|
|
"valid_targets_mean": 6797.5,
|
||
|
|
"valid_targets_min": 3085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.408,
|
||
|
|
"grad_norm": 0.46455582913065613,
|
||
|
|
"learning_rate": 5.991082162084889e-06,
|
||
|
|
"loss": 0.2522,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2606612741947174,
|
||
|
|
"step": 3380,
|
||
|
|
"valid_targets_mean": 5196.4,
|
||
|
|
"valid_targets_min": 1907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.416,
|
||
|
|
"grad_norm": 0.4388471193782842,
|
||
|
|
"learning_rate": 5.934242533021499e-06,
|
||
|
|
"loss": 0.2474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2463933527469635,
|
||
|
|
"step": 3385,
|
||
|
|
"valid_targets_mean": 5517.2,
|
||
|
|
"valid_targets_min": 2367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.424,
|
||
|
|
"grad_norm": 0.42373863816522317,
|
||
|
|
"learning_rate": 5.877626812918258e-06,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2637603282928467,
|
||
|
|
"step": 3390,
|
||
|
|
"valid_targets_mean": 6460.0,
|
||
|
|
"valid_targets_min": 2931
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.432,
|
||
|
|
"grad_norm": 0.42564288561284036,
|
||
|
|
"learning_rate": 5.821235903025378e-06,
|
||
|
|
"loss": 0.2299,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23866936564445496,
|
||
|
|
"step": 3395,
|
||
|
|
"valid_targets_mean": 5335.6,
|
||
|
|
"valid_targets_min": 2272
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.44,
|
||
|
|
"grad_norm": 0.4223958250211236,
|
||
|
|
"learning_rate": 5.765070701014391e-06,
|
||
|
|
"loss": 0.2466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2407655119895935,
|
||
|
|
"step": 3400,
|
||
|
|
"valid_targets_mean": 5698.6,
|
||
|
|
"valid_targets_min": 1173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.448,
|
||
|
|
"grad_norm": 0.4363438815869388,
|
||
|
|
"learning_rate": 5.709132100963841e-06,
|
||
|
|
"loss": 0.2499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24764753878116608,
|
||
|
|
"step": 3405,
|
||
|
|
"valid_targets_mean": 5587.6,
|
||
|
|
"valid_targets_min": 1994
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.456,
|
||
|
|
"grad_norm": 0.4265485440426739,
|
||
|
|
"learning_rate": 5.653420993345062e-06,
|
||
|
|
"loss": 0.2415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25707298517227173,
|
||
|
|
"step": 3410,
|
||
|
|
"valid_targets_mean": 5705.9,
|
||
|
|
"valid_targets_min": 1607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.464,
|
||
|
|
"grad_norm": 0.42927725568540487,
|
||
|
|
"learning_rate": 5.597938265007994e-06,
|
||
|
|
"loss": 0.2632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2565683126449585,
|
||
|
|
"step": 3415,
|
||
|
|
"valid_targets_mean": 5731.8,
|
||
|
|
"valid_targets_min": 763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4719999999999995,
|
||
|
|
"grad_norm": 0.42083810722984627,
|
||
|
|
"learning_rate": 5.542684799167069e-06,
|
||
|
|
"loss": 0.2517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2680370807647705,
|
||
|
|
"step": 3420,
|
||
|
|
"valid_targets_mean": 5880.8,
|
||
|
|
"valid_targets_min": 2995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.48,
|
||
|
|
"grad_norm": 0.397286313684561,
|
||
|
|
"learning_rate": 5.487661475387152e-06,
|
||
|
|
"loss": 0.2574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2509041726589203,
|
||
|
|
"step": 3425,
|
||
|
|
"valid_targets_mean": 6285.8,
|
||
|
|
"valid_targets_min": 1872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4879999999999995,
|
||
|
|
"grad_norm": 0.4187203152823719,
|
||
|
|
"learning_rate": 5.432869169569541e-06,
|
||
|
|
"loss": 0.2502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23192846775054932,
|
||
|
|
"step": 3430,
|
||
|
|
"valid_targets_mean": 5728.9,
|
||
|
|
"valid_targets_min": 2062
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.496,
|
||
|
|
"grad_norm": 0.4240392250819973,
|
||
|
|
"learning_rate": 5.378308753938024e-06,
|
||
|
|
"loss": 0.2528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23642998933792114,
|
||
|
|
"step": 3435,
|
||
|
|
"valid_targets_mean": 5666.9,
|
||
|
|
"valid_targets_min": 2092
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.504,
|
||
|
|
"grad_norm": 0.45571623112464904,
|
||
|
|
"learning_rate": 5.323981097024986e-06,
|
||
|
|
"loss": 0.2372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24621962010860443,
|
||
|
|
"step": 3440,
|
||
|
|
"valid_targets_mean": 5369.1,
|
||
|
|
"valid_targets_min": 2968
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5120000000000005,
|
||
|
|
"grad_norm": 0.43171822203395027,
|
||
|
|
"learning_rate": 5.269887063657595e-06,
|
||
|
|
"loss": 0.2484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2391555905342102,
|
||
|
|
"step": 3445,
|
||
|
|
"valid_targets_mean": 5589.8,
|
||
|
|
"valid_targets_min": 907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.52,
|
||
|
|
"grad_norm": 0.4275681474717406,
|
||
|
|
"learning_rate": 5.216027514944027e-06,
|
||
|
|
"loss": 0.2362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24127346277236938,
|
||
|
|
"step": 3450,
|
||
|
|
"valid_targets_mean": 5519.3,
|
||
|
|
"valid_targets_min": 1018
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5280000000000005,
|
||
|
|
"grad_norm": 0.41944732345815783,
|
||
|
|
"learning_rate": 5.162403308259767e-06,
|
||
|
|
"loss": 0.2491,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24043628573417664,
|
||
|
|
"step": 3455,
|
||
|
|
"valid_targets_mean": 6375.5,
|
||
|
|
"valid_targets_min": 3478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.536,
|
||
|
|
"grad_norm": 0.43002550469332923,
|
||
|
|
"learning_rate": 5.109015297233935e-06,
|
||
|
|
"loss": 0.248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23204442858695984,
|
||
|
|
"step": 3460,
|
||
|
|
"valid_targets_mean": 5087.3,
|
||
|
|
"valid_targets_min": 901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5440000000000005,
|
||
|
|
"grad_norm": 0.4431970324561754,
|
||
|
|
"learning_rate": 5.055864331735736e-06,
|
||
|
|
"loss": 0.2413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24048209190368652,
|
||
|
|
"step": 3465,
|
||
|
|
"valid_targets_mean": 5482.9,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.552,
|
||
|
|
"grad_norm": 0.3920844129704998,
|
||
|
|
"learning_rate": 5.002951257860909e-06,
|
||
|
|
"loss": 0.248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24493908882141113,
|
||
|
|
"step": 3470,
|
||
|
|
"valid_targets_mean": 6554.7,
|
||
|
|
"valid_targets_min": 2038
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5600000000000005,
|
||
|
|
"grad_norm": 0.4181462899118393,
|
||
|
|
"learning_rate": 4.950276917918256e-06,
|
||
|
|
"loss": 0.2393,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24973033368587494,
|
||
|
|
"step": 3475,
|
||
|
|
"valid_targets_mean": 5991.9,
|
||
|
|
"valid_targets_min": 3047
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.568,
|
||
|
|
"grad_norm": 0.45794713136570736,
|
||
|
|
"learning_rate": 4.8978421504162385e-06,
|
||
|
|
"loss": 0.2466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23978012800216675,
|
||
|
|
"step": 3480,
|
||
|
|
"valid_targets_mean": 4957.3,
|
||
|
|
"valid_targets_min": 2608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.576,
|
||
|
|
"grad_norm": 0.3923396821974702,
|
||
|
|
"learning_rate": 4.845647790049634e-06,
|
||
|
|
"loss": 0.2382,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23957261443138123,
|
||
|
|
"step": 3485,
|
||
|
|
"valid_targets_mean": 6061.6,
|
||
|
|
"valid_targets_min": 2531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.584,
|
||
|
|
"grad_norm": 0.4800708463038158,
|
||
|
|
"learning_rate": 4.793694667686244e-06,
|
||
|
|
"loss": 0.2404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2572818398475647,
|
||
|
|
"step": 3490,
|
||
|
|
"valid_targets_mean": 5308.6,
|
||
|
|
"valid_targets_min": 687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.592,
|
||
|
|
"grad_norm": 0.4109993468120257,
|
||
|
|
"learning_rate": 4.741983610353664e-06,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21713688969612122,
|
||
|
|
"step": 3495,
|
||
|
|
"valid_targets_mean": 5530.6,
|
||
|
|
"valid_targets_min": 2402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6,
|
||
|
|
"grad_norm": 0.4372650094791484,
|
||
|
|
"learning_rate": 4.690515441226122e-06,
|
||
|
|
"loss": 0.2538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23960278928279877,
|
||
|
|
"step": 3500,
|
||
|
|
"valid_targets_mean": 5337.7,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.608,
|
||
|
|
"grad_norm": 0.46819765052374257,
|
||
|
|
"learning_rate": 4.639290979611379e-06,
|
||
|
|
"loss": 0.2456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22429856657981873,
|
||
|
|
"step": 3505,
|
||
|
|
"valid_targets_mean": 4855.4,
|
||
|
|
"valid_targets_min": 2284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.616,
|
||
|
|
"grad_norm": 0.4041069980782639,
|
||
|
|
"learning_rate": 4.588311040937683e-06,
|
||
|
|
"loss": 0.2567,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2593272924423218,
|
||
|
|
"step": 3510,
|
||
|
|
"valid_targets_mean": 6654.3,
|
||
|
|
"valid_targets_min": 4808
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.624,
|
||
|
|
"grad_norm": 0.44503285983742874,
|
||
|
|
"learning_rate": 4.537576436740783e-06,
|
||
|
|
"loss": 0.2419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24970743060112,
|
||
|
|
"step": 3515,
|
||
|
|
"valid_targets_mean": 5985.1,
|
||
|
|
"valid_targets_min": 2545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.632,
|
||
|
|
"grad_norm": 0.4717360114114379,
|
||
|
|
"learning_rate": 4.487087974651016e-06,
|
||
|
|
"loss": 0.2389,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23342707753181458,
|
||
|
|
"step": 3520,
|
||
|
|
"valid_targets_mean": 4694.1,
|
||
|
|
"valid_targets_min": 2567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.64,
|
||
|
|
"grad_norm": 0.4706677633097796,
|
||
|
|
"learning_rate": 4.436846458380455e-06,
|
||
|
|
"loss": 0.2362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2343820333480835,
|
||
|
|
"step": 3525,
|
||
|
|
"valid_targets_mean": 4799.3,
|
||
|
|
"valid_targets_min": 876
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.648,
|
||
|
|
"grad_norm": 0.41331820998430824,
|
||
|
|
"learning_rate": 4.386852687710104e-06,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2305280566215515,
|
||
|
|
"step": 3530,
|
||
|
|
"valid_targets_mean": 5371.6,
|
||
|
|
"valid_targets_min": 2276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.656,
|
||
|
|
"grad_norm": 0.43830938492589083,
|
||
|
|
"learning_rate": 4.337107458477177e-06,
|
||
|
|
"loss": 0.2512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2581741213798523,
|
||
|
|
"step": 3535,
|
||
|
|
"valid_targets_mean": 5969.8,
|
||
|
|
"valid_targets_min": 3700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.664,
|
||
|
|
"grad_norm": 0.39718001494210303,
|
||
|
|
"learning_rate": 4.287611562562422e-06,
|
||
|
|
"loss": 0.252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2551124393939972,
|
||
|
|
"step": 3540,
|
||
|
|
"valid_targets_mean": 6743.6,
|
||
|
|
"valid_targets_min": 2610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.672,
|
||
|
|
"grad_norm": 0.47799147623395644,
|
||
|
|
"learning_rate": 4.238365787877516e-06,
|
||
|
|
"loss": 0.2512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2615952491760254,
|
||
|
|
"step": 3545,
|
||
|
|
"valid_targets_mean": 5030.4,
|
||
|
|
"valid_targets_min": 2079
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.68,
|
||
|
|
"grad_norm": 0.43097924186117664,
|
||
|
|
"learning_rate": 4.189370918352531e-06,
|
||
|
|
"loss": 0.2498,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23403191566467285,
|
||
|
|
"step": 3550,
|
||
|
|
"valid_targets_mean": 5988.9,
|
||
|
|
"valid_targets_min": 2722
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.688,
|
||
|
|
"grad_norm": 0.43364358762558836,
|
||
|
|
"learning_rate": 4.140627733923439e-06,
|
||
|
|
"loss": 0.2525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24536678194999695,
|
||
|
|
"step": 3555,
|
||
|
|
"valid_targets_mean": 5547.0,
|
||
|
|
"valid_targets_min": 2828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.696,
|
||
|
|
"grad_norm": 0.380117459617515,
|
||
|
|
"learning_rate": 4.092137010519712e-06,
|
||
|
|
"loss": 0.2573,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2651956081390381,
|
||
|
|
"step": 3560,
|
||
|
|
"valid_targets_mean": 6673.7,
|
||
|
|
"valid_targets_min": 2056
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.704,
|
||
|
|
"grad_norm": 0.37672052137731615,
|
||
|
|
"learning_rate": 4.043899520051964e-06,
|
||
|
|
"loss": 0.2574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2500587999820709,
|
||
|
|
"step": 3565,
|
||
|
|
"valid_targets_mean": 6525.6,
|
||
|
|
"valid_targets_min": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.712,
|
||
|
|
"grad_norm": 0.42491726304200406,
|
||
|
|
"learning_rate": 3.995916030399658e-06,
|
||
|
|
"loss": 0.2498,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2274150550365448,
|
||
|
|
"step": 3570,
|
||
|
|
"valid_targets_mean": 5182.4,
|
||
|
|
"valid_targets_min": 699
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.72,
|
||
|
|
"grad_norm": 0.42850700678468717,
|
||
|
|
"learning_rate": 3.948187305398892e-06,
|
||
|
|
"loss": 0.25,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2219313681125641,
|
||
|
|
"step": 3575,
|
||
|
|
"valid_targets_mean": 5911.5,
|
||
|
|
"valid_targets_min": 2882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.728,
|
||
|
|
"grad_norm": 0.44052850311666414,
|
||
|
|
"learning_rate": 3.90071410483023e-06,
|
||
|
|
"loss": 0.2502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25418415665626526,
|
||
|
|
"step": 3580,
|
||
|
|
"valid_targets_mean": 5418.7,
|
||
|
|
"valid_targets_min": 1666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.736,
|
||
|
|
"grad_norm": 0.43430539148105113,
|
||
|
|
"learning_rate": 3.853497184406623e-06,
|
||
|
|
"loss": 0.2508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2542496919631958,
|
||
|
|
"step": 3585,
|
||
|
|
"valid_targets_mean": 5782.9,
|
||
|
|
"valid_targets_min": 2622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.744,
|
||
|
|
"grad_norm": 0.4378276109822209,
|
||
|
|
"learning_rate": 3.80653729576135e-06,
|
||
|
|
"loss": 0.2507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23649832606315613,
|
||
|
|
"step": 3590,
|
||
|
|
"valid_targets_mean": 5274.5,
|
||
|
|
"valid_targets_min": 2480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.752,
|
||
|
|
"grad_norm": 0.4762535210324838,
|
||
|
|
"learning_rate": 3.7598351864360872e-06,
|
||
|
|
"loss": 0.2378,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23490282893180847,
|
||
|
|
"step": 3595,
|
||
|
|
"valid_targets_mean": 4631.4,
|
||
|
|
"valid_targets_min": 802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.76,
|
||
|
|
"grad_norm": 0.4509017959893099,
|
||
|
|
"learning_rate": 3.713391599868985e-06,
|
||
|
|
"loss": 0.2483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23814690113067627,
|
||
|
|
"step": 3600,
|
||
|
|
"valid_targets_mean": 5169.4,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.768,
|
||
|
|
"grad_norm": 0.4171862673978773,
|
||
|
|
"learning_rate": 3.6672072753828424e-06,
|
||
|
|
"loss": 0.2419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2462296038866043,
|
||
|
|
"step": 3605,
|
||
|
|
"valid_targets_mean": 5297.6,
|
||
|
|
"valid_targets_min": 1668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.776,
|
||
|
|
"grad_norm": 0.4256809301759758,
|
||
|
|
"learning_rate": 3.6212829481733368e-06,
|
||
|
|
"loss": 0.2461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24445554614067078,
|
||
|
|
"step": 3610,
|
||
|
|
"valid_targets_mean": 5463.2,
|
||
|
|
"valid_targets_min": 2316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.784,
|
||
|
|
"grad_norm": 0.4167025170439137,
|
||
|
|
"learning_rate": 3.575619349297317e-06,
|
||
|
|
"loss": 0.2407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21641194820404053,
|
||
|
|
"step": 3615,
|
||
|
|
"valid_targets_mean": 5376.2,
|
||
|
|
"valid_targets_min": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.792,
|
||
|
|
"grad_norm": 0.4206073504201526,
|
||
|
|
"learning_rate": 3.5302172056611682e-06,
|
||
|
|
"loss": 0.2553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27774128317832947,
|
||
|
|
"step": 3620,
|
||
|
|
"valid_targets_mean": 6891.8,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.8,
|
||
|
|
"grad_norm": 0.4575929196736855,
|
||
|
|
"learning_rate": 3.485077240009247e-06,
|
||
|
|
"loss": 0.2538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2605830132961273,
|
||
|
|
"step": 3625,
|
||
|
|
"valid_targets_mean": 5522.7,
|
||
|
|
"valid_targets_min": 761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.808,
|
||
|
|
"grad_norm": 0.461225529092125,
|
||
|
|
"learning_rate": 3.4402001709123643e-06,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2431730031967163,
|
||
|
|
"step": 3630,
|
||
|
|
"valid_targets_mean": 5088.9,
|
||
|
|
"valid_targets_min": 2248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.816,
|
||
|
|
"grad_norm": 0.4126669360245022,
|
||
|
|
"learning_rate": 3.3955867127563515e-06,
|
||
|
|
"loss": 0.2547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25011661648750305,
|
||
|
|
"step": 3635,
|
||
|
|
"valid_targets_mean": 6407.1,
|
||
|
|
"valid_targets_min": 2470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.824,
|
||
|
|
"grad_norm": 0.4448568564700826,
|
||
|
|
"learning_rate": 3.351237575730695e-06,
|
||
|
|
"loss": 0.2507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2398379147052765,
|
||
|
|
"step": 3640,
|
||
|
|
"valid_targets_mean": 5172.5,
|
||
|
|
"valid_targets_min": 798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.832,
|
||
|
|
"grad_norm": 0.4143753113371597,
|
||
|
|
"learning_rate": 3.307153465817219e-06,
|
||
|
|
"loss": 0.2368,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23666608333587646,
|
||
|
|
"step": 3645,
|
||
|
|
"valid_targets_mean": 5480.1,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.84,
|
||
|
|
"grad_norm": 0.4753065577157038,
|
||
|
|
"learning_rate": 3.263335084778856e-06,
|
||
|
|
"loss": 0.2557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26486414670944214,
|
||
|
|
"step": 3650,
|
||
|
|
"valid_targets_mean": 5145.8,
|
||
|
|
"valid_targets_min": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.848,
|
||
|
|
"grad_norm": 0.4785539397504414,
|
||
|
|
"learning_rate": 3.2197831301484816e-06,
|
||
|
|
"loss": 0.2506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2624918818473816,
|
||
|
|
"step": 3655,
|
||
|
|
"valid_targets_mean": 5236.1,
|
||
|
|
"valid_targets_min": 704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.856,
|
||
|
|
"grad_norm": 0.43901540322621985,
|
||
|
|
"learning_rate": 3.1764982952177805e-06,
|
||
|
|
"loss": 0.2541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.253711462020874,
|
||
|
|
"step": 3660,
|
||
|
|
"valid_targets_mean": 6207.4,
|
||
|
|
"valid_targets_min": 3493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.864,
|
||
|
|
"grad_norm": 0.46334079040298753,
|
||
|
|
"learning_rate": 3.1334812690262507e-06,
|
||
|
|
"loss": 0.2518,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2284860759973526,
|
||
|
|
"step": 3665,
|
||
|
|
"valid_targets_mean": 4962.0,
|
||
|
|
"valid_targets_min": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.872,
|
||
|
|
"grad_norm": 0.41129202818307,
|
||
|
|
"learning_rate": 3.0907327363502084e-06,
|
||
|
|
"loss": 0.24,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24177731573581696,
|
||
|
|
"step": 3670,
|
||
|
|
"valid_targets_mean": 5679.8,
|
||
|
|
"valid_targets_min": 2740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.88,
|
||
|
|
"grad_norm": 0.4405933411516817,
|
||
|
|
"learning_rate": 3.0482533776918987e-06,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2584155201911926,
|
||
|
|
"step": 3675,
|
||
|
|
"valid_targets_mean": 5422.5,
|
||
|
|
"valid_targets_min": 2514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.888,
|
||
|
|
"grad_norm": 0.38587475298350843,
|
||
|
|
"learning_rate": 3.0060438692686533e-06,
|
||
|
|
"loss": 0.2503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2485569566488266,
|
||
|
|
"step": 3680,
|
||
|
|
"valid_targets_mean": 6428.7,
|
||
|
|
"valid_targets_min": 2191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.896,
|
||
|
|
"grad_norm": 0.4507232955476526,
|
||
|
|
"learning_rate": 2.964104883002139e-06,
|
||
|
|
"loss": 0.2395,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26484763622283936,
|
||
|
|
"step": 3685,
|
||
|
|
"valid_targets_mean": 5712.8,
|
||
|
|
"valid_targets_min": 2328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.904,
|
||
|
|
"grad_norm": 0.43487093323288945,
|
||
|
|
"learning_rate": 2.9224370865076457e-06,
|
||
|
|
"loss": 0.2475,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25088346004486084,
|
||
|
|
"step": 3690,
|
||
|
|
"valid_targets_mean": 5280.4,
|
||
|
|
"valid_targets_min": 2833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.912,
|
||
|
|
"grad_norm": 0.46366305358427945,
|
||
|
|
"learning_rate": 2.8810411430834716e-06,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23430442810058594,
|
||
|
|
"step": 3695,
|
||
|
|
"valid_targets_mean": 4903.6,
|
||
|
|
"valid_targets_min": 2845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.92,
|
||
|
|
"grad_norm": 0.45953163223662696,
|
||
|
|
"learning_rate": 2.8399177117003595e-06,
|
||
|
|
"loss": 0.2456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21741312742233276,
|
||
|
|
"step": 3700,
|
||
|
|
"valid_targets_mean": 4953.9,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.928,
|
||
|
|
"grad_norm": 0.39124968173123453,
|
||
|
|
"learning_rate": 2.7990674469910085e-06,
|
||
|
|
"loss": 0.2447,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2448250949382782,
|
||
|
|
"step": 3705,
|
||
|
|
"valid_targets_mean": 6150.7,
|
||
|
|
"valid_targets_min": 2248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.936,
|
||
|
|
"grad_norm": 0.35502781214246365,
|
||
|
|
"learning_rate": 2.7584909992396515e-06,
|
||
|
|
"loss": 0.2428,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26683682203292847,
|
||
|
|
"step": 3710,
|
||
|
|
"valid_targets_mean": 7718.4,
|
||
|
|
"valid_targets_min": 3037
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.944,
|
||
|
|
"grad_norm": 0.39446290087026825,
|
||
|
|
"learning_rate": 2.7181890143716995e-06,
|
||
|
|
"loss": 0.2534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26290997862815857,
|
||
|
|
"step": 3715,
|
||
|
|
"valid_targets_mean": 6427.1,
|
||
|
|
"valid_targets_min": 3224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.952,
|
||
|
|
"grad_norm": 0.47638170497550425,
|
||
|
|
"learning_rate": 2.6781621339434717e-06,
|
||
|
|
"loss": 0.2401,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23213085532188416,
|
||
|
|
"step": 3720,
|
||
|
|
"valid_targets_mean": 4806.5,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.96,
|
||
|
|
"grad_norm": 0.4267363318221976,
|
||
|
|
"learning_rate": 2.638410995131966e-06,
|
||
|
|
"loss": 0.2518,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26729530096054077,
|
||
|
|
"step": 3725,
|
||
|
|
"valid_targets_mean": 5881.9,
|
||
|
|
"valid_targets_min": 2237
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.968,
|
||
|
|
"grad_norm": 0.4179471638601527,
|
||
|
|
"learning_rate": 2.5989362307247313e-06,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26390784978866577,
|
||
|
|
"step": 3730,
|
||
|
|
"valid_targets_mean": 6047.1,
|
||
|
|
"valid_targets_min": 3510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.976,
|
||
|
|
"grad_norm": 0.40078466530949025,
|
||
|
|
"learning_rate": 2.5597384691097847e-06,
|
||
|
|
"loss": 0.2391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24570685625076294,
|
||
|
|
"step": 3735,
|
||
|
|
"valid_targets_mean": 6009.8,
|
||
|
|
"valid_targets_min": 707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.984,
|
||
|
|
"grad_norm": 0.4225593036828595,
|
||
|
|
"learning_rate": 2.520818334265611e-06,
|
||
|
|
"loss": 0.2565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25478851795196533,
|
||
|
|
"step": 3740,
|
||
|
|
"valid_targets_mean": 5721.6,
|
||
|
|
"valid_targets_min": 3165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.992,
|
||
|
|
"grad_norm": 0.44828897669175494,
|
||
|
|
"learning_rate": 2.482176445751232e-06,
|
||
|
|
"loss": 0.2367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24662527441978455,
|
||
|
|
"step": 3745,
|
||
|
|
"valid_targets_mean": 5301.8,
|
||
|
|
"valid_targets_min": 1841
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0,
|
||
|
|
"grad_norm": 0.424666922649159,
|
||
|
|
"learning_rate": 2.4438134186963415e-06,
|
||
|
|
"loss": 0.2373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27201324701309204,
|
||
|
|
"step": 3750,
|
||
|
|
"valid_targets_mean": 5872.3,
|
||
|
|
"valid_targets_min": 2475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.008,
|
||
|
|
"grad_norm": 0.45619535275351364,
|
||
|
|
"learning_rate": 2.4057298637915105e-06,
|
||
|
|
"loss": 0.235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2112616002559662,
|
||
|
|
"step": 3755,
|
||
|
|
"valid_targets_mean": 4857.0,
|
||
|
|
"valid_targets_min": 2365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.016,
|
||
|
|
"grad_norm": 0.4367115638555302,
|
||
|
|
"learning_rate": 2.3679263872784717e-06,
|
||
|
|
"loss": 0.2463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25617077946662903,
|
||
|
|
"step": 3760,
|
||
|
|
"valid_targets_mean": 4926.6,
|
||
|
|
"valid_targets_min": 273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.024,
|
||
|
|
"grad_norm": 0.45303009142719136,
|
||
|
|
"learning_rate": 2.330403590940471e-06,
|
||
|
|
"loss": 0.2546,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3148558735847473,
|
||
|
|
"step": 3765,
|
||
|
|
"valid_targets_mean": 6289.8,
|
||
|
|
"valid_targets_min": 988
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.032,
|
||
|
|
"grad_norm": 0.42543736613890915,
|
||
|
|
"learning_rate": 2.2931620720926717e-06,
|
||
|
|
"loss": 0.2502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2525025010108948,
|
||
|
|
"step": 3770,
|
||
|
|
"valid_targets_mean": 5580.8,
|
||
|
|
"valid_targets_min": 2635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.04,
|
||
|
|
"grad_norm": 0.4352315343427995,
|
||
|
|
"learning_rate": 2.256202423572669e-06,
|
||
|
|
"loss": 0.2507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24089187383651733,
|
||
|
|
"step": 3775,
|
||
|
|
"valid_targets_mean": 5376.5,
|
||
|
|
"valid_targets_min": 598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.048,
|
||
|
|
"grad_norm": 0.430712579222173,
|
||
|
|
"learning_rate": 2.219525233731035e-06,
|
||
|
|
"loss": 0.2395,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24066440761089325,
|
||
|
|
"step": 3780,
|
||
|
|
"valid_targets_mean": 5657.6,
|
||
|
|
"valid_targets_min": 859
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.056,
|
||
|
|
"grad_norm": 0.4785332476153452,
|
||
|
|
"learning_rate": 2.183131086421961e-06,
|
||
|
|
"loss": 0.2427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2637433409690857,
|
||
|
|
"step": 3785,
|
||
|
|
"valid_targets_mean": 5608.4,
|
||
|
|
"valid_targets_min": 3189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.064,
|
||
|
|
"grad_norm": 0.4750185463565966,
|
||
|
|
"learning_rate": 2.1470205609939533e-06,
|
||
|
|
"loss": 0.2412,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24166584014892578,
|
||
|
|
"step": 3790,
|
||
|
|
"valid_targets_mean": 5490.9,
|
||
|
|
"valid_targets_min": 2950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.072,
|
||
|
|
"grad_norm": 0.3827437754884297,
|
||
|
|
"learning_rate": 2.1111942322806335e-06,
|
||
|
|
"loss": 0.2502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24897339940071106,
|
||
|
|
"step": 3795,
|
||
|
|
"valid_targets_mean": 6231.8,
|
||
|
|
"valid_targets_min": 2760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.08,
|
||
|
|
"grad_norm": 0.3816212526244572,
|
||
|
|
"learning_rate": 2.0756526705915635e-06,
|
||
|
|
"loss": 0.2461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28404420614242554,
|
||
|
|
"step": 3800,
|
||
|
|
"valid_targets_mean": 7391.5,
|
||
|
|
"valid_targets_min": 4361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.088,
|
||
|
|
"grad_norm": 0.45028360119094746,
|
||
|
|
"learning_rate": 2.0403964417031764e-06,
|
||
|
|
"loss": 0.237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22149428725242615,
|
||
|
|
"step": 3805,
|
||
|
|
"valid_targets_mean": 4898.4,
|
||
|
|
"valid_targets_min": 2070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.096,
|
||
|
|
"grad_norm": 0.4285819585903812,
|
||
|
|
"learning_rate": 2.0054261068497773e-06,
|
||
|
|
"loss": 0.2422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23075240850448608,
|
||
|
|
"step": 3810,
|
||
|
|
"valid_targets_mean": 5468.2,
|
||
|
|
"valid_targets_min": 2258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.104,
|
||
|
|
"grad_norm": 0.40077819783213,
|
||
|
|
"learning_rate": 1.9707422227145922e-06,
|
||
|
|
"loss": 0.2422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23339712619781494,
|
||
|
|
"step": 3815,
|
||
|
|
"valid_targets_mean": 6263.2,
|
||
|
|
"valid_targets_min": 676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.112,
|
||
|
|
"grad_norm": 0.4548643615766851,
|
||
|
|
"learning_rate": 1.936345341420924e-06,
|
||
|
|
"loss": 0.2462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2526797652244568,
|
||
|
|
"step": 3820,
|
||
|
|
"valid_targets_mean": 5243.7,
|
||
|
|
"valid_targets_min": 1971
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.12,
|
||
|
|
"grad_norm": 0.4318025930934668,
|
||
|
|
"learning_rate": 1.9022360105233507e-06,
|
||
|
|
"loss": 0.2508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24329990148544312,
|
||
|
|
"step": 3825,
|
||
|
|
"valid_targets_mean": 5584.3,
|
||
|
|
"valid_targets_min": 2735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.128,
|
||
|
|
"grad_norm": 0.42527366921476684,
|
||
|
|
"learning_rate": 1.8684147729990188e-06,
|
||
|
|
"loss": 0.2389,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2661643326282501,
|
||
|
|
"step": 3830,
|
||
|
|
"valid_targets_mean": 5815.2,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.136,
|
||
|
|
"grad_norm": 0.46563230810045014,
|
||
|
|
"learning_rate": 1.8348821672389893e-06,
|
||
|
|
"loss": 0.2407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23695829510688782,
|
||
|
|
"step": 3835,
|
||
|
|
"valid_targets_mean": 4932.8,
|
||
|
|
"valid_targets_min": 721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.144,
|
||
|
|
"grad_norm": 0.49593615366135946,
|
||
|
|
"learning_rate": 1.8016387270396784e-06,
|
||
|
|
"loss": 0.2523,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24340695142745972,
|
||
|
|
"step": 3840,
|
||
|
|
"valid_targets_mean": 4821.8,
|
||
|
|
"valid_targets_min": 1607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.152,
|
||
|
|
"grad_norm": 0.4101378057435764,
|
||
|
|
"learning_rate": 1.7686849815943486e-06,
|
||
|
|
"loss": 0.2361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23768271505832672,
|
||
|
|
"step": 3845,
|
||
|
|
"valid_targets_mean": 5658.8,
|
||
|
|
"valid_targets_min": 1018
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.16,
|
||
|
|
"grad_norm": 0.40398041857114947,
|
||
|
|
"learning_rate": 1.7360214554847e-06,
|
||
|
|
"loss": 0.2335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2318260818719864,
|
||
|
|
"step": 3850,
|
||
|
|
"valid_targets_mean": 5986.0,
|
||
|
|
"valid_targets_min": 1597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.168,
|
||
|
|
"grad_norm": 0.3896245352440973,
|
||
|
|
"learning_rate": 1.703648668672495e-06,
|
||
|
|
"loss": 0.2396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25532662868499756,
|
||
|
|
"step": 3855,
|
||
|
|
"valid_targets_mean": 6799.4,
|
||
|
|
"valid_targets_min": 4356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.176,
|
||
|
|
"grad_norm": 0.47023459730054107,
|
||
|
|
"learning_rate": 1.6715671364913077e-06,
|
||
|
|
"loss": 0.251,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25101006031036377,
|
||
|
|
"step": 3860,
|
||
|
|
"valid_targets_mean": 5093.5,
|
||
|
|
"valid_targets_min": 2197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.184,
|
||
|
|
"grad_norm": 0.4725640485374112,
|
||
|
|
"learning_rate": 1.6397773696383091e-06,
|
||
|
|
"loss": 0.2444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.261438250541687,
|
||
|
|
"step": 3865,
|
||
|
|
"valid_targets_mean": 5164.5,
|
||
|
|
"valid_targets_min": 2718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.192,
|
||
|
|
"grad_norm": 0.3777763013538414,
|
||
|
|
"learning_rate": 1.6082798741661321e-06,
|
||
|
|
"loss": 0.2524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24589067697525024,
|
||
|
|
"step": 3870,
|
||
|
|
"valid_targets_mean": 6304.3,
|
||
|
|
"valid_targets_min": 2316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2,
|
||
|
|
"grad_norm": 0.4141398897239025,
|
||
|
|
"learning_rate": 1.5770751514748273e-06,
|
||
|
|
"loss": 0.2346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2230231761932373,
|
||
|
|
"step": 3875,
|
||
|
|
"valid_targets_mean": 6024.8,
|
||
|
|
"valid_targets_min": 2746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.208,
|
||
|
|
"grad_norm": 0.5268906276378791,
|
||
|
|
"learning_rate": 1.5461636983038686e-06,
|
||
|
|
"loss": 0.237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20659932494163513,
|
||
|
|
"step": 3880,
|
||
|
|
"valid_targets_mean": 4485.9,
|
||
|
|
"valid_targets_min": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.216,
|
||
|
|
"grad_norm": 0.4538584693912728,
|
||
|
|
"learning_rate": 1.5155460067242578e-06,
|
||
|
|
"loss": 0.2451,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24113240838050842,
|
||
|
|
"step": 3885,
|
||
|
|
"valid_targets_mean": 5059.6,
|
||
|
|
"valid_targets_min": 2210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.224,
|
||
|
|
"grad_norm": 0.4186097232034136,
|
||
|
|
"learning_rate": 1.4852225641306816e-06,
|
||
|
|
"loss": 0.2369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23600910604000092,
|
||
|
|
"step": 3890,
|
||
|
|
"valid_targets_mean": 5368.4,
|
||
|
|
"valid_targets_min": 2607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.232,
|
||
|
|
"grad_norm": 0.39080225059366364,
|
||
|
|
"learning_rate": 1.4551938532337607e-06,
|
||
|
|
"loss": 0.2478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22949565947055817,
|
||
|
|
"step": 3895,
|
||
|
|
"valid_targets_mean": 6097.4,
|
||
|
|
"valid_targets_min": 2321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.24,
|
||
|
|
"grad_norm": 0.40327121453751646,
|
||
|
|
"learning_rate": 1.4254603520523614e-06,
|
||
|
|
"loss": 0.2421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2395782172679901,
|
||
|
|
"step": 3900,
|
||
|
|
"valid_targets_mean": 5384.2,
|
||
|
|
"valid_targets_min": 1669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.248,
|
||
|
|
"grad_norm": 0.46323326452197255,
|
||
|
|
"learning_rate": 1.3960225339059875e-06,
|
||
|
|
"loss": 0.2442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2367008924484253,
|
||
|
|
"step": 3905,
|
||
|
|
"valid_targets_mean": 4857.9,
|
||
|
|
"valid_targets_min": 2263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.256,
|
||
|
|
"grad_norm": 0.4669508465968655,
|
||
|
|
"learning_rate": 1.3668808674072409e-06,
|
||
|
|
"loss": 0.2475,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24488641321659088,
|
||
|
|
"step": 3910,
|
||
|
|
"valid_targets_mean": 5089.0,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.264,
|
||
|
|
"grad_norm": 0.4185297696425607,
|
||
|
|
"learning_rate": 1.338035816454375e-06,
|
||
|
|
"loss": 0.2652,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2515830993652344,
|
||
|
|
"step": 3915,
|
||
|
|
"valid_targets_mean": 6029.6,
|
||
|
|
"valid_targets_min": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.272,
|
||
|
|
"grad_norm": 0.38594773761023776,
|
||
|
|
"learning_rate": 1.3094878402238887e-06,
|
||
|
|
"loss": 0.2404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24694006145000458,
|
||
|
|
"step": 3920,
|
||
|
|
"valid_targets_mean": 6196.4,
|
||
|
|
"valid_targets_min": 2614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.28,
|
||
|
|
"grad_norm": 0.47584759172344404,
|
||
|
|
"learning_rate": 1.2812373931632371e-06,
|
||
|
|
"loss": 0.2353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2393854856491089,
|
||
|
|
"step": 3925,
|
||
|
|
"valid_targets_mean": 4695.4,
|
||
|
|
"valid_targets_min": 2014
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.288,
|
||
|
|
"grad_norm": 0.39590185736913047,
|
||
|
|
"learning_rate": 1.2532849249835932e-06,
|
||
|
|
"loss": 0.2406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2512339949607849,
|
||
|
|
"step": 3930,
|
||
|
|
"valid_targets_mean": 6472.1,
|
||
|
|
"valid_targets_min": 2398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.296,
|
||
|
|
"grad_norm": 0.37945048971948964,
|
||
|
|
"learning_rate": 1.2256308806526774e-06,
|
||
|
|
"loss": 0.2425,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25047585368156433,
|
||
|
|
"step": 3935,
|
||
|
|
"valid_targets_mean": 6452.8,
|
||
|
|
"valid_targets_min": 1215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.304,
|
||
|
|
"grad_norm": 0.4387172990204296,
|
||
|
|
"learning_rate": 1.1982757003876855e-06,
|
||
|
|
"loss": 0.2313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23491425812244415,
|
||
|
|
"step": 3940,
|
||
|
|
"valid_targets_mean": 5545.7,
|
||
|
|
"valid_targets_min": 885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.312,
|
||
|
|
"grad_norm": 0.4469901316225646,
|
||
|
|
"learning_rate": 1.1712198196482793e-06,
|
||
|
|
"loss": 0.2423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23085957765579224,
|
||
|
|
"step": 3945,
|
||
|
|
"valid_targets_mean": 5547.8,
|
||
|
|
"valid_targets_min": 3023
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.32,
|
||
|
|
"grad_norm": 0.4603135179912558,
|
||
|
|
"learning_rate": 1.1444636691296518e-06,
|
||
|
|
"loss": 0.2379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24019691348075867,
|
||
|
|
"step": 3950,
|
||
|
|
"valid_targets_mean": 4919.2,
|
||
|
|
"valid_targets_min": 724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.328,
|
||
|
|
"grad_norm": 0.4438177637134314,
|
||
|
|
"learning_rate": 1.11800767475567e-06,
|
||
|
|
"loss": 0.2639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2676534652709961,
|
||
|
|
"step": 3955,
|
||
|
|
"valid_targets_mean": 5193.1,
|
||
|
|
"valid_targets_min": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.336,
|
||
|
|
"grad_norm": 0.4199267913461744,
|
||
|
|
"learning_rate": 1.0918522576721014e-06,
|
||
|
|
"loss": 0.2572,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24460649490356445,
|
||
|
|
"step": 3960,
|
||
|
|
"valid_targets_mean": 5347.7,
|
||
|
|
"valid_targets_min": 763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.344,
|
||
|
|
"grad_norm": 0.4948993768878234,
|
||
|
|
"learning_rate": 1.0659978342399003e-06,
|
||
|
|
"loss": 0.2524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27012935280799866,
|
||
|
|
"step": 3965,
|
||
|
|
"valid_targets_mean": 4998.6,
|
||
|
|
"valid_targets_min": 2627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.352,
|
||
|
|
"grad_norm": 0.4098695529895006,
|
||
|
|
"learning_rate": 1.0404448160285897e-06,
|
||
|
|
"loss": 0.24,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2442832738161087,
|
||
|
|
"step": 3970,
|
||
|
|
"valid_targets_mean": 6394.7,
|
||
|
|
"valid_targets_min": 2311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.36,
|
||
|
|
"grad_norm": 0.4346467043954532,
|
||
|
|
"learning_rate": 1.0151936098097015e-06,
|
||
|
|
"loss": 0.2533,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26824837923049927,
|
||
|
|
"step": 3975,
|
||
|
|
"valid_targets_mean": 5873.1,
|
||
|
|
"valid_targets_min": 2276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.368,
|
||
|
|
"grad_norm": 0.4513988008025312,
|
||
|
|
"learning_rate": 9.902446175503089e-07,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2434309720993042,
|
||
|
|
"step": 3980,
|
||
|
|
"valid_targets_mean": 5546.4,
|
||
|
|
"valid_targets_min": 1814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.376,
|
||
|
|
"grad_norm": 0.41241583616978356,
|
||
|
|
"learning_rate": 9.655982364066197e-07,
|
||
|
|
"loss": 0.2508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2734718322753906,
|
||
|
|
"step": 3985,
|
||
|
|
"valid_targets_mean": 6247.4,
|
||
|
|
"valid_targets_min": 3358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.384,
|
||
|
|
"grad_norm": 0.5047686747359263,
|
||
|
|
"learning_rate": 9.412548587176595e-07,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22194236516952515,
|
||
|
|
"step": 3990,
|
||
|
|
"valid_targets_mean": 5368.3,
|
||
|
|
"valid_targets_min": 2132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.392,
|
||
|
|
"grad_norm": 0.44151918846121585,
|
||
|
|
"learning_rate": 9.172148719990237e-07,
|
||
|
|
"loss": 0.2328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2182375192642212,
|
||
|
|
"step": 3995,
|
||
|
|
"valid_targets_mean": 4883.4,
|
||
|
|
"valid_targets_min": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4,
|
||
|
|
"grad_norm": 0.41520968471331116,
|
||
|
|
"learning_rate": 8.934786589367106e-07,
|
||
|
|
"loss": 0.2292,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22132691740989685,
|
||
|
|
"step": 4000,
|
||
|
|
"valid_targets_mean": 5478.4,
|
||
|
|
"valid_targets_min": 913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.408,
|
||
|
|
"grad_norm": 0.48850525017353114,
|
||
|
|
"learning_rate": 8.700465973810246e-07,
|
||
|
|
"loss": 0.2444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22604086995124817,
|
||
|
|
"step": 4005,
|
||
|
|
"valid_targets_mean": 4439.8,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.416,
|
||
|
|
"grad_norm": 0.47475359432399017,
|
||
|
|
"learning_rate": 8.469190603405719e-07,
|
||
|
|
"loss": 0.2504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.260003924369812,
|
||
|
|
"step": 4010,
|
||
|
|
"valid_targets_mean": 5379.8,
|
||
|
|
"valid_targets_min": 862
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.424,
|
||
|
|
"grad_norm": 0.4224315307842758,
|
||
|
|
"learning_rate": 8.240964159763121e-07,
|
||
|
|
"loss": 0.2441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2336377203464508,
|
||
|
|
"step": 4015,
|
||
|
|
"valid_targets_mean": 5360.7,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.432,
|
||
|
|
"grad_norm": 0.44447996013306695,
|
||
|
|
"learning_rate": 8.015790275957003e-07,
|
||
|
|
"loss": 0.2494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2225901037454605,
|
||
|
|
"step": 4020,
|
||
|
|
"valid_targets_mean": 5149.2,
|
||
|
|
"valid_targets_min": 2655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.44,
|
||
|
|
"grad_norm": 0.4238855863067296,
|
||
|
|
"learning_rate": 7.793672536469077e-07,
|
||
|
|
"loss": 0.244,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21927297115325928,
|
||
|
|
"step": 4025,
|
||
|
|
"valid_targets_mean": 5357.9,
|
||
|
|
"valid_targets_min": 2314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.448,
|
||
|
|
"grad_norm": 0.4830003944984125,
|
||
|
|
"learning_rate": 7.574614477131081e-07,
|
||
|
|
"loss": 0.2502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2570672333240509,
|
||
|
|
"step": 4030,
|
||
|
|
"valid_targets_mean": 4749.3,
|
||
|
|
"valid_targets_min": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.456,
|
||
|
|
"grad_norm": 0.40887871261527975,
|
||
|
|
"learning_rate": 7.358619585068583e-07,
|
||
|
|
"loss": 0.2518,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2433190941810608,
|
||
|
|
"step": 4035,
|
||
|
|
"valid_targets_mean": 6246.4,
|
||
|
|
"valid_targets_min": 3007
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.464,
|
||
|
|
"grad_norm": 0.3859653370068934,
|
||
|
|
"learning_rate": 7.145691298645419e-07,
|
||
|
|
"loss": 0.242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24633197486400604,
|
||
|
|
"step": 4040,
|
||
|
|
"valid_targets_mean": 7010.8,
|
||
|
|
"valid_targets_min": 3496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4719999999999995,
|
||
|
|
"grad_norm": 0.38681864849268677,
|
||
|
|
"learning_rate": 6.935833007408965e-07,
|
||
|
|
"loss": 0.2522,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2680492699146271,
|
||
|
|
"step": 4045,
|
||
|
|
"valid_targets_mean": 6608.4,
|
||
|
|
"valid_targets_min": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.48,
|
||
|
|
"grad_norm": 0.4460709435308017,
|
||
|
|
"learning_rate": 6.729048052036136e-07,
|
||
|
|
"loss": 0.2442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2533029317855835,
|
||
|
|
"step": 4050,
|
||
|
|
"valid_targets_mean": 5791.8,
|
||
|
|
"valid_targets_min": 1604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4879999999999995,
|
||
|
|
"grad_norm": 0.3943743836605657,
|
||
|
|
"learning_rate": 6.52533972428031e-07,
|
||
|
|
"loss": 0.2489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23628069460391998,
|
||
|
|
"step": 4055,
|
||
|
|
"valid_targets_mean": 6551.3,
|
||
|
|
"valid_targets_min": 2696
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.496,
|
||
|
|
"grad_norm": 0.4312618998760607,
|
||
|
|
"learning_rate": 6.324711266918826e-07,
|
||
|
|
"loss": 0.2396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25112444162368774,
|
||
|
|
"step": 4060,
|
||
|
|
"valid_targets_mean": 5862.1,
|
||
|
|
"valid_targets_min": 2993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.504,
|
||
|
|
"grad_norm": 0.483174829368675,
|
||
|
|
"learning_rate": 6.127165873701457e-07,
|
||
|
|
"loss": 0.2501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23675619065761566,
|
||
|
|
"step": 4065,
|
||
|
|
"valid_targets_mean": 4392.4,
|
||
|
|
"valid_targets_min": 901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5120000000000005,
|
||
|
|
"grad_norm": 0.4197944086659229,
|
||
|
|
"learning_rate": 5.932706689299461e-07,
|
||
|
|
"loss": 0.243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2547464668750763,
|
||
|
|
"step": 4070,
|
||
|
|
"valid_targets_mean": 5488.9,
|
||
|
|
"valid_targets_min": 2215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.52,
|
||
|
|
"grad_norm": 0.33766862446417356,
|
||
|
|
"learning_rate": 5.741336809255615e-07,
|
||
|
|
"loss": 0.2497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25667327642440796,
|
||
|
|
"step": 4075,
|
||
|
|
"valid_targets_mean": 7697.0,
|
||
|
|
"valid_targets_min": 2813
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5280000000000005,
|
||
|
|
"grad_norm": 0.4533360710834701,
|
||
|
|
"learning_rate": 5.553059279934902e-07,
|
||
|
|
"loss": 0.2308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22701610624790192,
|
||
|
|
"step": 4080,
|
||
|
|
"valid_targets_mean": 5237.2,
|
||
|
|
"valid_targets_min": 1011
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.536,
|
||
|
|
"grad_norm": 0.3726240899705432,
|
||
|
|
"learning_rate": 5.36787709847597e-07,
|
||
|
|
"loss": 0.2441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2350490689277649,
|
||
|
|
"step": 4085,
|
||
|
|
"valid_targets_mean": 6899.0,
|
||
|
|
"valid_targets_min": 3302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5440000000000005,
|
||
|
|
"grad_norm": 0.421250464481422,
|
||
|
|
"learning_rate": 5.185793212743529e-07,
|
||
|
|
"loss": 0.243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2310941517353058,
|
||
|
|
"step": 4090,
|
||
|
|
"valid_targets_mean": 5545.6,
|
||
|
|
"valid_targets_min": 2301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.552,
|
||
|
|
"grad_norm": 0.45769843975283325,
|
||
|
|
"learning_rate": 5.006810521281335e-07,
|
||
|
|
"loss": 0.2505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2067297399044037,
|
||
|
|
"step": 4095,
|
||
|
|
"valid_targets_mean": 4581.6,
|
||
|
|
"valid_targets_min": 907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5600000000000005,
|
||
|
|
"grad_norm": 0.4005818383542897,
|
||
|
|
"learning_rate": 4.830931873266065e-07,
|
||
|
|
"loss": 0.2375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23793132603168488,
|
||
|
|
"step": 4100,
|
||
|
|
"valid_targets_mean": 5918.6,
|
||
|
|
"valid_targets_min": 611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.568,
|
||
|
|
"grad_norm": 0.39937305590430555,
|
||
|
|
"learning_rate": 4.658160068462025e-07,
|
||
|
|
"loss": 0.2505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2463841736316681,
|
||
|
|
"step": 4105,
|
||
|
|
"valid_targets_mean": 6267.2,
|
||
|
|
"valid_targets_min": 3317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.576,
|
||
|
|
"grad_norm": 0.32626143813018726,
|
||
|
|
"learning_rate": 4.488497857176466e-07,
|
||
|
|
"loss": 0.2516,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2482718527317047,
|
||
|
|
"step": 4110,
|
||
|
|
"valid_targets_mean": 7305.4,
|
||
|
|
"valid_targets_min": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.584,
|
||
|
|
"grad_norm": 0.4539225656394375,
|
||
|
|
"learning_rate": 4.321947940215898e-07,
|
||
|
|
"loss": 0.2461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23661956191062927,
|
||
|
|
"step": 4115,
|
||
|
|
"valid_targets_mean": 5279.0,
|
||
|
|
"valid_targets_min": 2007
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.592,
|
||
|
|
"grad_norm": 0.4344850114515945,
|
||
|
|
"learning_rate": 4.1585129688430425e-07,
|
||
|
|
"loss": 0.2476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24658052623271942,
|
||
|
|
"step": 4120,
|
||
|
|
"valid_targets_mean": 5324.8,
|
||
|
|
"valid_targets_min": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.6,
|
||
|
|
"grad_norm": 0.39310146860493744,
|
||
|
|
"learning_rate": 3.998195544734706e-07,
|
||
|
|
"loss": 0.2519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24207232892513275,
|
||
|
|
"step": 4125,
|
||
|
|
"valid_targets_mean": 6842.8,
|
||
|
|
"valid_targets_min": 2210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.608,
|
||
|
|
"grad_norm": 0.4149625856439961,
|
||
|
|
"learning_rate": 3.840998219940284e-07,
|
||
|
|
"loss": 0.2436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26455581188201904,
|
||
|
|
"step": 4130,
|
||
|
|
"valid_targets_mean": 5933.3,
|
||
|
|
"valid_targets_min": 2936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.616,
|
||
|
|
"grad_norm": 0.4058154946023268,
|
||
|
|
"learning_rate": 3.6869234968411214e-07,
|
||
|
|
"loss": 0.251,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25165531039237976,
|
||
|
|
"step": 4135,
|
||
|
|
"valid_targets_mean": 5976.5,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.624,
|
||
|
|
"grad_norm": 0.4120431574460261,
|
||
|
|
"learning_rate": 3.5359738281107504e-07,
|
||
|
|
"loss": 0.2419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24470007419586182,
|
||
|
|
"step": 4140,
|
||
|
|
"valid_targets_mean": 6167.4,
|
||
|
|
"valid_targets_min": 2083
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.632,
|
||
|
|
"grad_norm": 0.44621207055238954,
|
||
|
|
"learning_rate": 3.38815161667585e-07,
|
||
|
|
"loss": 0.2496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24108701944351196,
|
||
|
|
"step": 4145,
|
||
|
|
"valid_targets_mean": 5187.9,
|
||
|
|
"valid_targets_min": 2760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.64,
|
||
|
|
"grad_norm": 0.45740673758834627,
|
||
|
|
"learning_rate": 3.24345921567788e-07,
|
||
|
|
"loss": 0.2311,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25003916025161743,
|
||
|
|
"step": 4150,
|
||
|
|
"valid_targets_mean": 5214.8,
|
||
|
|
"valid_targets_min": 2432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.648,
|
||
|
|
"grad_norm": 0.38170623989649993,
|
||
|
|
"learning_rate": 3.101898928435754e-07,
|
||
|
|
"loss": 0.2434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26009601354599,
|
||
|
|
"step": 4155,
|
||
|
|
"valid_targets_mean": 6232.9,
|
||
|
|
"valid_targets_min": 2900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.656,
|
||
|
|
"grad_norm": 0.3949667344123087,
|
||
|
|
"learning_rate": 2.9634730084091343e-07,
|
||
|
|
"loss": 0.2388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2611731290817261,
|
||
|
|
"step": 4160,
|
||
|
|
"valid_targets_mean": 6492.0,
|
||
|
|
"valid_targets_min": 2597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.664,
|
||
|
|
"grad_norm": 0.481054045629589,
|
||
|
|
"learning_rate": 2.8281836591624865e-07,
|
||
|
|
"loss": 0.2336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22188538312911987,
|
||
|
|
"step": 4165,
|
||
|
|
"valid_targets_mean": 4702.8,
|
||
|
|
"valid_targets_min": 1883
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.672,
|
||
|
|
"grad_norm": 0.45164701161138154,
|
||
|
|
"learning_rate": 2.6960330343301033e-07,
|
||
|
|
"loss": 0.2364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23218591511249542,
|
||
|
|
"step": 4170,
|
||
|
|
"valid_targets_mean": 4975.9,
|
||
|
|
"valid_targets_min": 2262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.68,
|
||
|
|
"grad_norm": 0.4796809330908409,
|
||
|
|
"learning_rate": 2.5670232375817784e-07,
|
||
|
|
"loss": 0.2479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22703537344932556,
|
||
|
|
"step": 4175,
|
||
|
|
"valid_targets_mean": 4798.4,
|
||
|
|
"valid_targets_min": 859
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.688,
|
||
|
|
"grad_norm": 0.44726947654327165,
|
||
|
|
"learning_rate": 2.441156322589322e-07,
|
||
|
|
"loss": 0.2287,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21367965638637543,
|
||
|
|
"step": 4180,
|
||
|
|
"valid_targets_mean": 5252.1,
|
||
|
|
"valid_targets_min": 2079
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.696,
|
||
|
|
"grad_norm": 0.4199003621464118,
|
||
|
|
"learning_rate": 2.318434292993832e-07,
|
||
|
|
"loss": 0.2423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2607828378677368,
|
||
|
|
"step": 4185,
|
||
|
|
"valid_targets_mean": 6629.9,
|
||
|
|
"valid_targets_min": 3995
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.704,
|
||
|
|
"grad_norm": 0.4016367445657586,
|
||
|
|
"learning_rate": 2.1988591023738514e-07,
|
||
|
|
"loss": 0.248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26771795749664307,
|
||
|
|
"step": 4190,
|
||
|
|
"valid_targets_mean": 6307.6,
|
||
|
|
"valid_targets_min": 2672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.712,
|
||
|
|
"grad_norm": 0.399715800869208,
|
||
|
|
"learning_rate": 2.0824326542142835e-07,
|
||
|
|
"loss": 0.2314,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2620810270309448,
|
||
|
|
"step": 4195,
|
||
|
|
"valid_targets_mean": 6914.5,
|
||
|
|
"valid_targets_min": 2479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.72,
|
||
|
|
"grad_norm": 0.3910519268922181,
|
||
|
|
"learning_rate": 1.9691568018759931e-07,
|
||
|
|
"loss": 0.2353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23408928513526917,
|
||
|
|
"step": 4200,
|
||
|
|
"valid_targets_mean": 6041.6,
|
||
|
|
"valid_targets_min": 2297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.728,
|
||
|
|
"grad_norm": 0.40997014213714333,
|
||
|
|
"learning_rate": 1.8590333485664525e-07,
|
||
|
|
"loss": 0.2421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2437349408864975,
|
||
|
|
"step": 4205,
|
||
|
|
"valid_targets_mean": 5969.2,
|
||
|
|
"valid_targets_min": 2861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.736,
|
||
|
|
"grad_norm": 0.4079184645546954,
|
||
|
|
"learning_rate": 1.752064047310853e-07,
|
||
|
|
"loss": 0.2464,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2418249547481537,
|
||
|
|
"step": 4210,
|
||
|
|
"valid_targets_mean": 5699.6,
|
||
|
|
"valid_targets_min": 3429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.744,
|
||
|
|
"grad_norm": 0.437700129698411,
|
||
|
|
"learning_rate": 1.6482506009243949e-07,
|
||
|
|
"loss": 0.2466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29487597942352295,
|
||
|
|
"step": 4215,
|
||
|
|
"valid_targets_mean": 6602.3,
|
||
|
|
"valid_targets_min": 2588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.752,
|
||
|
|
"grad_norm": 0.4142521640099534,
|
||
|
|
"learning_rate": 1.5475946619850192e-07,
|
||
|
|
"loss": 0.248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24046289920806885,
|
||
|
|
"step": 4220,
|
||
|
|
"valid_targets_mean": 6069.1,
|
||
|
|
"valid_targets_min": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.76,
|
||
|
|
"grad_norm": 0.41546394357679706,
|
||
|
|
"learning_rate": 1.4500978328071845e-07,
|
||
|
|
"loss": 0.2341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25072425603866577,
|
||
|
|
"step": 4225,
|
||
|
|
"valid_targets_mean": 5360.8,
|
||
|
|
"valid_targets_min": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.768,
|
||
|
|
"grad_norm": 0.40151629403045647,
|
||
|
|
"learning_rate": 1.3557616654163775e-07,
|
||
|
|
"loss": 0.2487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2626868784427643,
|
||
|
|
"step": 4230,
|
||
|
|
"valid_targets_mean": 6657.1,
|
||
|
|
"valid_targets_min": 3302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.776,
|
||
|
|
"grad_norm": 0.42833620944626843,
|
||
|
|
"learning_rate": 1.264587661524308e-07,
|
||
|
|
"loss": 0.2445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24112708866596222,
|
||
|
|
"step": 4235,
|
||
|
|
"valid_targets_mean": 6090.4,
|
||
|
|
"valid_targets_min": 950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.784,
|
||
|
|
"grad_norm": 0.4154553942811618,
|
||
|
|
"learning_rate": 1.1765772725051084e-07,
|
||
|
|
"loss": 0.2313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23809021711349487,
|
||
|
|
"step": 4240,
|
||
|
|
"valid_targets_mean": 5470.9,
|
||
|
|
"valid_targets_min": 1628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.792,
|
||
|
|
"grad_norm": 0.4195908754448132,
|
||
|
|
"learning_rate": 1.0917318993721726e-07,
|
||
|
|
"loss": 0.2496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2500789165496826,
|
||
|
|
"step": 4245,
|
||
|
|
"valid_targets_mean": 5899.9,
|
||
|
|
"valid_targets_min": 2438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8,
|
||
|
|
"grad_norm": 0.45570500783237344,
|
||
|
|
"learning_rate": 1.0100528927558861e-07,
|
||
|
|
"loss": 0.231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20834442973136902,
|
||
|
|
"step": 4250,
|
||
|
|
"valid_targets_mean": 4979.1,
|
||
|
|
"valid_targets_min": 2591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.808,
|
||
|
|
"grad_norm": 0.4170242154763821,
|
||
|
|
"learning_rate": 9.31541552882087e-08,
|
||
|
|
"loss": 0.235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23040908575057983,
|
||
|
|
"step": 4255,
|
||
|
|
"valid_targets_mean": 5964.5,
|
||
|
|
"valid_targets_min": 2124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.816,
|
||
|
|
"grad_norm": 0.4142871848307513,
|
||
|
|
"learning_rate": 8.561991295514161e-08,
|
||
|
|
"loss": 0.2444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2491617053747177,
|
||
|
|
"step": 4260,
|
||
|
|
"valid_targets_mean": 5598.0,
|
||
|
|
"valid_targets_min": 1553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.824,
|
||
|
|
"grad_norm": 0.4151855097481202,
|
||
|
|
"learning_rate": 7.840268221193548e-08,
|
||
|
|
"loss": 0.2456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24346888065338135,
|
||
|
|
"step": 4265,
|
||
|
|
"valid_targets_mean": 6049.5,
|
||
|
|
"valid_targets_min": 2898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.832,
|
||
|
|
"grad_norm": 0.4555291535355044,
|
||
|
|
"learning_rate": 7.150257794772186e-08,
|
||
|
|
"loss": 0.2357,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24714088439941406,
|
||
|
|
"step": 4270,
|
||
|
|
"valid_targets_mean": 6723.9,
|
||
|
|
"valid_targets_min": 926
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.84,
|
||
|
|
"grad_norm": 0.4369171425961876,
|
||
|
|
"learning_rate": 6.491971000337938e-08,
|
||
|
|
"loss": 0.2413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22927583754062653,
|
||
|
|
"step": 4275,
|
||
|
|
"valid_targets_mean": 5355.7,
|
||
|
|
"valid_targets_min": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.848,
|
||
|
|
"grad_norm": 0.3603313297065621,
|
||
|
|
"learning_rate": 5.8654183169788435e-08,
|
||
|
|
"loss": 0.2543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2546162009239197,
|
||
|
|
"step": 4280,
|
||
|
|
"valid_targets_mean": 7805.7,
|
||
|
|
"valid_targets_min": 2702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.856,
|
||
|
|
"grad_norm": 0.3776558735589143,
|
||
|
|
"learning_rate": 5.270609718616593e-08,
|
||
|
|
"loss": 0.2542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2700864374637604,
|
||
|
|
"step": 4285,
|
||
|
|
"valid_targets_mean": 7140.9,
|
||
|
|
"valid_targets_min": 2497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.864,
|
||
|
|
"grad_norm": 0.3910196634027121,
|
||
|
|
"learning_rate": 4.70755467384687e-08,
|
||
|
|
"loss": 0.2508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2360091507434845,
|
||
|
|
"step": 4290,
|
||
|
|
"valid_targets_mean": 6045.0,
|
||
|
|
"valid_targets_min": 2924
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.872,
|
||
|
|
"grad_norm": 0.42067440511825865,
|
||
|
|
"learning_rate": 4.176262145789478e-08,
|
||
|
|
"loss": 0.2381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23304817080497742,
|
||
|
|
"step": 4295,
|
||
|
|
"valid_targets_mean": 6235.3,
|
||
|
|
"valid_targets_min": 2477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.88,
|
||
|
|
"grad_norm": 0.44819785709400006,
|
||
|
|
"learning_rate": 3.676740591945782e-08,
|
||
|
|
"loss": 0.2263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23022767901420593,
|
||
|
|
"step": 4300,
|
||
|
|
"valid_targets_mean": 5170.4,
|
||
|
|
"valid_targets_min": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.888,
|
||
|
|
"grad_norm": 0.38159591470198784,
|
||
|
|
"learning_rate": 3.208997964062821e-08,
|
||
|
|
"loss": 0.2529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.266330748796463,
|
||
|
|
"step": 4305,
|
||
|
|
"valid_targets_mean": 6728.8,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.896,
|
||
|
|
"grad_norm": 0.47438857326603057,
|
||
|
|
"learning_rate": 2.773041708008295e-08,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21114903688430786,
|
||
|
|
"step": 4310,
|
||
|
|
"valid_targets_mean": 4498.0,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.904,
|
||
|
|
"grad_norm": 0.4169788152049192,
|
||
|
|
"learning_rate": 2.3688787636511057e-08,
|
||
|
|
"loss": 0.2502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2451530396938324,
|
||
|
|
"step": 4315,
|
||
|
|
"valid_targets_mean": 5779.1,
|
||
|
|
"valid_targets_min": 2389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.912,
|
||
|
|
"grad_norm": 0.5314249782039797,
|
||
|
|
"learning_rate": 1.9965155647507782e-08,
|
||
|
|
"loss": 0.2372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24146723747253418,
|
||
|
|
"step": 4320,
|
||
|
|
"valid_targets_mean": 4286.8,
|
||
|
|
"valid_targets_min": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.92,
|
||
|
|
"grad_norm": 0.42573633152148477,
|
||
|
|
"learning_rate": 1.655958038855765e-08,
|
||
|
|
"loss": 0.2513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.257783442735672,
|
||
|
|
"step": 4325,
|
||
|
|
"valid_targets_mean": 5627.4,
|
||
|
|
"valid_targets_min": 881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.928,
|
||
|
|
"grad_norm": 0.4097519577004155,
|
||
|
|
"learning_rate": 1.3472116072084096e-08,
|
||
|
|
"loss": 0.2319,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23979437351226807,
|
||
|
|
"step": 4330,
|
||
|
|
"valid_targets_mean": 5950.1,
|
||
|
|
"valid_targets_min": 3580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.936,
|
||
|
|
"grad_norm": 0.4454465260722158,
|
||
|
|
"learning_rate": 1.0702811846590167e-08,
|
||
|
|
"loss": 0.2524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2746885418891907,
|
||
|
|
"step": 4335,
|
||
|
|
"valid_targets_mean": 5342.9,
|
||
|
|
"valid_targets_min": 954
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.944,
|
||
|
|
"grad_norm": 0.41412006546592484,
|
||
|
|
"learning_rate": 8.251711795876916e-09,
|
||
|
|
"loss": 0.2337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24524347484111786,
|
||
|
|
"step": 4340,
|
||
|
|
"valid_targets_mean": 5723.5,
|
||
|
|
"valid_targets_min": 1678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.952,
|
||
|
|
"grad_norm": 0.4452590210279377,
|
||
|
|
"learning_rate": 6.1188549383373044e-09,
|
||
|
|
"loss": 0.2293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22402501106262207,
|
||
|
|
"step": 4345,
|
||
|
|
"valid_targets_mean": 5053.6,
|
||
|
|
"valid_targets_min": 1317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.96,
|
||
|
|
"grad_norm": 0.4249708336351826,
|
||
|
|
"learning_rate": 4.304275226338916e-09,
|
||
|
|
"loss": 0.2402,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23752881586551666,
|
||
|
|
"step": 4350,
|
||
|
|
"valid_targets_mean": 5775.6,
|
||
|
|
"valid_targets_min": 729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.968,
|
||
|
|
"grad_norm": 0.4351604501867694,
|
||
|
|
"learning_rate": 2.8080015456799503e-09,
|
||
|
|
"loss": 0.2254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2576856017112732,
|
||
|
|
"step": 4355,
|
||
|
|
"valid_targets_mean": 5823.6,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.976,
|
||
|
|
"grad_norm": 0.43551585909366247,
|
||
|
|
"learning_rate": 1.6300577151340257e-09,
|
||
|
|
"loss": 0.2384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23403063416481018,
|
||
|
|
"step": 4360,
|
||
|
|
"valid_targets_mean": 5693.9,
|
||
|
|
"valid_targets_min": 3118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.984,
|
||
|
|
"grad_norm": 0.44352689496306524,
|
||
|
|
"learning_rate": 7.70462486070489e-10,
|
||
|
|
"loss": 0.2471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2642480134963989,
|
||
|
|
"step": 4365,
|
||
|
|
"valid_targets_mean": 6007.5,
|
||
|
|
"valid_targets_min": 2710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.992,
|
||
|
|
"grad_norm": 0.41338621237648826,
|
||
|
|
"learning_rate": 2.2922954214799065e-10,
|
||
|
|
"loss": 0.2598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2713726758956909,
|
||
|
|
"step": 4370,
|
||
|
|
"valid_targets_mean": 6049.0,
|
||
|
|
"valid_targets_min": 2589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"grad_norm": 0.4110206394678341,
|
||
|
|
"learning_rate": 6.367499107984288e-12,
|
||
|
|
"loss": 0.2365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23771843314170837,
|
||
|
|
"step": 4375,
|
||
|
|
"valid_targets_mean": 6046.8,
|
||
|
|
"valid_targets_min": 2950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23771843314170837,
|
||
|
|
"step": 4375,
|
||
|
|
"total_flos": 1967928670814208.0,
|
||
|
|
"train_loss": 0.28350407949175155,
|
||
|
|
"train_runtime": 36852.1456,
|
||
|
|
"train_samples_per_second": 1.899,
|
||
|
|
"train_steps_per_second": 0.119,
|
||
|
|
"valid_targets_mean": 6046.8,
|
||
|
|
"valid_targets_min": 2950
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 4375,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 1500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 1967928670814208.0,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|