9750 lines
271 KiB
JSON
9750 lines
271 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4410,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007936507936507936,
|
|
"grad_norm": 13.936445999727159,
|
|
"learning_rate": 3.6281179138322e-07,
|
|
"loss": 0.5831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5446114540100098,
|
|
"step": 5,
|
|
"valid_targets_mean": 5181.8,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 0.015873015873015872,
|
|
"grad_norm": 14.14405406674101,
|
|
"learning_rate": 8.163265306122449e-07,
|
|
"loss": 0.6026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6077768802642822,
|
|
"step": 10,
|
|
"valid_targets_mean": 5303.6,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 0.023809523809523808,
|
|
"grad_norm": 13.385853949153486,
|
|
"learning_rate": 1.26984126984127e-06,
|
|
"loss": 0.5824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5799890160560608,
|
|
"step": 15,
|
|
"valid_targets_mean": 5301.1,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"grad_norm": 10.791756912090966,
|
|
"learning_rate": 1.723356009070295e-06,
|
|
"loss": 0.5901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5597653985023499,
|
|
"step": 20,
|
|
"valid_targets_mean": 4823.9,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 0.03968253968253968,
|
|
"grad_norm": 6.581205621105729,
|
|
"learning_rate": 2.17687074829932e-06,
|
|
"loss": 0.5323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4685205817222595,
|
|
"step": 25,
|
|
"valid_targets_mean": 4813.8,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 0.047619047619047616,
|
|
"grad_norm": 3.7853786833152236,
|
|
"learning_rate": 2.6303854875283447e-06,
|
|
"loss": 0.4573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4164578914642334,
|
|
"step": 30,
|
|
"valid_targets_mean": 5283.2,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.05555555555555555,
|
|
"grad_norm": 2.602330607184181,
|
|
"learning_rate": 3.08390022675737e-06,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39225059747695923,
|
|
"step": 35,
|
|
"valid_targets_mean": 4700.6,
|
|
"valid_targets_min": 2390
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"grad_norm": 1.4233023861991225,
|
|
"learning_rate": 3.537414965986395e-06,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35967135429382324,
|
|
"step": 40,
|
|
"valid_targets_mean": 5445.3,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.07142857142857142,
|
|
"grad_norm": 1.0170115963512993,
|
|
"learning_rate": 3.99092970521542e-06,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3903147876262665,
|
|
"step": 45,
|
|
"valid_targets_mean": 6464.0,
|
|
"valid_targets_min": 3085
|
|
},
|
|
{
|
|
"epoch": 0.07936507936507936,
|
|
"grad_norm": 0.8370897006461435,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33025485277175903,
|
|
"step": 50,
|
|
"valid_targets_mean": 5718.5,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 0.0873015873015873,
|
|
"grad_norm": 0.7252658896074926,
|
|
"learning_rate": 4.897959183673469e-06,
|
|
"loss": 0.3611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34258341789245605,
|
|
"step": 55,
|
|
"valid_targets_mean": 5999.1,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"grad_norm": 0.756722250222215,
|
|
"learning_rate": 5.3514739229024945e-06,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3988223075866699,
|
|
"step": 60,
|
|
"valid_targets_mean": 4506.9,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 0.10317460317460317,
|
|
"grad_norm": 0.568685223882473,
|
|
"learning_rate": 5.80498866213152e-06,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3268760144710541,
|
|
"step": 65,
|
|
"valid_targets_mean": 6105.9,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 0.1111111111111111,
|
|
"grad_norm": 0.5731290626935265,
|
|
"learning_rate": 6.258503401360545e-06,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3184238374233246,
|
|
"step": 70,
|
|
"valid_targets_mean": 5088.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.11904761904761904,
|
|
"grad_norm": 0.5739988755146004,
|
|
"learning_rate": 6.71201814058957e-06,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34080153703689575,
|
|
"step": 75,
|
|
"valid_targets_mean": 5674.6,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"grad_norm": 0.5021784940463249,
|
|
"learning_rate": 7.165532879818595e-06,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3233030438423157,
|
|
"step": 80,
|
|
"valid_targets_mean": 5277.2,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 0.1349206349206349,
|
|
"grad_norm": 0.5521401363464609,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2872031331062317,
|
|
"step": 85,
|
|
"valid_targets_mean": 5470.0,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 0.4751497127812398,
|
|
"learning_rate": 8.072562358276645e-06,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212955594062805,
|
|
"step": 90,
|
|
"valid_targets_mean": 6301.1,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 0.15079365079365079,
|
|
"grad_norm": 0.43696107468178264,
|
|
"learning_rate": 8.52607709750567e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2621079683303833,
|
|
"step": 95,
|
|
"valid_targets_mean": 5617.8,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"grad_norm": 0.5242468737414164,
|
|
"learning_rate": 8.979591836734695e-06,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3428232669830322,
|
|
"step": 100,
|
|
"valid_targets_mean": 5715.6,
|
|
"valid_targets_min": 2266
|
|
},
|
|
{
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 0.4723266645052996,
|
|
"learning_rate": 9.43310657596372e-06,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27712035179138184,
|
|
"step": 105,
|
|
"valid_targets_mean": 5371.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.1746031746031746,
|
|
"grad_norm": 0.49793096409078624,
|
|
"learning_rate": 9.886621315192746e-06,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675686478614807,
|
|
"step": 110,
|
|
"valid_targets_mean": 4750.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.18253968253968253,
|
|
"grad_norm": 0.4902776042402625,
|
|
"learning_rate": 1.034013605442177e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30433300137519836,
|
|
"step": 115,
|
|
"valid_targets_mean": 5146.1,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"grad_norm": 0.465412157163947,
|
|
"learning_rate": 1.0793650793650794e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920241057872772,
|
|
"step": 120,
|
|
"valid_targets_mean": 5453.2,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 0.1984126984126984,
|
|
"grad_norm": 0.4355039639514751,
|
|
"learning_rate": 1.124716553287982e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529776692390442,
|
|
"step": 125,
|
|
"valid_targets_mean": 5727.6,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 0.20634920634920634,
|
|
"grad_norm": 0.4996682572655225,
|
|
"learning_rate": 1.1700680272108845e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821199297904968,
|
|
"step": 130,
|
|
"valid_targets_mean": 5396.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 0.21428571428571427,
|
|
"grad_norm": 0.43094951290178424,
|
|
"learning_rate": 1.215419501133787e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24614138901233673,
|
|
"step": 135,
|
|
"valid_targets_mean": 5842.7,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"grad_norm": 0.47723431745558176,
|
|
"learning_rate": 1.2607709750566895e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890702486038208,
|
|
"step": 140,
|
|
"valid_targets_mean": 5354.1,
|
|
"valid_targets_min": 2307
|
|
},
|
|
{
|
|
"epoch": 0.23015873015873015,
|
|
"grad_norm": 0.4617350150011481,
|
|
"learning_rate": 1.3061224489795918e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510530948638916,
|
|
"step": 145,
|
|
"valid_targets_mean": 5552.1,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 0.23809523809523808,
|
|
"grad_norm": 0.45824070397194494,
|
|
"learning_rate": 1.3514739229024945e-05,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26986801624298096,
|
|
"step": 150,
|
|
"valid_targets_mean": 5441.2,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 0.24603174603174602,
|
|
"grad_norm": 0.4392573652714541,
|
|
"learning_rate": 1.3968253968253968e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2496776133775711,
|
|
"step": 155,
|
|
"valid_targets_mean": 5731.4,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"grad_norm": 0.4536964041682826,
|
|
"learning_rate": 1.4421768707482994e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21673960983753204,
|
|
"step": 160,
|
|
"valid_targets_mean": 6079.4,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 0.2619047619047619,
|
|
"grad_norm": 0.5619742860376928,
|
|
"learning_rate": 1.4875283446712018e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804248631000519,
|
|
"step": 165,
|
|
"valid_targets_mean": 5392.2,
|
|
"valid_targets_min": 2214
|
|
},
|
|
{
|
|
"epoch": 0.2698412698412698,
|
|
"grad_norm": 0.4588222248723898,
|
|
"learning_rate": 1.5328798185941044e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24595819413661957,
|
|
"step": 170,
|
|
"valid_targets_mean": 5367.2,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 0.2777777777777778,
|
|
"grad_norm": 0.4837210875448078,
|
|
"learning_rate": 1.578231292517007e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26808232069015503,
|
|
"step": 175,
|
|
"valid_targets_mean": 4694.2,
|
|
"valid_targets_min": 1970
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.4642135470304616,
|
|
"learning_rate": 1.6235827664399097e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25557032227516174,
|
|
"step": 180,
|
|
"valid_targets_mean": 5724.9,
|
|
"valid_targets_min": 1981
|
|
},
|
|
{
|
|
"epoch": 0.29365079365079366,
|
|
"grad_norm": 0.4404678599566564,
|
|
"learning_rate": 1.668934240362812e-05,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2403090000152588,
|
|
"step": 185,
|
|
"valid_targets_mean": 5580.2,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 0.30158730158730157,
|
|
"grad_norm": 0.461513428421082,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2342744767665863,
|
|
"step": 190,
|
|
"valid_targets_mean": 5209.1,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 0.30952380952380953,
|
|
"grad_norm": 0.4616028237565255,
|
|
"learning_rate": 1.759637188208617e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2381066232919693,
|
|
"step": 195,
|
|
"valid_targets_mean": 4881.2,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"grad_norm": 0.5048131717477712,
|
|
"learning_rate": 1.8049886621315194e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24961230158805847,
|
|
"step": 200,
|
|
"valid_targets_mean": 4766.8,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.3253968253968254,
|
|
"grad_norm": 0.4608218526938904,
|
|
"learning_rate": 1.8503401360544218e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22085292637348175,
|
|
"step": 205,
|
|
"valid_targets_mean": 5626.6,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 0.5276409706041367,
|
|
"learning_rate": 1.8956916099773243e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2512398362159729,
|
|
"step": 210,
|
|
"valid_targets_mean": 5207.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 0.3412698412698413,
|
|
"grad_norm": 0.44125944724862465,
|
|
"learning_rate": 1.941043083900227e-05,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23039595782756805,
|
|
"step": 215,
|
|
"valid_targets_mean": 6150.5,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"grad_norm": 0.8225576873165226,
|
|
"learning_rate": 1.9863945578231295e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2745077311992645,
|
|
"step": 220,
|
|
"valid_targets_mean": 4586.4,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.35714285714285715,
|
|
"grad_norm": 0.44559302418983976,
|
|
"learning_rate": 2.031746031746032e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27263548970222473,
|
|
"step": 225,
|
|
"valid_targets_mean": 5901.4,
|
|
"valid_targets_min": 1813
|
|
},
|
|
{
|
|
"epoch": 0.36507936507936506,
|
|
"grad_norm": 0.4702793229346846,
|
|
"learning_rate": 2.0770975056689343e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2387492060661316,
|
|
"step": 230,
|
|
"valid_targets_mean": 5740.0,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 0.373015873015873,
|
|
"grad_norm": 0.5771251589812603,
|
|
"learning_rate": 2.122448979591837e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27772706747055054,
|
|
"step": 235,
|
|
"valid_targets_mean": 5892.1,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"grad_norm": 0.4716600277551803,
|
|
"learning_rate": 2.1678004535147395e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24430419504642487,
|
|
"step": 240,
|
|
"valid_targets_mean": 5370.8,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 0.3888888888888889,
|
|
"grad_norm": 0.4834321959641482,
|
|
"learning_rate": 2.213151927437642e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554604709148407,
|
|
"step": 245,
|
|
"valid_targets_mean": 5334.6,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 0.3968253968253968,
|
|
"grad_norm": 0.4468651198346908,
|
|
"learning_rate": 2.2585034013605444e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22178535163402557,
|
|
"step": 250,
|
|
"valid_targets_mean": 5966.1,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 0.40476190476190477,
|
|
"grad_norm": 0.4750302614876995,
|
|
"learning_rate": 2.3038548752834472e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26613864302635193,
|
|
"step": 255,
|
|
"valid_targets_mean": 5677.9,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"grad_norm": 0.4468674852795431,
|
|
"learning_rate": 2.3492063492063496e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535470724105835,
|
|
"step": 260,
|
|
"valid_targets_mean": 5571.2,
|
|
"valid_targets_min": 3116
|
|
},
|
|
{
|
|
"epoch": 0.42063492063492064,
|
|
"grad_norm": 0.4435957536640986,
|
|
"learning_rate": 2.394557823129252e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24253222346305847,
|
|
"step": 265,
|
|
"valid_targets_mean": 5520.6,
|
|
"valid_targets_min": 2481
|
|
},
|
|
{
|
|
"epoch": 0.42857142857142855,
|
|
"grad_norm": 0.48192905697263166,
|
|
"learning_rate": 2.439909297052154e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22654183208942413,
|
|
"step": 270,
|
|
"valid_targets_mean": 5040.6,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 0.4365079365079365,
|
|
"grad_norm": 0.4654909902022295,
|
|
"learning_rate": 2.4852607709750566e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22321158647537231,
|
|
"step": 275,
|
|
"valid_targets_mean": 4938.6,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"grad_norm": 0.47136311558318184,
|
|
"learning_rate": 2.5306122448979597e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25398164987564087,
|
|
"step": 280,
|
|
"valid_targets_mean": 5719.1,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 0.4523809523809524,
|
|
"grad_norm": 0.5143544056685512,
|
|
"learning_rate": 2.5759637188208618e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28001242876052856,
|
|
"step": 285,
|
|
"valid_targets_mean": 4379.2,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 0.4603174603174603,
|
|
"grad_norm": 0.40992433170257936,
|
|
"learning_rate": 2.6213151927437642e-05,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19944259524345398,
|
|
"step": 290,
|
|
"valid_targets_mean": 6113.2,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 0.46825396825396826,
|
|
"grad_norm": 0.7108778765163672,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676636576652527,
|
|
"step": 295,
|
|
"valid_targets_mean": 4878.0,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"grad_norm": 0.7424501325702221,
|
|
"learning_rate": 2.7120181405895694e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27534204721450806,
|
|
"step": 300,
|
|
"valid_targets_mean": 4737.1,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 0.48412698412698413,
|
|
"grad_norm": 0.4554032952042926,
|
|
"learning_rate": 2.757369614512472e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23061871528625488,
|
|
"step": 305,
|
|
"valid_targets_mean": 5816.9,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 0.49206349206349204,
|
|
"grad_norm": 0.43918142641279273,
|
|
"learning_rate": 2.8027210884353743e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2266923487186432,
|
|
"step": 310,
|
|
"valid_targets_mean": 5512.4,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.46398515425879433,
|
|
"learning_rate": 2.8480725623582767e-05,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21582689881324768,
|
|
"step": 315,
|
|
"valid_targets_mean": 5874.5,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"grad_norm": 0.4043326688329922,
|
|
"learning_rate": 2.893424036281179e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21339790523052216,
|
|
"step": 320,
|
|
"valid_targets_mean": 5820.1,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 0.5158730158730159,
|
|
"grad_norm": 0.502316882086023,
|
|
"learning_rate": 2.938775510204082e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20219871401786804,
|
|
"step": 325,
|
|
"valid_targets_mean": 5578.6,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 0.5238095238095238,
|
|
"grad_norm": 0.9835515697061404,
|
|
"learning_rate": 2.9841269841269844e-05,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2298191487789154,
|
|
"step": 330,
|
|
"valid_targets_mean": 4911.0,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.5317460317460317,
|
|
"grad_norm": 0.4647735663644707,
|
|
"learning_rate": 3.0294784580498868e-05,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22781239449977875,
|
|
"step": 335,
|
|
"valid_targets_mean": 5532.8,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"grad_norm": 0.5438389697935898,
|
|
"learning_rate": 3.074829931972789e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22640562057495117,
|
|
"step": 340,
|
|
"valid_targets_mean": 4285.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 0.5476190476190477,
|
|
"grad_norm": 0.4334285919558895,
|
|
"learning_rate": 3.1201814058956924e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21913409233093262,
|
|
"step": 345,
|
|
"valid_targets_mean": 5222.7,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 0.4738405937337707,
|
|
"learning_rate": 3.1655328798185945e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25035279989242554,
|
|
"step": 350,
|
|
"valid_targets_mean": 5209.8,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 0.5634920634920635,
|
|
"grad_norm": 0.4555272170875938,
|
|
"learning_rate": 3.2108843537414965e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21295906603336334,
|
|
"step": 355,
|
|
"valid_targets_mean": 4887.6,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.4607956603180466,
|
|
"learning_rate": 3.256235827664399e-05,
|
|
"loss": 0.2242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23422232270240784,
|
|
"step": 360,
|
|
"valid_targets_mean": 5982.9,
|
|
"valid_targets_min": 3791
|
|
},
|
|
{
|
|
"epoch": 0.5793650793650794,
|
|
"grad_norm": 0.46019446827222377,
|
|
"learning_rate": 3.3015873015873014e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21903209388256073,
|
|
"step": 365,
|
|
"valid_targets_mean": 5330.9,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 0.5873015873015873,
|
|
"grad_norm": 0.6042052076393369,
|
|
"learning_rate": 3.346938775510204e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2550661861896515,
|
|
"step": 370,
|
|
"valid_targets_mean": 4556.8,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 0.5952380952380952,
|
|
"grad_norm": 0.44853135001478606,
|
|
"learning_rate": 3.392290249433107e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.202229842543602,
|
|
"step": 375,
|
|
"valid_targets_mean": 5618.4,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"grad_norm": 0.4114084841092905,
|
|
"learning_rate": 3.437641723356009e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.208021879196167,
|
|
"step": 380,
|
|
"valid_targets_mean": 6323.6,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 0.6111111111111112,
|
|
"grad_norm": 0.4907016504547875,
|
|
"learning_rate": 3.482993197278912e-05,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2262699007987976,
|
|
"step": 385,
|
|
"valid_targets_mean": 5458.8,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 0.6190476190476191,
|
|
"grad_norm": 0.4470894371245897,
|
|
"learning_rate": 3.5283446712018146e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2284412682056427,
|
|
"step": 390,
|
|
"valid_targets_mean": 5275.5,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.626984126984127,
|
|
"grad_norm": 0.8292394623205982,
|
|
"learning_rate": 3.573696145124717e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23321247100830078,
|
|
"step": 395,
|
|
"valid_targets_mean": 4546.7,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"grad_norm": 0.5101519952218712,
|
|
"learning_rate": 3.6190476190476195e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22552572190761566,
|
|
"step": 400,
|
|
"valid_targets_mean": 4713.0,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.6428571428571429,
|
|
"grad_norm": 0.4782175383328679,
|
|
"learning_rate": 3.6643990929705216e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22066287696361542,
|
|
"step": 405,
|
|
"valid_targets_mean": 5557.4,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 0.6507936507936508,
|
|
"grad_norm": 0.6813010668498629,
|
|
"learning_rate": 3.7097505668934243e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23460529744625092,
|
|
"step": 410,
|
|
"valid_targets_mean": 4301.2,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 0.6587301587301587,
|
|
"grad_norm": 0.42762135214795316,
|
|
"learning_rate": 3.755102040816327e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22290262579917908,
|
|
"step": 415,
|
|
"valid_targets_mean": 5696.5,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 0.7113003892096883,
|
|
"learning_rate": 3.800453514739229e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24848781526088715,
|
|
"step": 420,
|
|
"valid_targets_mean": 5861.1,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 0.6746031746031746,
|
|
"grad_norm": 0.4223786613285171,
|
|
"learning_rate": 3.845804988662132e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23021358251571655,
|
|
"step": 425,
|
|
"valid_targets_mean": 5603.6,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 0.6825396825396826,
|
|
"grad_norm": 0.46482319003042916,
|
|
"learning_rate": 3.891156462585034e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2257668375968933,
|
|
"step": 430,
|
|
"valid_targets_mean": 5665.9,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 0.6904761904761905,
|
|
"grad_norm": 0.49517195170111594,
|
|
"learning_rate": 3.936507936507937e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24368128180503845,
|
|
"step": 435,
|
|
"valid_targets_mean": 5276.9,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"grad_norm": 0.4533449896226927,
|
|
"learning_rate": 3.9818594104308396e-05,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2182699739933014,
|
|
"step": 440,
|
|
"valid_targets_mean": 5190.2,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 0.7063492063492064,
|
|
"grad_norm": 0.47604972418524916,
|
|
"learning_rate": 3.999994361288785e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27464592456817627,
|
|
"step": 445,
|
|
"valid_targets_mean": 5377.2,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.4843309348827549,
|
|
"learning_rate": 3.9999599026131644e-05,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21664546430110931,
|
|
"step": 450,
|
|
"valid_targets_mean": 5402.9,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 0.7222222222222222,
|
|
"grad_norm": 0.4382635205661119,
|
|
"learning_rate": 3.999894118418342e-05,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22355255484580994,
|
|
"step": 455,
|
|
"valid_targets_mean": 5052.1,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"grad_norm": 0.472980100407786,
|
|
"learning_rate": 3.999797009734697e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22827665507793427,
|
|
"step": 460,
|
|
"valid_targets_mean": 5404.2,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 0.7380952380952381,
|
|
"grad_norm": 0.43558241143554155,
|
|
"learning_rate": 3.999668578083253e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22950908541679382,
|
|
"step": 465,
|
|
"valid_targets_mean": 5786.8,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.746031746031746,
|
|
"grad_norm": 0.39301407866116134,
|
|
"learning_rate": 3.9995088254756434e-05,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20144161581993103,
|
|
"step": 470,
|
|
"valid_targets_mean": 6067.9,
|
|
"valid_targets_min": 3477
|
|
},
|
|
{
|
|
"epoch": 0.753968253968254,
|
|
"grad_norm": 0.4335866805247727,
|
|
"learning_rate": 3.999317754414084e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24468550086021423,
|
|
"step": 475,
|
|
"valid_targets_mean": 5383.6,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"grad_norm": 0.43530076929025974,
|
|
"learning_rate": 3.999095367891337e-05,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2087598741054535,
|
|
"step": 480,
|
|
"valid_targets_mean": 5412.7,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 0.7698412698412699,
|
|
"grad_norm": 0.42581385510950404,
|
|
"learning_rate": 3.9988416693906563e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20418357849121094,
|
|
"step": 485,
|
|
"valid_targets_mean": 5400.4,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.7777777777777778,
|
|
"grad_norm": 0.6201401443531581,
|
|
"learning_rate": 3.9985566628857425e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22904717922210693,
|
|
"step": 490,
|
|
"valid_targets_mean": 5283.9,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 0.7857142857142857,
|
|
"grad_norm": 0.40573600003328514,
|
|
"learning_rate": 3.998240352840672e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19750481843948364,
|
|
"step": 495,
|
|
"valid_targets_mean": 5820.5,
|
|
"valid_targets_min": 2527
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"grad_norm": 0.6282176812699868,
|
|
"learning_rate": 3.997892744209833e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21052823960781097,
|
|
"step": 500,
|
|
"valid_targets_mean": 3286.4,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 0.8015873015873016,
|
|
"grad_norm": 0.4150664416041001,
|
|
"learning_rate": 3.997513842437845e-05,
|
|
"loss": 0.2168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20114190876483917,
|
|
"step": 505,
|
|
"valid_targets_mean": 6746.6,
|
|
"valid_targets_min": 3503
|
|
},
|
|
{
|
|
"epoch": 0.8095238095238095,
|
|
"grad_norm": 0.5580131451084269,
|
|
"learning_rate": 3.997103653459475e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21351292729377747,
|
|
"step": 510,
|
|
"valid_targets_mean": 5427.2,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 0.8174603174603174,
|
|
"grad_norm": 0.4539249289508295,
|
|
"learning_rate": 3.996662183699541e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22894957661628723,
|
|
"step": 515,
|
|
"valid_targets_mean": 6587.3,
|
|
"valid_targets_min": 2987
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"grad_norm": 0.4606135797915741,
|
|
"learning_rate": 3.996189440072818e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23994050920009613,
|
|
"step": 520,
|
|
"valid_targets_mean": 5603.4,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.46485471964434133,
|
|
"learning_rate": 3.9956854299839246e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24385607242584229,
|
|
"step": 525,
|
|
"valid_targets_mean": 5554.9,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 0.8412698412698413,
|
|
"grad_norm": 0.5070540327503203,
|
|
"learning_rate": 3.9951501613272076e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311379760503769,
|
|
"step": 530,
|
|
"valid_targets_mean": 5792.5,
|
|
"valid_targets_min": 2950
|
|
},
|
|
{
|
|
"epoch": 0.8492063492063492,
|
|
"grad_norm": 0.4509301237412979,
|
|
"learning_rate": 3.994583642486618e-05,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22014960646629333,
|
|
"step": 535,
|
|
"valid_targets_mean": 5551.4,
|
|
"valid_targets_min": 2454
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.37450581843880576,
|
|
"learning_rate": 3.993985882335584e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17562690377235413,
|
|
"step": 540,
|
|
"valid_targets_mean": 6399.9,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 0.8650793650793651,
|
|
"grad_norm": 0.4181520509392521,
|
|
"learning_rate": 3.993356890236866e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997915804386139,
|
|
"step": 545,
|
|
"valid_targets_mean": 5282.3,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.873015873015873,
|
|
"grad_norm": 0.40651126602493426,
|
|
"learning_rate": 3.992696676042414e-05,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20215590298175812,
|
|
"step": 550,
|
|
"valid_targets_mean": 6478.2,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 0.8809523809523809,
|
|
"grad_norm": 0.42510576646915893,
|
|
"learning_rate": 3.992005250093211e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20907750725746155,
|
|
"step": 555,
|
|
"valid_targets_mean": 5629.7,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"grad_norm": 0.4812454511222598,
|
|
"learning_rate": 3.991282623219113e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22547686100006104,
|
|
"step": 560,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 0.8968253968253969,
|
|
"grad_norm": 0.4203217407948123,
|
|
"learning_rate": 3.9905288067386776e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21304470300674438,
|
|
"step": 565,
|
|
"valid_targets_mean": 5743.1,
|
|
"valid_targets_min": 2659
|
|
},
|
|
{
|
|
"epoch": 0.9047619047619048,
|
|
"grad_norm": 0.5536262458911827,
|
|
"learning_rate": 3.989743812458987e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22015127539634705,
|
|
"step": 570,
|
|
"valid_targets_mean": 5742.7,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.9126984126984127,
|
|
"grad_norm": 0.44923723778131375,
|
|
"learning_rate": 3.9889276526754664e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2490832507610321,
|
|
"step": 575,
|
|
"valid_targets_mean": 5566.1,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"grad_norm": 0.4833832932237736,
|
|
"learning_rate": 3.988080340171685e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2266359031200409,
|
|
"step": 580,
|
|
"valid_targets_mean": 4794.4,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.9285714285714286,
|
|
"grad_norm": 0.5314150951022422,
|
|
"learning_rate": 3.987201888219161e-05,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23443414270877838,
|
|
"step": 585,
|
|
"valid_targets_mean": 3981.2,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.9365079365079365,
|
|
"grad_norm": 0.44573566583926183,
|
|
"learning_rate": 3.986292310577153e-05,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997448205947876,
|
|
"step": 590,
|
|
"valid_targets_mean": 5087.9,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 0.9444444444444444,
|
|
"grad_norm": 0.4823118351901083,
|
|
"learning_rate": 3.9853516214924416e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21190452575683594,
|
|
"step": 595,
|
|
"valid_targets_mean": 4769.4,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"grad_norm": 0.9701252803781317,
|
|
"learning_rate": 3.9843798356991096e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21730470657348633,
|
|
"step": 600,
|
|
"valid_targets_mean": 5590.2,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 0.9603174603174603,
|
|
"grad_norm": 0.8528374645310175,
|
|
"learning_rate": 3.9833769684183104e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25169217586517334,
|
|
"step": 605,
|
|
"valid_targets_mean": 5481.6,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.9682539682539683,
|
|
"grad_norm": 0.5719512687258413,
|
|
"learning_rate": 3.982343035358026e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25562554597854614,
|
|
"step": 610,
|
|
"valid_targets_mean": 4258.5,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 0.9761904761904762,
|
|
"grad_norm": 0.42417870906303506,
|
|
"learning_rate": 3.981278052712827e-05,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19715425372123718,
|
|
"step": 615,
|
|
"valid_targets_mean": 6383.4,
|
|
"valid_targets_min": 2557
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"grad_norm": 0.5592731470756656,
|
|
"learning_rate": 3.9801820371636157e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21826912462711334,
|
|
"step": 620,
|
|
"valid_targets_mean": 5242.9,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 0.9920634920634921,
|
|
"grad_norm": 0.45536035098972394,
|
|
"learning_rate": 3.979055005877364e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23044517636299133,
|
|
"step": 625,
|
|
"valid_targets_mean": 6434.3,
|
|
"valid_targets_min": 1909
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.45688834618759994,
|
|
"learning_rate": 3.977896976506845e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20650140941143036,
|
|
"step": 630,
|
|
"valid_targets_mean": 4983.3,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 1.007936507936508,
|
|
"grad_norm": 0.45783302985716545,
|
|
"learning_rate": 3.976707967190358e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1878741830587387,
|
|
"step": 635,
|
|
"valid_targets_mean": 6036.8,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 1.0158730158730158,
|
|
"grad_norm": 0.3696615133926402,
|
|
"learning_rate": 3.9754879965514456e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17767128348350525,
|
|
"step": 640,
|
|
"valid_targets_mean": 6949.4,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 1.0238095238095237,
|
|
"grad_norm": 0.5301335563601449,
|
|
"learning_rate": 3.9742370836985956e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18509696424007416,
|
|
"step": 645,
|
|
"valid_targets_mean": 5665.9,
|
|
"valid_targets_min": 3373
|
|
},
|
|
{
|
|
"epoch": 1.0317460317460316,
|
|
"grad_norm": 0.45339150425457114,
|
|
"learning_rate": 3.972955248224949e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19851231575012207,
|
|
"step": 650,
|
|
"valid_targets_mean": 5496.2,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 1.0396825396825398,
|
|
"grad_norm": 0.4563819518280238,
|
|
"learning_rate": 3.971642510207989e-05,
|
|
"loss": 0.1999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20360763370990753,
|
|
"step": 655,
|
|
"valid_targets_mean": 5350.4,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.0476190476190477,
|
|
"grad_norm": 0.5625847542647227,
|
|
"learning_rate": 3.9702988902092274e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1898064911365509,
|
|
"step": 660,
|
|
"valid_targets_mean": 5748.2,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 1.0555555555555556,
|
|
"grad_norm": 0.425299703122137,
|
|
"learning_rate": 3.968924409273884e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19356761872768402,
|
|
"step": 665,
|
|
"valid_targets_mean": 5209.3,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 1.0634920634920635,
|
|
"grad_norm": 0.47286769247229504,
|
|
"learning_rate": 3.9675190889305545e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1817602813243866,
|
|
"step": 670,
|
|
"valid_targets_mean": 5672.6,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 1.0714285714285714,
|
|
"grad_norm": 0.432773210737844,
|
|
"learning_rate": 3.966082951190874e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2104063630104065,
|
|
"step": 675,
|
|
"valid_targets_mean": 6248.0,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 1.0793650793650793,
|
|
"grad_norm": 0.4706439453673335,
|
|
"learning_rate": 3.9646160185491756e-05,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18827101588249207,
|
|
"step": 680,
|
|
"valid_targets_mean": 4876.6,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 1.0873015873015872,
|
|
"grad_norm": 0.5338851223858002,
|
|
"learning_rate": 3.963118313982131e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18436592817306519,
|
|
"step": 685,
|
|
"valid_targets_mean": 4361.0,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 1.0952380952380953,
|
|
"grad_norm": 0.41520798715060503,
|
|
"learning_rate": 3.961589860948399e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21202310919761658,
|
|
"step": 690,
|
|
"valid_targets_mean": 5486.4,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 1.1031746031746033,
|
|
"grad_norm": 0.42676794941537016,
|
|
"learning_rate": 3.960030683388251e-05,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17921066284179688,
|
|
"step": 695,
|
|
"valid_targets_mean": 5590.2,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.40164984647020757,
|
|
"learning_rate": 3.9584408057232e-05,
|
|
"loss": 0.1928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17369890213012695,
|
|
"step": 700,
|
|
"valid_targets_mean": 5022.5,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.119047619047619,
|
|
"grad_norm": 0.4557865055131249,
|
|
"learning_rate": 3.956820252855618e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21178218722343445,
|
|
"step": 705,
|
|
"valid_targets_mean": 5212.6,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 1.126984126984127,
|
|
"grad_norm": 0.4737440529809228,
|
|
"learning_rate": 3.955169050168343e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18235670030117035,
|
|
"step": 710,
|
|
"valid_targets_mean": 4897.6,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 1.1349206349206349,
|
|
"grad_norm": 0.36301667189206566,
|
|
"learning_rate": 3.953487223524283e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15816360712051392,
|
|
"step": 715,
|
|
"valid_targets_mean": 6158.5,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 1.1428571428571428,
|
|
"grad_norm": 0.4512962324237001,
|
|
"learning_rate": 3.951774799266014e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1824280321598053,
|
|
"step": 720,
|
|
"valid_targets_mean": 5561.8,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 1.1507936507936507,
|
|
"grad_norm": 0.4511979075727613,
|
|
"learning_rate": 3.950031804215364e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2006044089794159,
|
|
"step": 725,
|
|
"valid_targets_mean": 5573.5,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 1.1587301587301586,
|
|
"grad_norm": 0.42676374430180725,
|
|
"learning_rate": 3.948258265672991e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21262499690055847,
|
|
"step": 730,
|
|
"valid_targets_mean": 5894.2,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 1.1666666666666667,
|
|
"grad_norm": 0.669542399200941,
|
|
"learning_rate": 3.946454211417961e-05,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20334014296531677,
|
|
"step": 735,
|
|
"valid_targets_mean": 5074.4,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 1.1746031746031746,
|
|
"grad_norm": 0.45162646285557456,
|
|
"learning_rate": 3.944619669707309e-05,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.215205579996109,
|
|
"step": 740,
|
|
"valid_targets_mean": 5439.4,
|
|
"valid_targets_min": 1907
|
|
},
|
|
{
|
|
"epoch": 1.1825396825396826,
|
|
"grad_norm": 0.4710256071640483,
|
|
"learning_rate": 3.9427546692755946e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21397551894187927,
|
|
"step": 745,
|
|
"valid_targets_mean": 5276.3,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 1.1904761904761905,
|
|
"grad_norm": 0.4625751950531012,
|
|
"learning_rate": 3.9408592393344596e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19521868228912354,
|
|
"step": 750,
|
|
"valid_targets_mean": 5905.4,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 1.1984126984126984,
|
|
"grad_norm": 0.4677554812825755,
|
|
"learning_rate": 3.9389334095721606e-05,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21653307974338531,
|
|
"step": 755,
|
|
"valid_targets_mean": 4797.6,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 1.2063492063492063,
|
|
"grad_norm": 0.546242046347798,
|
|
"learning_rate": 3.936977210153113e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20400574803352356,
|
|
"step": 760,
|
|
"valid_targets_mean": 5519.1,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 1.2142857142857142,
|
|
"grad_norm": 0.4204392123363325,
|
|
"learning_rate": 3.93499067171741e-05,
|
|
"loss": 0.2077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22943107783794403,
|
|
"step": 765,
|
|
"valid_targets_mean": 6420.3,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 1.2222222222222223,
|
|
"grad_norm": 0.4682155865822684,
|
|
"learning_rate": 3.932973825380351e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22972063720226288,
|
|
"step": 770,
|
|
"valid_targets_mean": 4946.4,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 1.2301587301587302,
|
|
"grad_norm": 0.5301139897940557,
|
|
"learning_rate": 3.9309267027319485e-05,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23306161165237427,
|
|
"step": 775,
|
|
"valid_targets_mean": 3961.6,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 1.2380952380952381,
|
|
"grad_norm": 0.40365821128282314,
|
|
"learning_rate": 3.928849335836435e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18574738502502441,
|
|
"step": 780,
|
|
"valid_targets_mean": 5806.0,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 1.246031746031746,
|
|
"grad_norm": 0.47813886067964756,
|
|
"learning_rate": 3.926741757231761e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21044926345348358,
|
|
"step": 785,
|
|
"valid_targets_mean": 5299.6,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 1.253968253968254,
|
|
"grad_norm": 0.40253903041192324,
|
|
"learning_rate": 3.924603999929086e-05,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2096974402666092,
|
|
"step": 790,
|
|
"valid_targets_mean": 5558.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.2619047619047619,
|
|
"grad_norm": 0.4549298029596003,
|
|
"learning_rate": 3.9224360974122584e-05,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23242492973804474,
|
|
"step": 795,
|
|
"valid_targets_mean": 5270.6,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 1.2698412698412698,
|
|
"grad_norm": 0.4695471820608219,
|
|
"learning_rate": 3.920238083637297e-05,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20831601321697235,
|
|
"step": 800,
|
|
"valid_targets_mean": 5308.8,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 1.2777777777777777,
|
|
"grad_norm": 0.7642610362979803,
|
|
"learning_rate": 3.9180099930318524e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1991654336452484,
|
|
"step": 805,
|
|
"valid_targets_mean": 6815.1,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.4440164128697404,
|
|
"learning_rate": 3.915751860494672e-05,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16493773460388184,
|
|
"step": 810,
|
|
"valid_targets_mean": 5028.6,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 1.2936507936507937,
|
|
"grad_norm": 0.43322694583144405,
|
|
"learning_rate": 3.913463721395051e-05,
|
|
"loss": 0.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1892578899860382,
|
|
"step": 815,
|
|
"valid_targets_mean": 4793.7,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 1.3015873015873016,
|
|
"grad_norm": 0.4113857371999088,
|
|
"learning_rate": 3.911145611572282e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22850877046585083,
|
|
"step": 820,
|
|
"valid_targets_mean": 6205.9,
|
|
"valid_targets_min": 3119
|
|
},
|
|
{
|
|
"epoch": 1.3095238095238095,
|
|
"grad_norm": 0.4024454753442706,
|
|
"learning_rate": 3.908797567335089e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1995319277048111,
|
|
"step": 825,
|
|
"valid_targets_mean": 6545.4,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 1.3174603174603174,
|
|
"grad_norm": 0.4766793634475361,
|
|
"learning_rate": 3.906419625461062e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21162061393260956,
|
|
"step": 830,
|
|
"valid_targets_mean": 5270.3,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.3253968253968254,
|
|
"grad_norm": 0.43124302476576,
|
|
"learning_rate": 3.90401182319608e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2024182230234146,
|
|
"step": 835,
|
|
"valid_targets_mean": 5587.5,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 1.3333333333333333,
|
|
"grad_norm": 0.37620838473431895,
|
|
"learning_rate": 3.9015741982537265e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16263701021671295,
|
|
"step": 840,
|
|
"valid_targets_mean": 6486.5,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 1.3412698412698414,
|
|
"grad_norm": 0.4227366879146679,
|
|
"learning_rate": 3.899106788814701e-05,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19696393609046936,
|
|
"step": 845,
|
|
"valid_targets_mean": 5981.3,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 1.3492063492063493,
|
|
"grad_norm": 0.4396285787879683,
|
|
"learning_rate": 3.896609633526219e-05,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21186983585357666,
|
|
"step": 850,
|
|
"valid_targets_mean": 5076.4,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 1.3571428571428572,
|
|
"grad_norm": 0.4809038294139755,
|
|
"learning_rate": 3.894082771501407e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.191837877035141,
|
|
"step": 855,
|
|
"valid_targets_mean": 5344.6,
|
|
"valid_targets_min": 3615
|
|
},
|
|
{
|
|
"epoch": 1.3650793650793651,
|
|
"grad_norm": 0.48473797503352833,
|
|
"learning_rate": 3.891526242318692e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16904303431510925,
|
|
"step": 860,
|
|
"valid_targets_mean": 4677.6,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 1.373015873015873,
|
|
"grad_norm": 0.3848141194705589,
|
|
"learning_rate": 3.8889400860211785e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19609695672988892,
|
|
"step": 865,
|
|
"valid_targets_mean": 5797.4,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.380952380952381,
|
|
"grad_norm": 0.43935568195512203,
|
|
"learning_rate": 3.886324343116023e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21925979852676392,
|
|
"step": 870,
|
|
"valid_targets_mean": 5510.4,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 1.3888888888888888,
|
|
"grad_norm": 0.3787284592232413,
|
|
"learning_rate": 3.883679054573799e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17324265837669373,
|
|
"step": 875,
|
|
"valid_targets_mean": 6371.8,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 1.3968253968253967,
|
|
"grad_norm": 0.48231311291457907,
|
|
"learning_rate": 3.881004261827856e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22266656160354614,
|
|
"step": 880,
|
|
"valid_targets_mean": 5407.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 1.4047619047619047,
|
|
"grad_norm": 0.39562200883867676,
|
|
"learning_rate": 3.878300006773669e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19309642910957336,
|
|
"step": 885,
|
|
"valid_targets_mean": 6324.1,
|
|
"valid_targets_min": 3843
|
|
},
|
|
{
|
|
"epoch": 1.4126984126984126,
|
|
"grad_norm": 0.4536286972970636,
|
|
"learning_rate": 3.875566331768184e-05,
|
|
"loss": 0.1989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.205659881234169,
|
|
"step": 890,
|
|
"valid_targets_mean": 4451.7,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 1.4206349206349207,
|
|
"grad_norm": 0.40917484623660716,
|
|
"learning_rate": 3.872803279629155e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21330603957176208,
|
|
"step": 895,
|
|
"valid_targets_mean": 5915.2,
|
|
"valid_targets_min": 2792
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.44208512274029477,
|
|
"learning_rate": 3.8700108936344705e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21972954273223877,
|
|
"step": 900,
|
|
"valid_targets_mean": 4555.5,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.4365079365079365,
|
|
"grad_norm": 0.44093255819884136,
|
|
"learning_rate": 3.867189217521477e-05,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20141087472438812,
|
|
"step": 905,
|
|
"valid_targets_mean": 5635.6,
|
|
"valid_targets_min": 2966
|
|
},
|
|
{
|
|
"epoch": 1.4444444444444444,
|
|
"grad_norm": 0.4623153556330511,
|
|
"learning_rate": 3.864338295486297e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21589690446853638,
|
|
"step": 910,
|
|
"valid_targets_mean": 4678.0,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 1.4523809523809523,
|
|
"grad_norm": 0.41249982388927653,
|
|
"learning_rate": 3.8614581721831316e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2026449292898178,
|
|
"step": 915,
|
|
"valid_targets_mean": 5216.7,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 1.4603174603174602,
|
|
"grad_norm": 0.4104480770890901,
|
|
"learning_rate": 3.858548892723563e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1731051206588745,
|
|
"step": 920,
|
|
"valid_targets_mean": 5339.4,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.4682539682539684,
|
|
"grad_norm": 0.41895380180198477,
|
|
"learning_rate": 3.855610502675851e-05,
|
|
"loss": 0.1987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2031707614660263,
|
|
"step": 925,
|
|
"valid_targets_mean": 5263.9,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 1.4761904761904763,
|
|
"grad_norm": 0.4261994122248901,
|
|
"learning_rate": 3.852643048064215e-05,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2084016501903534,
|
|
"step": 930,
|
|
"valid_targets_mean": 5398.7,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 1.4841269841269842,
|
|
"grad_norm": 0.44461914330875957,
|
|
"learning_rate": 3.8496465753681145e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.203968346118927,
|
|
"step": 935,
|
|
"valid_targets_mean": 5705.4,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 1.492063492063492,
|
|
"grad_norm": 0.3619697764513735,
|
|
"learning_rate": 3.846621131521522e-05,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16397273540496826,
|
|
"step": 940,
|
|
"valid_targets_mean": 5586.3,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 0.47927964908668164,
|
|
"learning_rate": 3.843566763912187e-05,
|
|
"loss": 0.1938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19876304268836975,
|
|
"step": 945,
|
|
"valid_targets_mean": 5481.6,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 1.507936507936508,
|
|
"grad_norm": 0.43177345256816557,
|
|
"learning_rate": 3.840483520380896e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19520597159862518,
|
|
"step": 950,
|
|
"valid_targets_mean": 5171.9,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.5158730158730158,
|
|
"grad_norm": 0.45814725913527155,
|
|
"learning_rate": 3.837371449220717e-05,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.191867396235466,
|
|
"step": 955,
|
|
"valid_targets_mean": 6292.2,
|
|
"valid_targets_min": 3359
|
|
},
|
|
{
|
|
"epoch": 1.5238095238095237,
|
|
"grad_norm": 0.3660046705130465,
|
|
"learning_rate": 3.834230599176251e-05,
|
|
"loss": 0.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16699504852294922,
|
|
"step": 960,
|
|
"valid_targets_mean": 6213.4,
|
|
"valid_targets_min": 2488
|
|
},
|
|
{
|
|
"epoch": 1.5317460317460316,
|
|
"grad_norm": 0.3890844674038831,
|
|
"learning_rate": 3.831061019442864e-05,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20834946632385254,
|
|
"step": 965,
|
|
"valid_targets_mean": 5682.8,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 1.5396825396825395,
|
|
"grad_norm": 0.442290989827857,
|
|
"learning_rate": 3.827862759665916e-05,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2266271859407425,
|
|
"step": 970,
|
|
"valid_targets_mean": 5248.5,
|
|
"valid_targets_min": 1995
|
|
},
|
|
{
|
|
"epoch": 1.5476190476190477,
|
|
"grad_norm": 0.4156925522612072,
|
|
"learning_rate": 3.8246358699399853e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16675469279289246,
|
|
"step": 975,
|
|
"valid_targets_mean": 4952.9,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 1.5555555555555556,
|
|
"grad_norm": 0.37604931667074504,
|
|
"learning_rate": 3.8213804008080824e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18879935145378113,
|
|
"step": 980,
|
|
"valid_targets_mean": 6674.0,
|
|
"valid_targets_min": 3077
|
|
},
|
|
{
|
|
"epoch": 1.5634920634920635,
|
|
"grad_norm": 0.4287194995251398,
|
|
"learning_rate": 3.818096403260862e-05,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20438791811466217,
|
|
"step": 985,
|
|
"valid_targets_mean": 5784.4,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.44022259559851695,
|
|
"learning_rate": 3.8147839287358185e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21569909155368805,
|
|
"step": 990,
|
|
"valid_targets_mean": 5262.6,
|
|
"valid_targets_min": 1953
|
|
},
|
|
{
|
|
"epoch": 1.5793650793650795,
|
|
"grad_norm": 0.466156003317299,
|
|
"learning_rate": 3.8114430291164836e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1877652108669281,
|
|
"step": 995,
|
|
"valid_targets_mean": 4195.9,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 1.5873015873015874,
|
|
"grad_norm": 0.38334955845745405,
|
|
"learning_rate": 3.808073756731615e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1762232482433319,
|
|
"step": 1000,
|
|
"valid_targets_mean": 5545.6,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 1.5952380952380953,
|
|
"grad_norm": 0.5439994931547266,
|
|
"learning_rate": 3.8046761643543734e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1913871467113495,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4491.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 1.6031746031746033,
|
|
"grad_norm": 0.40075365186274436,
|
|
"learning_rate": 3.8012503052014996e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1946803331375122,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5425.6,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 1.6111111111111112,
|
|
"grad_norm": 0.3877028365107908,
|
|
"learning_rate": 3.797796232932476e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21146497130393982,
|
|
"step": 1015,
|
|
"valid_targets_mean": 6453.6,
|
|
"valid_targets_min": 2839
|
|
},
|
|
{
|
|
"epoch": 1.619047619047619,
|
|
"grad_norm": 0.3668527782681026,
|
|
"learning_rate": 3.794314001648692e-05,
|
|
"loss": 0.2022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18072551488876343,
|
|
"step": 1020,
|
|
"valid_targets_mean": 6010.3,
|
|
"valid_targets_min": 2661
|
|
},
|
|
{
|
|
"epoch": 1.626984126984127,
|
|
"grad_norm": 0.44781816078627307,
|
|
"learning_rate": 3.7908036658925926e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2001311182975769,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5515.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.6349206349206349,
|
|
"grad_norm": 0.40397196550171016,
|
|
"learning_rate": 3.787265280646825e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19972187280654907,
|
|
"step": 1030,
|
|
"valid_targets_mean": 5461.7,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.6428571428571428,
|
|
"grad_norm": 0.4356586536795959,
|
|
"learning_rate": 3.7836989013333776e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1890064775943756,
|
|
"step": 1035,
|
|
"valid_targets_mean": 5751.4,
|
|
"valid_targets_min": 211
|
|
},
|
|
{
|
|
"epoch": 1.6507936507936507,
|
|
"grad_norm": 0.44136645672562447,
|
|
"learning_rate": 3.780104583812712e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1881527304649353,
|
|
"step": 1040,
|
|
"valid_targets_mean": 5342.1,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 1.6587301587301586,
|
|
"grad_norm": 0.4490662637016875,
|
|
"learning_rate": 3.7764823843828883e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20181840658187866,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4998.9,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.414814689156328,
|
|
"learning_rate": 3.7728323597786834e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2113458514213562,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5317.9,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 1.6746031746031746,
|
|
"grad_norm": 0.4002735439554692,
|
|
"learning_rate": 3.7691545671707007e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19339720904827118,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5467.6,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 1.6825396825396826,
|
|
"grad_norm": 0.34935950618209505,
|
|
"learning_rate": 3.765449064164477e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16387102007865906,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5961.0,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 1.6904761904761905,
|
|
"grad_norm": 0.4617875758265747,
|
|
"learning_rate": 3.7617159087995784e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20451588928699493,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5329.4,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 1.6984126984126984,
|
|
"grad_norm": 0.43000065083291655,
|
|
"learning_rate": 3.757955159548693e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19835901260375977,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5593.4,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 1.7063492063492065,
|
|
"grad_norm": 0.3606371311330236,
|
|
"learning_rate": 3.754166875316713e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18690404295921326,
|
|
"step": 1075,
|
|
"valid_targets_mean": 6369.9,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 1.7142857142857144,
|
|
"grad_norm": 0.39285827605092777,
|
|
"learning_rate": 3.750351115439812e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20067650079727173,
|
|
"step": 1080,
|
|
"valid_targets_mean": 5648.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.7222222222222223,
|
|
"grad_norm": 0.38570908603943815,
|
|
"learning_rate": 3.746507939684519e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19070516526699066,
|
|
"step": 1085,
|
|
"valid_targets_mean": 6492.1,
|
|
"valid_targets_min": 3263
|
|
},
|
|
{
|
|
"epoch": 1.7301587301587302,
|
|
"grad_norm": 0.39910170101731635,
|
|
"learning_rate": 3.742637408246779e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21158373355865479,
|
|
"step": 1090,
|
|
"valid_targets_mean": 6281.1,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 1.7380952380952381,
|
|
"grad_norm": 0.49874739628178993,
|
|
"learning_rate": 3.73873958175101e-05,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20216044783592224,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4570.4,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.746031746031746,
|
|
"grad_norm": 0.47674802821687573,
|
|
"learning_rate": 3.734814521249156e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21278969943523407,
|
|
"step": 1100,
|
|
"valid_targets_mean": 6688.0,
|
|
"valid_targets_min": 4913
|
|
},
|
|
{
|
|
"epoch": 1.753968253968254,
|
|
"grad_norm": 0.4208345610037382,
|
|
"learning_rate": 3.7308622882197294e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20342180132865906,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4974.2,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 1.7619047619047619,
|
|
"grad_norm": 0.5036978972360446,
|
|
"learning_rate": 3.7268829445668456e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19987335801124573,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4656.1,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 1.7698412698412698,
|
|
"grad_norm": 0.4475822251477478,
|
|
"learning_rate": 3.722876552619257e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1833523064851761,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5454.4,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 1.7777777777777777,
|
|
"grad_norm": 0.42301407783978096,
|
|
"learning_rate": 3.718843175129378e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1914646029472351,
|
|
"step": 1120,
|
|
"valid_targets_mean": 6252.5,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 0.3667105929189479,
|
|
"learning_rate": 3.7147828752722944e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18096204102039337,
|
|
"step": 1125,
|
|
"valid_targets_mean": 5728.2,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 1.7936507936507935,
|
|
"grad_norm": 0.3988857634933404,
|
|
"learning_rate": 3.7106957166447834e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20091086626052856,
|
|
"step": 1130,
|
|
"valid_targets_mean": 6323.2,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 1.8015873015873016,
|
|
"grad_norm": 0.4353965199064458,
|
|
"learning_rate": 3.7065817632643115e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2031416893005371,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5034.0,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 1.8095238095238095,
|
|
"grad_norm": 0.3835363845414605,
|
|
"learning_rate": 3.7024410795680326e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20650315284729004,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5709.4,
|
|
"valid_targets_min": 2868
|
|
},
|
|
{
|
|
"epoch": 1.8174603174603174,
|
|
"grad_norm": 0.6017933902148771,
|
|
"learning_rate": 3.698273730411782e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21874158084392548,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4664.8,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 1.8253968253968254,
|
|
"grad_norm": 0.5010674614611315,
|
|
"learning_rate": 3.694079781069053e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22206932306289673,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5182.3,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 1.8333333333333335,
|
|
"grad_norm": 0.3923636627441356,
|
|
"learning_rate": 3.6898592972299875e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17895236611366272,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5307.1,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.8412698412698414,
|
|
"grad_norm": 0.46097320064253466,
|
|
"learning_rate": 3.6856123450003306e-05,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17629393935203552,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5396.4,
|
|
"valid_targets_min": 1717
|
|
},
|
|
{
|
|
"epoch": 1.8492063492063493,
|
|
"grad_norm": 0.4118263198386797,
|
|
"learning_rate": 3.68133899090041e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21603277325630188,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5743.1,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.40887713694857647,
|
|
"learning_rate": 3.677039301864085e-05,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21704469621181488,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5467.9,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 1.8650793650793651,
|
|
"grad_norm": 0.3934682878408651,
|
|
"learning_rate": 3.672713345237701e-05,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19257432222366333,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5539.5,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 1.873015873015873,
|
|
"grad_norm": 0.40297241825152724,
|
|
"learning_rate": 3.6683611887790356e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22532138228416443,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5603.6,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 1.880952380952381,
|
|
"grad_norm": 0.4525568905524082,
|
|
"learning_rate": 3.663982900656236e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21516132354736328,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4871.1,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 1.8888888888888888,
|
|
"grad_norm": 0.3826095420283405,
|
|
"learning_rate": 3.6595785494467516e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2126571089029312,
|
|
"step": 1190,
|
|
"valid_targets_mean": 6149.8,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 1.8968253968253967,
|
|
"grad_norm": 0.49491876878445984,
|
|
"learning_rate": 3.655148204136259e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1980828046798706,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4812.1,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.9047619047619047,
|
|
"grad_norm": 0.3844425255192722,
|
|
"learning_rate": 3.650691934117584e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17985425889492035,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5473.3,
|
|
"valid_targets_min": 2775
|
|
},
|
|
{
|
|
"epoch": 1.9126984126984126,
|
|
"grad_norm": 0.4025013315745311,
|
|
"learning_rate": 3.646209809189611e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2079595923423767,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5829.4,
|
|
"valid_targets_min": 2090
|
|
},
|
|
{
|
|
"epoch": 1.9206349206349205,
|
|
"grad_norm": 0.3718395023938511,
|
|
"learning_rate": 3.641701899556192e-05,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20109693706035614,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5616.2,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 1.9285714285714286,
|
|
"grad_norm": 0.41192663980117594,
|
|
"learning_rate": 3.63716827582505e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19556891918182373,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5405.4,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 1.9365079365079365,
|
|
"grad_norm": 0.4061502796105446,
|
|
"learning_rate": 3.632609009006665e-05,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1885574758052826,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4830.4,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 1.9444444444444444,
|
|
"grad_norm": 0.394449808870156,
|
|
"learning_rate": 3.62802417051317e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800915151834488,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5700.7,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 1.9523809523809523,
|
|
"grad_norm": 0.4440079869221486,
|
|
"learning_rate": 3.6234138321572274e-05,
|
|
"loss": 0.1999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17726945877075195,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5161.5,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 1.9603174603174605,
|
|
"grad_norm": 0.4620529228104198,
|
|
"learning_rate": 3.6187780661509074e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22657737135887146,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5007.8,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 1.9682539682539684,
|
|
"grad_norm": 0.4536224923690941,
|
|
"learning_rate": 3.6141169451045526e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1943274736404419,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4127.1,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 1.9761904761904763,
|
|
"grad_norm": 0.46913089325037155,
|
|
"learning_rate": 3.609430542025646e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17612981796264648,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5872.5,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 1.9841269841269842,
|
|
"grad_norm": 0.40006665679686376,
|
|
"learning_rate": 3.604718930317664e-05,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21723848581314087,
|
|
"step": 1250,
|
|
"valid_targets_mean": 5627.9,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 1.992063492063492,
|
|
"grad_norm": 0.4171450102000576,
|
|
"learning_rate": 3.5999821837789275e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17687958478927612,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4820.7,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.39376114464436146,
|
|
"learning_rate": 3.595220376601447e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16747349500656128,
|
|
"step": 1260,
|
|
"valid_targets_mean": 5956.8,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 2.007936507936508,
|
|
"grad_norm": 0.42367770753786105,
|
|
"learning_rate": 3.590433583369758e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.164157897233963,
|
|
"step": 1265,
|
|
"valid_targets_mean": 6732.4,
|
|
"valid_targets_min": 3077
|
|
},
|
|
{
|
|
"epoch": 2.015873015873016,
|
|
"grad_norm": 0.43491398653549607,
|
|
"learning_rate": 3.5856218790597554e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16858386993408203,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4635.7,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 2.0238095238095237,
|
|
"grad_norm": 0.4336346848049278,
|
|
"learning_rate": 3.580785339037519e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1782153844833374,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5509.9,
|
|
"valid_targets_min": 2820
|
|
},
|
|
{
|
|
"epoch": 2.0317460317460316,
|
|
"grad_norm": 0.4219922314179232,
|
|
"learning_rate": 3.57592403905813e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17715319991111755,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5722.5,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.0396825396825395,
|
|
"grad_norm": 0.3940209379170676,
|
|
"learning_rate": 3.571038055264489e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16919057071208954,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5806.7,
|
|
"valid_targets_min": 2137
|
|
},
|
|
{
|
|
"epoch": 2.0476190476190474,
|
|
"grad_norm": 0.9513050625567275,
|
|
"learning_rate": 3.566127464186119e-05,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17070958018302917,
|
|
"step": 1290,
|
|
"valid_targets_mean": 6109.1,
|
|
"valid_targets_min": 2815
|
|
},
|
|
{
|
|
"epoch": 2.0555555555555554,
|
|
"grad_norm": 0.39178871800874376,
|
|
"learning_rate": 3.56119234273797e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15980932116508484,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6312.2,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 2.0634920634920633,
|
|
"grad_norm": 0.4545939454742837,
|
|
"learning_rate": 3.5562327682192134e-05,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17266622185707092,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5758.9,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 2.0714285714285716,
|
|
"grad_norm": 0.40752366743563423,
|
|
"learning_rate": 3.5512488183120286e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18903039395809174,
|
|
"step": 1305,
|
|
"valid_targets_mean": 6031.5,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 2.0793650793650795,
|
|
"grad_norm": 0.43154212786267104,
|
|
"learning_rate": 3.54624057108039e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1685219407081604,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4359.1,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 2.0873015873015874,
|
|
"grad_norm": 0.41368733303715804,
|
|
"learning_rate": 3.5412081049688444e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1651342809200287,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5127.6,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 2.0952380952380953,
|
|
"grad_norm": 0.38589454302426857,
|
|
"learning_rate": 3.5361514988012774e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480841338634491,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5874.6,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 2.1031746031746033,
|
|
"grad_norm": 0.43713259076728145,
|
|
"learning_rate": 3.5310708317796844e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16304585337638855,
|
|
"step": 1325,
|
|
"valid_targets_mean": 5606.6,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 2.111111111111111,
|
|
"grad_norm": 0.5149351729670725,
|
|
"learning_rate": 3.5259661834829266e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17917974293231964,
|
|
"step": 1330,
|
|
"valid_targets_mean": 6228.2,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 2.119047619047619,
|
|
"grad_norm": 0.3861389835108437,
|
|
"learning_rate": 3.5208376338654866e-05,
|
|
"loss": 0.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16448023915290833,
|
|
"step": 1335,
|
|
"valid_targets_mean": 6666.1,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 2.126984126984127,
|
|
"grad_norm": 0.38207111066871835,
|
|
"learning_rate": 3.515685263256214e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17809657752513885,
|
|
"step": 1340,
|
|
"valid_targets_mean": 6127.4,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 2.134920634920635,
|
|
"grad_norm": 0.4127592358303544,
|
|
"learning_rate": 3.51050915235707e-05,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18220895528793335,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5229.9,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.42540313592591483,
|
|
"learning_rate": 3.5053093822418596e-05,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1991414576768875,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5462.2,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 2.1507936507936507,
|
|
"grad_norm": 0.39687138441854536,
|
|
"learning_rate": 3.500086034354966e-05,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20256337523460388,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5684.8,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 2.1587301587301586,
|
|
"grad_norm": 0.44932267888477845,
|
|
"learning_rate": 3.494839190510071e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17283421754837036,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4960.4,
|
|
"valid_targets_min": 2345
|
|
},
|
|
{
|
|
"epoch": 2.1666666666666665,
|
|
"grad_norm": 0.49624084160009224,
|
|
"learning_rate": 3.489568932888877e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1903117299079895,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5084.8,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.1746031746031744,
|
|
"grad_norm": 0.42424536754472875,
|
|
"learning_rate": 3.484275344039815e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1761750876903534,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4764.8,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 2.1825396825396823,
|
|
"grad_norm": 0.3930655272562859,
|
|
"learning_rate": 3.478958506876759e-05,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16021452844142914,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5342.4,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 2.1904761904761907,
|
|
"grad_norm": 0.5253004636054942,
|
|
"learning_rate": 3.47361850467772e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1926204413175583,
|
|
"step": 1380,
|
|
"valid_targets_mean": 5126.9,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 2.1984126984126986,
|
|
"grad_norm": 0.3913488618062026,
|
|
"learning_rate": 3.468255421083546e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2079114466905594,
|
|
"step": 1385,
|
|
"valid_targets_mean": 6658.1,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 2.2063492063492065,
|
|
"grad_norm": 0.4343501732005746,
|
|
"learning_rate": 3.46286934009661e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819835603237152,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4903.1,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 2.2142857142857144,
|
|
"grad_norm": 0.4642383235525826,
|
|
"learning_rate": 3.457460346079495e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1960304081439972,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4859.3,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.39883822754099246,
|
|
"learning_rate": 3.452028523753673e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1751224845647812,
|
|
"step": 1400,
|
|
"valid_targets_mean": 5698.9,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 2.2301587301587302,
|
|
"grad_norm": 0.37010389401431576,
|
|
"learning_rate": 3.446573958198176e-05,
|
|
"loss": 0.1812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18461816012859344,
|
|
"step": 1405,
|
|
"valid_targets_mean": 6230.6,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 2.238095238095238,
|
|
"grad_norm": 0.6181478614629903,
|
|
"learning_rate": 3.4410967348482666e-05,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14911070466041565,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4602.1,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 2.246031746031746,
|
|
"grad_norm": 0.43108429750485655,
|
|
"learning_rate": 3.435596939494098e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552506387233734,
|
|
"step": 1415,
|
|
"valid_targets_mean": 5343.7,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 2.253968253968254,
|
|
"grad_norm": 0.3655618940959395,
|
|
"learning_rate": 3.430074658279369e-05,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16736620664596558,
|
|
"step": 1420,
|
|
"valid_targets_mean": 6760.8,
|
|
"valid_targets_min": 4142
|
|
},
|
|
{
|
|
"epoch": 2.261904761904762,
|
|
"grad_norm": 0.43470898698901705,
|
|
"learning_rate": 3.424529977699977e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18593010306358337,
|
|
"step": 1425,
|
|
"valid_targets_mean": 5456.4,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 2.2698412698412698,
|
|
"grad_norm": 0.4012757385555316,
|
|
"learning_rate": 3.418962984602661e-05,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2028789222240448,
|
|
"step": 1430,
|
|
"valid_targets_mean": 6217.8,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 2.2777777777777777,
|
|
"grad_norm": 0.422277687599635,
|
|
"learning_rate": 3.413373766183646e-05,
|
|
"loss": 0.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17681989073753357,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5242.7,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.4610732403739008,
|
|
"learning_rate": 3.40776240998727e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19924741983413696,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5244.7,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 2.2936507936507935,
|
|
"grad_norm": 0.4230850293753159,
|
|
"learning_rate": 3.4021290039046184e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18189997971057892,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5056.2,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 2.3015873015873014,
|
|
"grad_norm": 0.4282446015574262,
|
|
"learning_rate": 3.396473636172146e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1614503115415573,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5646.3,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 2.3095238095238093,
|
|
"grad_norm": 0.35393446023512337,
|
|
"learning_rate": 3.390796395370294e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1533820927143097,
|
|
"step": 1455,
|
|
"valid_targets_mean": 6412.6,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 2.317460317460317,
|
|
"grad_norm": 0.4006705141341369,
|
|
"learning_rate": 3.385097370422102e-05,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17715230584144592,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5679.0,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 2.3253968253968256,
|
|
"grad_norm": 0.41667317090980344,
|
|
"learning_rate": 3.3793766505918185e-05,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18403995037078857,
|
|
"step": 1465,
|
|
"valid_targets_mean": 5691.4,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 2.3333333333333335,
|
|
"grad_norm": 0.4438049911035154,
|
|
"learning_rate": 3.3736343254834994e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16705909371376038,
|
|
"step": 1470,
|
|
"valid_targets_mean": 5163.1,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 2.3412698412698414,
|
|
"grad_norm": 0.41724254225439533,
|
|
"learning_rate": 3.3678704850396045e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20384517312049866,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5862.2,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 2.3492063492063493,
|
|
"grad_norm": 0.4965895506653012,
|
|
"learning_rate": 3.362085219539592e-05,
|
|
"loss": 0.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19839468598365784,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5439.9,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 2.357142857142857,
|
|
"grad_norm": 0.38336740154664095,
|
|
"learning_rate": 3.3562786195985025e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.190034419298172,
|
|
"step": 1485,
|
|
"valid_targets_mean": 6053.7,
|
|
"valid_targets_min": 3073
|
|
},
|
|
{
|
|
"epoch": 2.365079365079365,
|
|
"grad_norm": 0.42799053268182136,
|
|
"learning_rate": 3.350450776165535e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17199914157390594,
|
|
"step": 1490,
|
|
"valid_targets_mean": 5109.0,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.373015873015873,
|
|
"grad_norm": 0.49042106827448145,
|
|
"learning_rate": 3.344601780522634e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18558023869991302,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4502.5,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 2.380952380952381,
|
|
"grad_norm": 0.452178036330889,
|
|
"learning_rate": 3.3387317242830466e-05,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17586269974708557,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5095.8,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 2.388888888888889,
|
|
"grad_norm": 0.4167028985093456,
|
|
"learning_rate": 3.332840699389897e-05,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16293631494045258,
|
|
"step": 1505,
|
|
"valid_targets_mean": 5093.2,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 2.3968253968253967,
|
|
"grad_norm": 0.4134999974854722,
|
|
"learning_rate": 3.32692879811474e-05,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16504386067390442,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4788.2,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 2.4047619047619047,
|
|
"grad_norm": 0.38629332618116435,
|
|
"learning_rate": 3.320996113056123e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16470927000045776,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5480.1,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 2.4126984126984126,
|
|
"grad_norm": 0.4517489878934338,
|
|
"learning_rate": 3.315042737138128e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19131216406822205,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5177.0,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 2.4206349206349205,
|
|
"grad_norm": 0.3912437196261749,
|
|
"learning_rate": 3.309068763608919e-05,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15866073966026306,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4809.5,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 2.4285714285714284,
|
|
"grad_norm": 0.4287081470079961,
|
|
"learning_rate": 3.303074286039285e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18889503180980682,
|
|
"step": 1530,
|
|
"valid_targets_mean": 5050.8,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 2.4365079365079367,
|
|
"grad_norm": 0.5226823839069293,
|
|
"learning_rate": 3.2970593983211694e-05,
|
|
"loss": 0.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17266133427619934,
|
|
"step": 1535,
|
|
"valid_targets_mean": 5716.3,
|
|
"valid_targets_min": 2116
|
|
},
|
|
{
|
|
"epoch": 2.4444444444444446,
|
|
"grad_norm": 0.4142977777917709,
|
|
"learning_rate": 3.2910241946661993e-05,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17832788825035095,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5296.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 2.4523809523809526,
|
|
"grad_norm": 0.4094380830442922,
|
|
"learning_rate": 3.2849687696042165e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16501125693321228,
|
|
"step": 1545,
|
|
"valid_targets_mean": 5243.6,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 2.4603174603174605,
|
|
"grad_norm": 0.5264844857969575,
|
|
"learning_rate": 3.2788932179817886e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19089126586914062,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4228.1,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 2.4682539682539684,
|
|
"grad_norm": 0.3870078917146876,
|
|
"learning_rate": 3.2727976349607276e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16617675125598907,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5233.7,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 2.4761904761904763,
|
|
"grad_norm": 0.37140129207143047,
|
|
"learning_rate": 3.266682116016599e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15645205974578857,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6038.5,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 2.484126984126984,
|
|
"grad_norm": 0.3972434967156975,
|
|
"learning_rate": 3.260546756937227e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872669756412506,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5819.3,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 2.492063492063492,
|
|
"grad_norm": 0.41540248633079907,
|
|
"learning_rate": 3.254391653821192e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1847609579563141,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5872.5,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.4326289822516213,
|
|
"learning_rate": 3.248216903076328e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20463158190250397,
|
|
"step": 1575,
|
|
"valid_targets_mean": 5289.8,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 2.507936507936508,
|
|
"grad_norm": 0.5002103630542671,
|
|
"learning_rate": 3.24202260141821e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19607800245285034,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4856.6,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 2.515873015873016,
|
|
"grad_norm": 0.44088426161151917,
|
|
"learning_rate": 3.235808845868641e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17973065376281738,
|
|
"step": 1585,
|
|
"valid_targets_mean": 5105.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 2.5238095238095237,
|
|
"grad_norm": 0.41586874944924196,
|
|
"learning_rate": 3.229575733754132e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18156777322292328,
|
|
"step": 1590,
|
|
"valid_targets_mean": 5101.9,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 2.5317460317460316,
|
|
"grad_norm": 0.3977154984604151,
|
|
"learning_rate": 3.2233233627043765e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19697993993759155,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5949.2,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 2.5396825396825395,
|
|
"grad_norm": 0.3893767013034315,
|
|
"learning_rate": 3.217051830650722e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1648177206516266,
|
|
"step": 1600,
|
|
"valid_targets_mean": 5405.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 2.5476190476190474,
|
|
"grad_norm": 0.4012855625129347,
|
|
"learning_rate": 3.210761235824639e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662881076335907,
|
|
"step": 1605,
|
|
"valid_targets_mean": 5683.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 2.5555555555555554,
|
|
"grad_norm": 0.41653714911973017,
|
|
"learning_rate": 3.204451676756175e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1690150797367096,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5188.4,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 2.5634920634920633,
|
|
"grad_norm": 0.43105631877085077,
|
|
"learning_rate": 3.198123252272419e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893271803855896,
|
|
"step": 1615,
|
|
"valid_targets_mean": 5241.9,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.45967281455616704,
|
|
"learning_rate": 3.1917760614959505e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19677485525608063,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5801.2,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 2.5793650793650795,
|
|
"grad_norm": 0.40150052297597855,
|
|
"learning_rate": 3.1854102038432856e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1876111626625061,
|
|
"step": 1625,
|
|
"valid_targets_mean": 5630.9,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 2.5873015873015874,
|
|
"grad_norm": 0.352002911984159,
|
|
"learning_rate": 3.17902577902332e-05,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494748890399933,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5767.0,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 2.5952380952380953,
|
|
"grad_norm": 0.38659122601598817,
|
|
"learning_rate": 3.172622887035771e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17912599444389343,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5444.4,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.6031746031746033,
|
|
"grad_norm": 0.566374914273737,
|
|
"learning_rate": 3.1662016281696073e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1878930926322937,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4545.0,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 2.611111111111111,
|
|
"grad_norm": 0.40019696946056404,
|
|
"learning_rate": 3.15976210300148e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19527481496334076,
|
|
"step": 1645,
|
|
"valid_targets_mean": 5715.0,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 2.619047619047619,
|
|
"grad_norm": 0.39423172570166204,
|
|
"learning_rate": 3.153304412394143e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1640002578496933,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5794.7,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 2.626984126984127,
|
|
"grad_norm": 0.4024698863549588,
|
|
"learning_rate": 3.146828657494883e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1920669972896576,
|
|
"step": 1655,
|
|
"valid_targets_mean": 5749.9,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 2.634920634920635,
|
|
"grad_norm": 0.3925326135892127,
|
|
"learning_rate": 3.140334939733924e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18373993039131165,
|
|
"step": 1660,
|
|
"valid_targets_mean": 5227.9,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 2.642857142857143,
|
|
"grad_norm": 0.46866690749449025,
|
|
"learning_rate": 3.1338233608228455e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17870697379112244,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5330.0,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 2.6507936507936507,
|
|
"grad_norm": 0.3948699322229872,
|
|
"learning_rate": 3.127294022752988e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16245272755622864,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5712.2,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.6587301587301586,
|
|
"grad_norm": 0.33562761669358987,
|
|
"learning_rate": 3.120747027793854e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378447711467743,
|
|
"step": 1675,
|
|
"valid_targets_mean": 6322.2,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 2.6666666666666665,
|
|
"grad_norm": 0.4080788254926686,
|
|
"learning_rate": 3.114182478491509e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16858676075935364,
|
|
"step": 1680,
|
|
"valid_targets_mean": 5623.9,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 2.674603174603175,
|
|
"grad_norm": 0.44788385062808955,
|
|
"learning_rate": 3.107600477666969e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17567336559295654,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5283.2,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 2.682539682539683,
|
|
"grad_norm": 0.3698134704035969,
|
|
"learning_rate": 3.1010011284146004e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1585252285003662,
|
|
"step": 1690,
|
|
"valid_targets_mean": 5612.2,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 2.6904761904761907,
|
|
"grad_norm": 0.4744606363702609,
|
|
"learning_rate": 3.0943845341004944e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21260786056518555,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4632.7,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 2.6984126984126986,
|
|
"grad_norm": 0.3925483959574624,
|
|
"learning_rate": 3.087750798360856e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1703377664089203,
|
|
"step": 1700,
|
|
"valid_targets_mean": 6203.7,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 2.7063492063492065,
|
|
"grad_norm": 0.423603961482913,
|
|
"learning_rate": 3.0811000251003774e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1864738166332245,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4999.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 2.7142857142857144,
|
|
"grad_norm": 0.42173447744798365,
|
|
"learning_rate": 3.074432318490608e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17015457153320312,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4929.1,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 2.7222222222222223,
|
|
"grad_norm": 0.3656999014872132,
|
|
"learning_rate": 3.067747782968328e-05,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662687063217163,
|
|
"step": 1715,
|
|
"valid_targets_mean": 5857.1,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 2.7301587301587302,
|
|
"grad_norm": 0.4250802423607952,
|
|
"learning_rate": 3.0610465232339096e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17572614550590515,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5605.6,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 2.738095238095238,
|
|
"grad_norm": 0.39832850275301074,
|
|
"learning_rate": 3.054328644249677e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17158354818820953,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5492.9,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 2.746031746031746,
|
|
"grad_norm": 0.40611280034429587,
|
|
"learning_rate": 3.047594251238265e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17876523733139038,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5122.9,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 2.753968253968254,
|
|
"grad_norm": 0.3850213501848369,
|
|
"learning_rate": 3.0408434496809643e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1557503491640091,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4817.9,
|
|
"valid_targets_min": 346
|
|
},
|
|
{
|
|
"epoch": 2.761904761904762,
|
|
"grad_norm": 0.43072814030855905,
|
|
"learning_rate": 3.034076345316079e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1973961591720581,
|
|
"step": 1740,
|
|
"valid_targets_mean": 5802.5,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 2.7698412698412698,
|
|
"grad_norm": 0.4244003572959457,
|
|
"learning_rate": 3.0272930441372628e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1777854859828949,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5021.1,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.3968223319635917,
|
|
"learning_rate": 3.02049365239186e-05,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18718859553337097,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5884.2,
|
|
"valid_targets_min": 3484
|
|
},
|
|
{
|
|
"epoch": 2.7857142857142856,
|
|
"grad_norm": 0.38979762153827907,
|
|
"learning_rate": 3.0136782765792455e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184782013297081,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5326.1,
|
|
"valid_targets_min": 3768
|
|
},
|
|
{
|
|
"epoch": 2.7936507936507935,
|
|
"grad_norm": 0.401430603353089,
|
|
"learning_rate": 3.0068470234491517e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18122202157974243,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5606.8,
|
|
"valid_targets_min": 2889
|
|
},
|
|
{
|
|
"epoch": 2.8015873015873014,
|
|
"grad_norm": 0.46774432941464267,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19038715958595276,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5706.6,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 2.8095238095238093,
|
|
"grad_norm": 0.3900657231476483,
|
|
"learning_rate": 2.993137313477223e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16720932722091675,
|
|
"step": 1770,
|
|
"valid_targets_mean": 5955.4,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 2.817460317460317,
|
|
"grad_norm": 0.4671446489123995,
|
|
"learning_rate": 2.9862590713715837e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1919449269771576,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4992.8,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 2.825396825396825,
|
|
"grad_norm": 0.4108207960556999,
|
|
"learning_rate": 2.9793653814174957e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16805459558963776,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5892.2,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 2.8333333333333335,
|
|
"grad_norm": 0.3878996578053704,
|
|
"learning_rate": 2.9724563515913317e-05,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16483880579471588,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5715.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 2.8412698412698414,
|
|
"grad_norm": 0.3924118467590962,
|
|
"learning_rate": 2.9655320901097348e-05,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16196705400943756,
|
|
"step": 1790,
|
|
"valid_targets_mean": 5235.0,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 2.8492063492063493,
|
|
"grad_norm": 0.38981761916486113,
|
|
"learning_rate": 2.9585927054279224e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542998105287552,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5381.4,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.3581946508380772,
|
|
"learning_rate": 2.951638306237988e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17149242758750916,
|
|
"step": 1800,
|
|
"valid_targets_mean": 6313.2,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 2.865079365079365,
|
|
"grad_norm": 0.4494970197989124,
|
|
"learning_rate": 2.9446690014671976e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1602257341146469,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5223.4,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 2.873015873015873,
|
|
"grad_norm": 0.37530142354886875,
|
|
"learning_rate": 2.937684900276285e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15792617201805115,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5738.4,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 2.880952380952381,
|
|
"grad_norm": 0.39396887510337375,
|
|
"learning_rate": 2.9306861120577416e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15244446694850922,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5644.9,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 2.888888888888889,
|
|
"grad_norm": 0.3706798220884454,
|
|
"learning_rate": 2.923672746434103e-05,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1762312352657318,
|
|
"step": 1820,
|
|
"valid_targets_mean": 6201.0,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 2.8968253968253967,
|
|
"grad_norm": 0.4021984022426286,
|
|
"learning_rate": 2.9166449132562303e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2141510546207428,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5325.4,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 2.9047619047619047,
|
|
"grad_norm": 0.42224038990358104,
|
|
"learning_rate": 2.9096027226015927e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18177683651447296,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4708.6,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 2.9126984126984126,
|
|
"grad_norm": 0.3819006419400972,
|
|
"learning_rate": 2.9025462847725405e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16706177592277527,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6122.9,
|
|
"valid_targets_min": 2503
|
|
},
|
|
{
|
|
"epoch": 2.9206349206349205,
|
|
"grad_norm": 0.4410064177919863,
|
|
"learning_rate": 2.8954757102945798e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2258429080247879,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4895.2,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 2.928571428571429,
|
|
"grad_norm": 0.39487518735853117,
|
|
"learning_rate": 2.888391109914638e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17937694489955902,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5114.6,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.9365079365079367,
|
|
"grad_norm": 0.38265019511969883,
|
|
"learning_rate": 2.8812925945993333e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17933601140975952,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5663.7,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 2.9444444444444446,
|
|
"grad_norm": 0.402651671534175,
|
|
"learning_rate": 2.8741802755332332e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17298966646194458,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4725.4,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 2.9523809523809526,
|
|
"grad_norm": 0.43929484251032125,
|
|
"learning_rate": 2.8670542641171155e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19994154572486877,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4797.8,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 2.9603174603174605,
|
|
"grad_norm": 0.40410989413843573,
|
|
"learning_rate": 2.859914671966221e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17386558651924133,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4977.1,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 2.9682539682539684,
|
|
"grad_norm": 0.346301301173511,
|
|
"learning_rate": 2.8527616109085082e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15783601999282837,
|
|
"step": 1870,
|
|
"valid_targets_mean": 5833.5,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 2.9761904761904763,
|
|
"grad_norm": 0.38049759940528577,
|
|
"learning_rate": 2.8455951929828977e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15591737627983093,
|
|
"step": 1875,
|
|
"valid_targets_mean": 5575.6,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 2.984126984126984,
|
|
"grad_norm": 0.3831654484686578,
|
|
"learning_rate": 2.8384155304375223e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19842402637004852,
|
|
"step": 1880,
|
|
"valid_targets_mean": 6079.8,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 2.992063492063492,
|
|
"grad_norm": 0.487008799638132,
|
|
"learning_rate": 2.8312227357279646e-05,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.192101389169693,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4321.4,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.5184260476406786,
|
|
"learning_rate": 2.8240169215154977e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1595619022846222,
|
|
"step": 1890,
|
|
"valid_targets_mean": 5334.9,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 3.007936507936508,
|
|
"grad_norm": 0.3405314330974361,
|
|
"learning_rate": 2.8167982006653196e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15521922707557678,
|
|
"step": 1895,
|
|
"valid_targets_mean": 6951.9,
|
|
"valid_targets_min": 3834
|
|
},
|
|
{
|
|
"epoch": 3.015873015873016,
|
|
"grad_norm": 0.44531826407382213,
|
|
"learning_rate": 2.8095666862447876e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18891948461532593,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5601.6,
|
|
"valid_targets_min": 2266
|
|
},
|
|
{
|
|
"epoch": 3.0238095238095237,
|
|
"grad_norm": 0.3667355362457471,
|
|
"learning_rate": 2.8023224915216442e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13565120100975037,
|
|
"step": 1905,
|
|
"valid_targets_mean": 6374.4,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 3.0317460317460316,
|
|
"grad_norm": 0.36488467668267915,
|
|
"learning_rate": 2.795065729962244e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15088070929050446,
|
|
"step": 1910,
|
|
"valid_targets_mean": 6743.3,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 3.0396825396825395,
|
|
"grad_norm": 0.5240072304479968,
|
|
"learning_rate": 2.7877965152297785e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16858817636966705,
|
|
"step": 1915,
|
|
"valid_targets_mean": 5218.3,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 3.0476190476190474,
|
|
"grad_norm": 0.4345088557078499,
|
|
"learning_rate": 2.780514961182492e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18977370858192444,
|
|
"step": 1920,
|
|
"valid_targets_mean": 6161.9,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 3.0555555555555554,
|
|
"grad_norm": 0.43067510343421206,
|
|
"learning_rate": 2.773221181871903e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15231278538703918,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5256.0,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 3.0634920634920633,
|
|
"grad_norm": 0.4329109482212343,
|
|
"learning_rate": 2.765915291541013e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1648765504360199,
|
|
"step": 1930,
|
|
"valid_targets_mean": 5768.6,
|
|
"valid_targets_min": 2664
|
|
},
|
|
{
|
|
"epoch": 3.0714285714285716,
|
|
"grad_norm": 0.4360393225780438,
|
|
"learning_rate": 2.7585974046225206e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16902469098567963,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4864.1,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 3.0793650793650795,
|
|
"grad_norm": 0.42242563176181436,
|
|
"learning_rate": 2.751267635737027e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15772101283073425,
|
|
"step": 1940,
|
|
"valid_targets_mean": 5512.9,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 3.0873015873015874,
|
|
"grad_norm": 0.38359983981254425,
|
|
"learning_rate": 2.7439260996912423e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.171489417552948,
|
|
"step": 1945,
|
|
"valid_targets_mean": 5938.1,
|
|
"valid_targets_min": 2752
|
|
},
|
|
{
|
|
"epoch": 3.0952380952380953,
|
|
"grad_norm": 0.41322753491472525,
|
|
"learning_rate": 2.7365729114761862e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15758131444454193,
|
|
"step": 1950,
|
|
"valid_targets_mean": 5200.1,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 3.1031746031746033,
|
|
"grad_norm": 0.4186192867390412,
|
|
"learning_rate": 2.729208186265386e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14994092285633087,
|
|
"step": 1955,
|
|
"valid_targets_mean": 5166.9,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.111111111111111,
|
|
"grad_norm": 0.3644255779428956,
|
|
"learning_rate": 2.721832039413077e-05,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14574278891086578,
|
|
"step": 1960,
|
|
"valid_targets_mean": 6365.3,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 3.119047619047619,
|
|
"grad_norm": 0.41549829471711147,
|
|
"learning_rate": 2.7144445864523887e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15822090208530426,
|
|
"step": 1965,
|
|
"valid_targets_mean": 5193.6,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 3.126984126984127,
|
|
"grad_norm": 0.4080754517778744,
|
|
"learning_rate": 2.7070459430935407e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18438664078712463,
|
|
"step": 1970,
|
|
"valid_targets_mean": 6076.5,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 3.134920634920635,
|
|
"grad_norm": 0.34845655756539395,
|
|
"learning_rate": 2.69963622522203e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505107879638672,
|
|
"step": 1975,
|
|
"valid_targets_mean": 7000.2,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 3.142857142857143,
|
|
"grad_norm": 0.3957443526395654,
|
|
"learning_rate": 2.6922155488968117e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15863436460494995,
|
|
"step": 1980,
|
|
"valid_targets_mean": 6114.2,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 3.1507936507936507,
|
|
"grad_norm": 0.3987157816987954,
|
|
"learning_rate": 2.684784030348486e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1591426432132721,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5729.1,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 3.1587301587301586,
|
|
"grad_norm": 0.3311471215567983,
|
|
"learning_rate": 2.6773417859774755e-05,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13412167131900787,
|
|
"step": 1990,
|
|
"valid_targets_mean": 7218.6,
|
|
"valid_targets_min": 5066
|
|
},
|
|
{
|
|
"epoch": 3.1666666666666665,
|
|
"grad_norm": 0.44452141015453234,
|
|
"learning_rate": 2.669888932352201e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14640459418296814,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4116.0,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.1746031746031744,
|
|
"grad_norm": 0.4296245823041024,
|
|
"learning_rate": 2.662425586207259e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18060961365699768,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5717.8,
|
|
"valid_targets_min": 2684
|
|
},
|
|
{
|
|
"epoch": 3.1825396825396823,
|
|
"grad_norm": 0.44161933967338207,
|
|
"learning_rate": 2.6549518644415876e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894905149936676,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5385.4,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 3.1904761904761907,
|
|
"grad_norm": 0.4402027862054312,
|
|
"learning_rate": 2.6474678841166426e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15985198318958282,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4908.7,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 3.1984126984126986,
|
|
"grad_norm": 0.4521093840332604,
|
|
"learning_rate": 2.639973762454558e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17308469116687775,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5940.6,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 3.2063492063492065,
|
|
"grad_norm": 0.44949779778305143,
|
|
"learning_rate": 2.6324696168363134e-05,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16031065583229065,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4588.2,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 3.2142857142857144,
|
|
"grad_norm": 0.44736879299802484,
|
|
"learning_rate": 2.624955564799894e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18750852346420288,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4769.4,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 3.2222222222222223,
|
|
"grad_norm": 0.4063469374069734,
|
|
"learning_rate": 2.617431724038451e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15944424271583557,
|
|
"step": 2030,
|
|
"valid_targets_mean": 5581.2,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 3.2301587301587302,
|
|
"grad_norm": 0.4237562908407849,
|
|
"learning_rate": 2.609898212398455e-05,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15621069073677063,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5366.9,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 3.238095238095238,
|
|
"grad_norm": 0.39370444783531333,
|
|
"learning_rate": 2.6023551478778535e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14452454447746277,
|
|
"step": 2040,
|
|
"valid_targets_mean": 5937.5,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 3.246031746031746,
|
|
"grad_norm": 0.49270720514980915,
|
|
"learning_rate": 2.5948026486242225e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1724134087562561,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3770.9,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.253968253968254,
|
|
"grad_norm": 0.4577588443456724,
|
|
"learning_rate": 2.5872408329329136e-05,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13135287165641785,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5848.4,
|
|
"valid_targets_min": 2027
|
|
},
|
|
{
|
|
"epoch": 3.261904761904762,
|
|
"grad_norm": 0.44271121122885954,
|
|
"learning_rate": 2.5796698192452016e-05,
|
|
"loss": 0.16,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16199621558189392,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4766.6,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 3.2698412698412698,
|
|
"grad_norm": 0.39031087916457097,
|
|
"learning_rate": 2.572089726146432e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16037863492965698,
|
|
"step": 2060,
|
|
"valid_targets_mean": 5805.1,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 3.2777777777777777,
|
|
"grad_norm": 0.4646573659873822,
|
|
"learning_rate": 2.564500672364162e-05,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162231907248497,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4860.0,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.3990327001371696,
|
|
"learning_rate": 2.556902776766298e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15069621801376343,
|
|
"step": 2070,
|
|
"valid_targets_mean": 5909.9,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 3.2936507936507935,
|
|
"grad_norm": 0.43271648198961715,
|
|
"learning_rate": 2.5492961583592397e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.170293927192688,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5133.9,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 3.3015873015873014,
|
|
"grad_norm": 0.44977807483400484,
|
|
"learning_rate": 2.5416809362860107e-05,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18963780999183655,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4750.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.3095238095238093,
|
|
"grad_norm": 0.3681715462106892,
|
|
"learning_rate": 2.5340572298243946e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1364738941192627,
|
|
"step": 2085,
|
|
"valid_targets_mean": 6450.1,
|
|
"valid_targets_min": 4105
|
|
},
|
|
{
|
|
"epoch": 3.317460317460317,
|
|
"grad_norm": 0.45380051213619826,
|
|
"learning_rate": 2.5264251583850677e-05,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1766563057899475,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4932.4,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 3.3253968253968256,
|
|
"grad_norm": 0.34929908788411107,
|
|
"learning_rate": 2.518784841509726e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142880380153656,
|
|
"step": 2095,
|
|
"valid_targets_mean": 7236.7,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.42494242702127294,
|
|
"learning_rate": 2.511136398869216e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1457439363002777,
|
|
"step": 2100,
|
|
"valid_targets_mean": 5273.4,
|
|
"valid_targets_min": 2522
|
|
},
|
|
{
|
|
"epoch": 3.3412698412698414,
|
|
"grad_norm": 0.41871282992961945,
|
|
"learning_rate": 2.503479950261658e-05,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14400547742843628,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5479.6,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 3.3492063492063493,
|
|
"grad_norm": 0.41199941257756795,
|
|
"learning_rate": 2.4958156156105693e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16985690593719482,
|
|
"step": 2110,
|
|
"valid_targets_mean": 5431.3,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 3.357142857142857,
|
|
"grad_norm": 0.41511677551741843,
|
|
"learning_rate": 2.4881435149629892e-05,
|
|
"loss": 0.1611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15738296508789062,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5947.6,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 3.365079365079365,
|
|
"grad_norm": 0.3658736974288424,
|
|
"learning_rate": 2.4804637684875937e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1301427185535431,
|
|
"step": 2120,
|
|
"valid_targets_mean": 7314.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 3.373015873015873,
|
|
"grad_norm": 0.4048532254523095,
|
|
"learning_rate": 2.4727764964728177e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15161439776420593,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5149.5,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 3.380952380952381,
|
|
"grad_norm": 0.3829760082289399,
|
|
"learning_rate": 2.4650818193249693e-05,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14068162441253662,
|
|
"step": 2130,
|
|
"valid_targets_mean": 5836.1,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 3.388888888888889,
|
|
"grad_norm": 0.4507430940791641,
|
|
"learning_rate": 2.4573798575663425e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16859221458435059,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5247.2,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 3.3968253968253967,
|
|
"grad_norm": 0.47784011418612515,
|
|
"learning_rate": 2.4496707318333323e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.176387220621109,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4379.1,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 3.4047619047619047,
|
|
"grad_norm": 0.41751426715134293,
|
|
"learning_rate": 2.441954562874541e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1648518145084381,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5245.0,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 3.4126984126984126,
|
|
"grad_norm": 0.34417394477532504,
|
|
"learning_rate": 2.434231471548893e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16924996674060822,
|
|
"step": 2150,
|
|
"valid_targets_mean": 7679.7,
|
|
"valid_targets_min": 4644
|
|
},
|
|
{
|
|
"epoch": 3.4206349206349205,
|
|
"grad_norm": 0.4099313742801725,
|
|
"learning_rate": 2.4265015788237348e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1553308665752411,
|
|
"step": 2155,
|
|
"valid_targets_mean": 5291.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 3.4285714285714284,
|
|
"grad_norm": 0.3644179150651529,
|
|
"learning_rate": 2.4187650057729465e-05,
|
|
"loss": 0.1619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13109582662582397,
|
|
"step": 2160,
|
|
"valid_targets_mean": 6145.5,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 3.4365079365079367,
|
|
"grad_norm": 0.4344903235604141,
|
|
"learning_rate": 2.4110218735750403e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16567984223365784,
|
|
"step": 2165,
|
|
"valid_targets_mean": 5219.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.4444444444444446,
|
|
"grad_norm": 0.44162025985134756,
|
|
"learning_rate": 2.4032723035112667e-05,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17561107873916626,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4544.6,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 3.4523809523809526,
|
|
"grad_norm": 0.3932602300074192,
|
|
"learning_rate": 2.3955164169637124e-05,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15324220061302185,
|
|
"step": 2175,
|
|
"valid_targets_mean": 5706.5,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 3.4603174603174605,
|
|
"grad_norm": 0.42827386189990235,
|
|
"learning_rate": 2.387754335413398e-05,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15386894345283508,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4324.1,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 3.4682539682539684,
|
|
"grad_norm": 0.41357075683918504,
|
|
"learning_rate": 2.3799861804383807e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15950973331928253,
|
|
"step": 2185,
|
|
"valid_targets_mean": 5571.4,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 3.4761904761904763,
|
|
"grad_norm": 0.4041812547912995,
|
|
"learning_rate": 2.3722120737118414e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401449292898178,
|
|
"step": 2190,
|
|
"valid_targets_mean": 5355.0,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 3.484126984126984,
|
|
"grad_norm": 0.4349818563348464,
|
|
"learning_rate": 2.3644321370001868e-05,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16082270443439484,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4887.8,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 3.492063492063492,
|
|
"grad_norm": 0.41308220488892183,
|
|
"learning_rate": 2.3566464921611393e-05,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16265086829662323,
|
|
"step": 2200,
|
|
"valid_targets_mean": 5437.2,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"grad_norm": 0.4212037346610392,
|
|
"learning_rate": 2.348855261141827e-05,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16348662972450256,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4889.4,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 3.507936507936508,
|
|
"grad_norm": 0.4704570906062029,
|
|
"learning_rate": 2.341058565976874e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20820948481559753,
|
|
"step": 2210,
|
|
"valid_targets_mean": 5980.6,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 3.515873015873016,
|
|
"grad_norm": 0.5036481968595117,
|
|
"learning_rate": 2.3332565287864918e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15473511815071106,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5321.4,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 3.5238095238095237,
|
|
"grad_norm": 0.3735233109636718,
|
|
"learning_rate": 2.325449271774563e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14910230040550232,
|
|
"step": 2220,
|
|
"valid_targets_mean": 6132.2,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 3.5317460317460316,
|
|
"grad_norm": 0.444413935820999,
|
|
"learning_rate": 2.3176369172267286e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17016586661338806,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4620.8,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 3.5396825396825395,
|
|
"grad_norm": 0.3953957921849172,
|
|
"learning_rate": 2.3098195875084732e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15769408643245697,
|
|
"step": 2230,
|
|
"valid_targets_mean": 5690.9,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 3.5476190476190474,
|
|
"grad_norm": 0.5077199865444484,
|
|
"learning_rate": 2.301997405063208e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15993595123291016,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5084.5,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 3.5555555555555554,
|
|
"grad_norm": 0.4086230414672359,
|
|
"learning_rate": 2.2941704924103535e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494399756193161,
|
|
"step": 2240,
|
|
"valid_targets_mean": 5467.4,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 3.5634920634920633,
|
|
"grad_norm": 0.42370220867636965,
|
|
"learning_rate": 2.2863389721434165e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16436323523521423,
|
|
"step": 2245,
|
|
"valid_targets_mean": 6613.1,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.47414264477785967,
|
|
"learning_rate": 2.2785029669280775e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16836649179458618,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4143.4,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 3.5793650793650795,
|
|
"grad_norm": 0.4279968424111969,
|
|
"learning_rate": 2.2706625995002626e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15963539481163025,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5683.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 3.5873015873015874,
|
|
"grad_norm": 0.43551657194367943,
|
|
"learning_rate": 2.262817992664224e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15465790033340454,
|
|
"step": 2260,
|
|
"valid_targets_mean": 5270.2,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 3.5952380952380953,
|
|
"grad_norm": 0.473618019837446,
|
|
"learning_rate": 2.2549692692906158e-05,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19456999003887177,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4033.5,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.6031746031746033,
|
|
"grad_norm": 0.4610864812937639,
|
|
"learning_rate": 2.24711655231457e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16838069260120392,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4850.8,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 3.611111111111111,
|
|
"grad_norm": 0.4183921042676627,
|
|
"learning_rate": 2.2392599647337724e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15832781791687012,
|
|
"step": 2275,
|
|
"valid_targets_mean": 5614.7,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 3.619047619047619,
|
|
"grad_norm": 0.4746926085970967,
|
|
"learning_rate": 2.23139962960653e-05,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17942070960998535,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4651.3,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 3.626984126984127,
|
|
"grad_norm": 0.42504033178119727,
|
|
"learning_rate": 2.2235356700498528e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609695851802826,
|
|
"step": 2285,
|
|
"valid_targets_mean": 5650.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 3.634920634920635,
|
|
"grad_norm": 0.35104401078143266,
|
|
"learning_rate": 2.2156682092375175e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14197728037834167,
|
|
"step": 2290,
|
|
"valid_targets_mean": 6449.4,
|
|
"valid_targets_min": 2699
|
|
},
|
|
{
|
|
"epoch": 3.642857142857143,
|
|
"grad_norm": 0.48629893819073916,
|
|
"learning_rate": 2.2077973703981423e-05,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.171891987323761,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5000.9,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 3.6507936507936507,
|
|
"grad_norm": 0.38982495934645944,
|
|
"learning_rate": 2.1999232768132552e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15166640281677246,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5598.8,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 3.6587301587301586,
|
|
"grad_norm": 0.4538805178933535,
|
|
"learning_rate": 2.1920460518153637e-05,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13476833701133728,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4883.2,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 3.6666666666666665,
|
|
"grad_norm": 0.4650060205808812,
|
|
"learning_rate": 2.1841658187860232e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1431887298822403,
|
|
"step": 2310,
|
|
"valid_targets_mean": 5863.8,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 3.674603174603175,
|
|
"grad_norm": 0.40827307741804697,
|
|
"learning_rate": 2.176282701153904e-05,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16187241673469543,
|
|
"step": 2315,
|
|
"valid_targets_mean": 5186.4,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 3.682539682539683,
|
|
"grad_norm": 0.40111783297909087,
|
|
"learning_rate": 2.1683968223928572e-05,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1610245704650879,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5193.5,
|
|
"valid_targets_min": 2007
|
|
},
|
|
{
|
|
"epoch": 3.6904761904761907,
|
|
"grad_norm": 0.4108636912566655,
|
|
"learning_rate": 2.1605083060199835e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19376526772975922,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5273.6,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 3.6984126984126986,
|
|
"grad_norm": 0.4090720045772208,
|
|
"learning_rate": 2.152617275593694e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1690284013748169,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5263.3,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 3.7063492063492065,
|
|
"grad_norm": 0.37473461724981033,
|
|
"learning_rate": 2.144723854711781e-05,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14938317239284515,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5389.9,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 3.7142857142857144,
|
|
"grad_norm": 0.4843387089458587,
|
|
"learning_rate": 2.1368281670094766e-05,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17382203042507172,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4600.9,
|
|
"valid_targets_min": 1981
|
|
},
|
|
{
|
|
"epoch": 3.7222222222222223,
|
|
"grad_norm": 0.4119585290716737,
|
|
"learning_rate": 2.1289303361575175e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15374284982681274,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5633.0,
|
|
"valid_targets_min": 2046
|
|
},
|
|
{
|
|
"epoch": 3.7301587301587302,
|
|
"grad_norm": 0.3952648176140555,
|
|
"learning_rate": 2.121030485860211e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17831559479236603,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5893.4,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 3.738095238095238,
|
|
"grad_norm": 0.41411933208876145,
|
|
"learning_rate": 2.113128739853493e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17142191529273987,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5256.2,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 3.746031746031746,
|
|
"grad_norm": 0.45575184540137365,
|
|
"learning_rate": 2.1052252219029944e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1489437222480774,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5599.4,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 3.753968253968254,
|
|
"grad_norm": 0.38228242544809465,
|
|
"learning_rate": 2.0973200558020967e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1508997082710266,
|
|
"step": 2365,
|
|
"valid_targets_mean": 5869.1,
|
|
"valid_targets_min": 3409
|
|
},
|
|
{
|
|
"epoch": 3.761904761904762,
|
|
"grad_norm": 0.4148754628490215,
|
|
"learning_rate": 2.0894133653700005e-05,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1601417064666748,
|
|
"step": 2370,
|
|
"valid_targets_mean": 5959.2,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 3.7698412698412698,
|
|
"grad_norm": 0.39421365324647456,
|
|
"learning_rate": 2.0815052744497795e-05,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16802407801151276,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5507.4,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 3.7777777777777777,
|
|
"grad_norm": 0.4179727695736473,
|
|
"learning_rate": 2.0735959069064434e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505141705274582,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4560.2,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 3.7857142857142856,
|
|
"grad_norm": 0.4025223262448166,
|
|
"learning_rate": 2.065685386624999e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16200333833694458,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5546.6,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 3.7936507936507935,
|
|
"grad_norm": 0.42971080125523514,
|
|
"learning_rate": 2.0577738375085076e-05,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13847434520721436,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4286.1,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 3.8015873015873014,
|
|
"grad_norm": 0.4040671130609569,
|
|
"learning_rate": 2.0498613834761462e-05,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16613656282424927,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5606.1,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 3.8095238095238093,
|
|
"grad_norm": 0.4104422551171815,
|
|
"learning_rate": 2.041948148461264e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16815027594566345,
|
|
"step": 2400,
|
|
"valid_targets_mean": 6071.4,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 3.817460317460317,
|
|
"grad_norm": 0.433862516805992,
|
|
"learning_rate": 2.0340342564094436e-05,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456870138645172,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4660.6,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 3.825396825396825,
|
|
"grad_norm": 0.37026873619906453,
|
|
"learning_rate": 2.0261198312765597e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17130400240421295,
|
|
"step": 2410,
|
|
"valid_targets_mean": 7170.6,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 3.8333333333333335,
|
|
"grad_norm": 0.37774101351813544,
|
|
"learning_rate": 2.0182049970268355e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1540232002735138,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5801.1,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 3.8412698412698414,
|
|
"grad_norm": 0.40806762468519775,
|
|
"learning_rate": 2.010289877630902e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1633833944797516,
|
|
"step": 2420,
|
|
"valid_targets_mean": 6180.1,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 3.8492063492063493,
|
|
"grad_norm": 0.4658635857688465,
|
|
"learning_rate": 2.002374597063858e-05,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14994177222251892,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5532.7,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.48235346332409923,
|
|
"learning_rate": 1.9944592793033255e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1955932080745697,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4724.8,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 3.865079365079365,
|
|
"grad_norm": 0.8801286649564228,
|
|
"learning_rate": 1.9865440483275086e-05,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1733412742614746,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5920.0,
|
|
"valid_targets_min": 2207
|
|
},
|
|
{
|
|
"epoch": 3.873015873015873,
|
|
"grad_norm": 0.4518531270411737,
|
|
"learning_rate": 1.978629028113254e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15560846030712128,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4698.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 3.880952380952381,
|
|
"grad_norm": 0.4095907564342567,
|
|
"learning_rate": 1.9707143426341058e-05,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14006567001342773,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5711.9,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 0.38371188352350993,
|
|
"learning_rate": 1.962800115858364e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15609079599380493,
|
|
"step": 2450,
|
|
"valid_targets_mean": 6032.2,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 3.8968253968253967,
|
|
"grad_norm": 0.4118539842920471,
|
|
"learning_rate": 1.9548864717471472e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14997440576553345,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4638.2,
|
|
"valid_targets_min": 2217
|
|
},
|
|
{
|
|
"epoch": 3.9047619047619047,
|
|
"grad_norm": 0.4025039557270683,
|
|
"learning_rate": 1.9469735342524454e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1860060691833496,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5977.4,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 3.9126984126984126,
|
|
"grad_norm": 0.4033220910310328,
|
|
"learning_rate": 1.939061427315179e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1573881059885025,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5562.9,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 3.9206349206349205,
|
|
"grad_norm": 0.40909002728066796,
|
|
"learning_rate": 1.931150274863265e-05,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574082374572754,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5320.6,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 3.928571428571429,
|
|
"grad_norm": 0.42869522285562844,
|
|
"learning_rate": 1.9232402008096643e-05,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19264186918735504,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5145.9,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 3.9365079365079367,
|
|
"grad_norm": 0.40381020748858665,
|
|
"learning_rate": 1.9153313290504495e-05,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13736480474472046,
|
|
"step": 2480,
|
|
"valid_targets_mean": 5991.2,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 3.9444444444444446,
|
|
"grad_norm": 0.44514859085692043,
|
|
"learning_rate": 1.9074237834628623e-05,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14704057574272156,
|
|
"step": 2485,
|
|
"valid_targets_mean": 5541.5,
|
|
"valid_targets_min": 2647
|
|
},
|
|
{
|
|
"epoch": 3.9523809523809526,
|
|
"grad_norm": 0.4265501479636169,
|
|
"learning_rate": 1.8995176879033698e-05,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18036606907844543,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5466.8,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 3.9603174603174605,
|
|
"grad_norm": 0.4487206178934192,
|
|
"learning_rate": 1.89161316620573e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17454467713832855,
|
|
"step": 2495,
|
|
"valid_targets_mean": 5006.6,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 3.9682539682539684,
|
|
"grad_norm": 0.4033222789710387,
|
|
"learning_rate": 1.8837103421790486e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15375812351703644,
|
|
"step": 2500,
|
|
"valid_targets_mean": 6142.2,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 3.9761904761904763,
|
|
"grad_norm": 0.46326929396533706,
|
|
"learning_rate": 1.8758093396058386e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17514026165008545,
|
|
"step": 2505,
|
|
"valid_targets_mean": 5165.4,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 3.984126984126984,
|
|
"grad_norm": 0.40301115402832177,
|
|
"learning_rate": 1.8679102822400874e-05,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15668317675590515,
|
|
"step": 2510,
|
|
"valid_targets_mean": 5199.4,
|
|
"valid_targets_min": 211
|
|
},
|
|
{
|
|
"epoch": 3.992063492063492,
|
|
"grad_norm": 0.4789393713046297,
|
|
"learning_rate": 1.8600132938053098e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19280311465263367,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4916.7,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.46667355097734303,
|
|
"learning_rate": 1.8521184979926177e-05,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1989072561264038,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5119.9,
|
|
"valid_targets_min": 207
|
|
},
|
|
{
|
|
"epoch": 4.007936507936508,
|
|
"grad_norm": 0.3847916291760838,
|
|
"learning_rate": 1.8442260184587804e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16161227226257324,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6026.6,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 4.015873015873016,
|
|
"grad_norm": 0.4090621169152007,
|
|
"learning_rate": 1.8363359788242842e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14368446171283722,
|
|
"step": 2530,
|
|
"valid_targets_mean": 5862.6,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.023809523809524,
|
|
"grad_norm": 0.6141566166940376,
|
|
"learning_rate": 1.8284485026714013e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13589806854724884,
|
|
"step": 2535,
|
|
"valid_targets_mean": 6541.5,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 4.031746031746032,
|
|
"grad_norm": 0.4391239898708553,
|
|
"learning_rate": 1.8205637135422525e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15070083737373352,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5536.8,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 4.0396825396825395,
|
|
"grad_norm": 0.41333666376927736,
|
|
"learning_rate": 1.8126817349368697e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329149305820465,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5577.3,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 4.0476190476190474,
|
|
"grad_norm": 0.42756859856165286,
|
|
"learning_rate": 1.8048026903112632e-05,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718430519104004,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5278.7,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 4.055555555555555,
|
|
"grad_norm": 0.7929559502540676,
|
|
"learning_rate": 1.7969267030754903e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519625186920166,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4177.6,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 4.063492063492063,
|
|
"grad_norm": 0.43339872921712147,
|
|
"learning_rate": 1.7890538965917184e-05,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12034216523170471,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5259.5,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.071428571428571,
|
|
"grad_norm": 0.4431280646696749,
|
|
"learning_rate": 1.7811843941722952e-05,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16165150701999664,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5233.3,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 4.079365079365079,
|
|
"grad_norm": 0.4733813623577339,
|
|
"learning_rate": 1.7733183190778174e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17020142078399658,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4545.4,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 4.087301587301587,
|
|
"grad_norm": 0.4256609723968049,
|
|
"learning_rate": 1.7654557945151968e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16018405556678772,
|
|
"step": 2575,
|
|
"valid_targets_mean": 5520.1,
|
|
"valid_targets_min": 2986
|
|
},
|
|
{
|
|
"epoch": 4.095238095238095,
|
|
"grad_norm": 0.41262793853257446,
|
|
"learning_rate": 1.7575969436357352e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13311775028705597,
|
|
"step": 2580,
|
|
"valid_targets_mean": 5285.7,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 4.103174603174603,
|
|
"grad_norm": 0.45945223536796,
|
|
"learning_rate": 1.7497418895331934e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14666880667209625,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4386.4,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 4.111111111111111,
|
|
"grad_norm": 0.4675580885936456,
|
|
"learning_rate": 1.7418907552418597e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15302041172981262,
|
|
"step": 2590,
|
|
"valid_targets_mean": 5458.4,
|
|
"valid_targets_min": 2918
|
|
},
|
|
{
|
|
"epoch": 4.119047619047619,
|
|
"grad_norm": 0.43934364103885165,
|
|
"learning_rate": 1.7340436637346315e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14084570109844208,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4983.3,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 4.1269841269841265,
|
|
"grad_norm": 0.4164892763919611,
|
|
"learning_rate": 1.726200737921079e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12312793731689453,
|
|
"step": 2600,
|
|
"valid_targets_mean": 6296.2,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 4.134920634920635,
|
|
"grad_norm": 0.4532045098767217,
|
|
"learning_rate": 1.718362100645527e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13698981702327728,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4957.6,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 4.142857142857143,
|
|
"grad_norm": 0.4006124579380861,
|
|
"learning_rate": 1.710527874685129e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13815411925315857,
|
|
"step": 2610,
|
|
"valid_targets_mean": 6126.5,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 4.150793650793651,
|
|
"grad_norm": 0.4714675135702414,
|
|
"learning_rate": 1.702698182747942e-05,
|
|
"loss": 0.16,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14571262896060944,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4147.3,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 4.158730158730159,
|
|
"grad_norm": 0.42920831749050425,
|
|
"learning_rate": 1.6948731474710075e-05,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13257333636283875,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5335.2,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.48356837750736714,
|
|
"learning_rate": 1.68705289141843e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1508401334285736,
|
|
"step": 2625,
|
|
"valid_targets_mean": 6277.6,
|
|
"valid_targets_min": 2995
|
|
},
|
|
{
|
|
"epoch": 4.174603174603175,
|
|
"grad_norm": 0.4755928358851968,
|
|
"learning_rate": 1.679237537079454e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14255402982234955,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4744.4,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 4.182539682539683,
|
|
"grad_norm": 0.41585215382481877,
|
|
"learning_rate": 1.6714272068665526e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894084095954895,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5902.4,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 4.190476190476191,
|
|
"grad_norm": 0.4189696451329358,
|
|
"learning_rate": 1.663622023113501e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1487642228603363,
|
|
"step": 2640,
|
|
"valid_targets_mean": 6358.3,
|
|
"valid_targets_min": 3083
|
|
},
|
|
{
|
|
"epoch": 4.198412698412699,
|
|
"grad_norm": 0.44087488345238157,
|
|
"learning_rate": 1.655822108073467e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14662505686283112,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5523.2,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 4.2063492063492065,
|
|
"grad_norm": 0.47060099939704136,
|
|
"learning_rate": 1.648027583917095e-05,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13752037286758423,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5949.6,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 4.214285714285714,
|
|
"grad_norm": 0.4223944809526827,
|
|
"learning_rate": 1.640238572730591e-05,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14241717755794525,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5939.1,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 4.222222222222222,
|
|
"grad_norm": 0.3989848199161138,
|
|
"learning_rate": 1.632455196513809e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15084542334079742,
|
|
"step": 2660,
|
|
"valid_targets_mean": 6282.1,
|
|
"valid_targets_min": 3096
|
|
},
|
|
{
|
|
"epoch": 4.23015873015873,
|
|
"grad_norm": 0.4334279841268595,
|
|
"learning_rate": 1.624677577178345e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15273742377758026,
|
|
"step": 2665,
|
|
"valid_targets_mean": 5628.8,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 4.238095238095238,
|
|
"grad_norm": 0.5090603717967935,
|
|
"learning_rate": 1.616905836545624e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14674241840839386,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4538.6,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 4.246031746031746,
|
|
"grad_norm": 0.40233869436893316,
|
|
"learning_rate": 1.6091400963449894e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16291236877441406,
|
|
"step": 2675,
|
|
"valid_targets_mean": 5911.8,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 4.253968253968254,
|
|
"grad_norm": 0.46919428409154407,
|
|
"learning_rate": 1.6013804782118043e-05,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15669263899326324,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4653.7,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 4.261904761904762,
|
|
"grad_norm": 0.46975419177737526,
|
|
"learning_rate": 1.5936271036855372e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15147748589515686,
|
|
"step": 2685,
|
|
"valid_targets_mean": 6341.4,
|
|
"valid_targets_min": 3029
|
|
},
|
|
{
|
|
"epoch": 4.26984126984127,
|
|
"grad_norm": 0.416023668049094,
|
|
"learning_rate": 1.585880094207864e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15425267815589905,
|
|
"step": 2690,
|
|
"valid_targets_mean": 5568.8,
|
|
"valid_targets_min": 2137
|
|
},
|
|
{
|
|
"epoch": 4.277777777777778,
|
|
"grad_norm": 0.48941294002103736,
|
|
"learning_rate": 1.5781395711207664e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16209441423416138,
|
|
"step": 2695,
|
|
"valid_targets_mean": 5182.4,
|
|
"valid_targets_min": 3065
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.4233678017768594,
|
|
"learning_rate": 1.5704056556646255e-05,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15272748470306396,
|
|
"step": 2700,
|
|
"valid_targets_mean": 6165.9,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.2936507936507935,
|
|
"grad_norm": 0.4883272680118086,
|
|
"learning_rate": 1.562678468976329e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14731578528881073,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3793.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 4.301587301587301,
|
|
"grad_norm": 0.4278181836239168,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14064767956733704,
|
|
"step": 2710,
|
|
"valid_targets_mean": 6504.5,
|
|
"valid_targets_min": 2638
|
|
},
|
|
{
|
|
"epoch": 4.309523809523809,
|
|
"grad_norm": 0.44190091334584447,
|
|
"learning_rate": 1.5472447659219573e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1606476753950119,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5205.2,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 4.317460317460317,
|
|
"grad_norm": 0.4287418443341925,
|
|
"learning_rate": 1.5395384912951096e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143661230802536,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5759.1,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 4.325396825396825,
|
|
"grad_norm": 0.431052688233739,
|
|
"learning_rate": 1.531839428910774e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13569197058677673,
|
|
"step": 2725,
|
|
"valid_targets_mean": 5779.1,
|
|
"valid_targets_min": 2902
|
|
},
|
|
{
|
|
"epoch": 4.333333333333333,
|
|
"grad_norm": 0.4462658898939222,
|
|
"learning_rate": 1.5241476993599318e-05,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15712681412696838,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5659.5,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 4.341269841269841,
|
|
"grad_norm": 0.4365063885634691,
|
|
"learning_rate": 1.5164634231187106e-05,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14231045544147491,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5411.6,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 4.349206349206349,
|
|
"grad_norm": 0.3675606877485401,
|
|
"learning_rate": 1.5087867205464933e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12964603304862976,
|
|
"step": 2740,
|
|
"valid_targets_mean": 6364.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 4.357142857142857,
|
|
"grad_norm": 0.44160911003260395,
|
|
"learning_rate": 1.5011177118840376e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406039297580719,
|
|
"step": 2745,
|
|
"valid_targets_mean": 5633.9,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 4.365079365079365,
|
|
"grad_norm": 0.45789145016787364,
|
|
"learning_rate": 1.4934565172515917e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16867199540138245,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4970.6,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 4.3730158730158735,
|
|
"grad_norm": 0.41734019364461133,
|
|
"learning_rate": 1.4858032566470107e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14065919816493988,
|
|
"step": 2755,
|
|
"valid_targets_mean": 6324.4,
|
|
"valid_targets_min": 2455
|
|
},
|
|
{
|
|
"epoch": 4.380952380952381,
|
|
"grad_norm": 0.40415096511524873,
|
|
"learning_rate": 1.4781580499438794e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13654825091362,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5964.2,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 4.388888888888889,
|
|
"grad_norm": 0.40778882084936363,
|
|
"learning_rate": 1.4705210168896327e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12639005482196808,
|
|
"step": 2765,
|
|
"valid_targets_mean": 5996.9,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 4.396825396825397,
|
|
"grad_norm": 0.44063352236267495,
|
|
"learning_rate": 1.462892277103681e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14338403940200806,
|
|
"step": 2770,
|
|
"valid_targets_mean": 5152.0,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 4.404761904761905,
|
|
"grad_norm": 0.3874565040882736,
|
|
"learning_rate": 1.455271950075539e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14000162482261658,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5943.4,
|
|
"valid_targets_min": 3591
|
|
},
|
|
{
|
|
"epoch": 4.412698412698413,
|
|
"grad_norm": 0.3941349722942052,
|
|
"learning_rate": 1.4476601551629493e-05,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1478402018547058,
|
|
"step": 2780,
|
|
"valid_targets_mean": 6708.7,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 4.420634920634921,
|
|
"grad_norm": 0.41875630131970193,
|
|
"learning_rate": 1.4400570115900147e-05,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15849897265434265,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5522.6,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 4.428571428571429,
|
|
"grad_norm": 0.5412264352868692,
|
|
"learning_rate": 1.4324626384453345e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12859734892845154,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4754.4,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 4.436507936507937,
|
|
"grad_norm": 0.42316575140054596,
|
|
"learning_rate": 1.4248771546801339e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14226463437080383,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5423.4,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 0.431013383627872,
|
|
"learning_rate": 1.4173006791064023e-05,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14574524760246277,
|
|
"step": 2800,
|
|
"valid_targets_mean": 5876.1,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 4.4523809523809526,
|
|
"grad_norm": 0.4037541081664408,
|
|
"learning_rate": 1.4097333303950368e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440451443195343,
|
|
"step": 2805,
|
|
"valid_targets_mean": 5450.2,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 4.4603174603174605,
|
|
"grad_norm": 0.3978151834112168,
|
|
"learning_rate": 1.4021752270739759e-05,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14280155301094055,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6047.3,
|
|
"valid_targets_min": 2885
|
|
},
|
|
{
|
|
"epoch": 4.468253968253968,
|
|
"grad_norm": 0.39606766776930213,
|
|
"learning_rate": 1.3946264875263485e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13499273359775543,
|
|
"step": 2815,
|
|
"valid_targets_mean": 5398.1,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 4.476190476190476,
|
|
"grad_norm": 0.41951195439714245,
|
|
"learning_rate": 1.3870872299886184e-05,
|
|
"loss": 0.1522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15680038928985596,
|
|
"step": 2820,
|
|
"valid_targets_mean": 6016.8,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.484126984126984,
|
|
"grad_norm": 0.6264261787683508,
|
|
"learning_rate": 1.3795575725487303e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16164864599704742,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5240.9,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 4.492063492063492,
|
|
"grad_norm": 0.8311634034364538,
|
|
"learning_rate": 1.3720376331442652e-05,
|
|
"loss": 0.1527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18453676998615265,
|
|
"step": 2830,
|
|
"valid_targets_mean": 6361.3,
|
|
"valid_targets_min": 3395
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"grad_norm": 0.42732218297283503,
|
|
"learning_rate": 1.364527529560586e-05,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15269696712493896,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5456.8,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 4.507936507936508,
|
|
"grad_norm": 0.39922258358961354,
|
|
"learning_rate": 1.3570273794289978e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15746167302131653,
|
|
"step": 2840,
|
|
"valid_targets_mean": 5865.9,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 4.515873015873016,
|
|
"grad_norm": 0.4249607848655103,
|
|
"learning_rate": 1.3495373002249061e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14502134919166565,
|
|
"step": 2845,
|
|
"valid_targets_mean": 5561.5,
|
|
"valid_targets_min": 2046
|
|
},
|
|
{
|
|
"epoch": 4.523809523809524,
|
|
"grad_norm": 0.45992538161194857,
|
|
"learning_rate": 1.3420574092659713e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13943564891815186,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4809.2,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 4.531746031746032,
|
|
"grad_norm": 0.4849743503805474,
|
|
"learning_rate": 1.3345878237102766e-05,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.140847310423851,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4423.5,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 4.5396825396825395,
|
|
"grad_norm": 0.4337324469927627,
|
|
"learning_rate": 1.3271286605544906e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428968906402588,
|
|
"step": 2860,
|
|
"valid_targets_mean": 5831.9,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 4.5476190476190474,
|
|
"grad_norm": 0.46560324944354126,
|
|
"learning_rate": 1.3196800366320357e-05,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14274823665618896,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4730.6,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 4.555555555555555,
|
|
"grad_norm": 0.44153707002684167,
|
|
"learning_rate": 1.3122420686112554e-05,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14780506491661072,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4814.2,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 4.563492063492063,
|
|
"grad_norm": 0.4539431427613466,
|
|
"learning_rate": 1.3048148729935917e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18015024065971375,
|
|
"step": 2875,
|
|
"valid_targets_mean": 5631.1,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.42536670553586153,
|
|
"learning_rate": 1.297398566111756e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14803089201450348,
|
|
"step": 2880,
|
|
"valid_targets_mean": 5638.5,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 4.579365079365079,
|
|
"grad_norm": 0.42937048670828487,
|
|
"learning_rate": 1.2899932641279082e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13798286020755768,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4947.8,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 4.587301587301587,
|
|
"grad_norm": 0.4404250376238387,
|
|
"learning_rate": 1.2825990830318395e-05,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17519637942314148,
|
|
"step": 2890,
|
|
"valid_targets_mean": 5130.4,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 4.595238095238095,
|
|
"grad_norm": 0.4758481215827844,
|
|
"learning_rate": 1.2752161386391526e-05,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13737112283706665,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4636.4,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 4.603174603174603,
|
|
"grad_norm": 0.4767645902681976,
|
|
"learning_rate": 1.2678445465894491e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16972827911376953,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4518.6,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 4.611111111111111,
|
|
"grad_norm": 0.4257119901155639,
|
|
"learning_rate": 1.2604844223445181e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15088322758674622,
|
|
"step": 2905,
|
|
"valid_targets_mean": 5354.5,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 4.619047619047619,
|
|
"grad_norm": 0.42846734809049425,
|
|
"learning_rate": 1.2531358811865268e-05,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15017569065093994,
|
|
"step": 2910,
|
|
"valid_targets_mean": 5312.0,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 4.6269841269841265,
|
|
"grad_norm": 0.4925025724969061,
|
|
"learning_rate": 1.2457990382162173e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1452239453792572,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4835.2,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 4.634920634920634,
|
|
"grad_norm": 0.4662611029234124,
|
|
"learning_rate": 1.238474008351101e-05,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15926414728164673,
|
|
"step": 2920,
|
|
"valid_targets_mean": 5120.9,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 4.642857142857143,
|
|
"grad_norm": 0.4322145630104494,
|
|
"learning_rate": 1.2311609063236594e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390167623758316,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4672.6,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 4.650793650793651,
|
|
"grad_norm": 0.4130969360637945,
|
|
"learning_rate": 1.2238598466795493e-05,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12715992331504822,
|
|
"step": 2930,
|
|
"valid_targets_mean": 5706.3,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 4.658730158730159,
|
|
"grad_norm": 0.3829382672782595,
|
|
"learning_rate": 1.2165709437758042e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14774689078330994,
|
|
"step": 2935,
|
|
"valid_targets_mean": 6549.7,
|
|
"valid_targets_min": 3799
|
|
},
|
|
{
|
|
"epoch": 4.666666666666667,
|
|
"grad_norm": 0.38939915661022567,
|
|
"learning_rate": 1.209294311779047e-05,
|
|
"loss": 0.1519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14104187488555908,
|
|
"step": 2940,
|
|
"valid_targets_mean": 6312.2,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 4.674603174603175,
|
|
"grad_norm": 0.43181417781915576,
|
|
"learning_rate": 1.2020300646637018e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11396326869726181,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5581.9,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.682539682539683,
|
|
"grad_norm": 0.44410129386120123,
|
|
"learning_rate": 1.1947783162102043e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1418922394514084,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4655.8,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 4.690476190476191,
|
|
"grad_norm": 0.48458127033747794,
|
|
"learning_rate": 1.1875391800032248e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1841154396533966,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4632.9,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 4.698412698412699,
|
|
"grad_norm": 0.4032339427441577,
|
|
"learning_rate": 1.1803127694298873e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14213016629219055,
|
|
"step": 2960,
|
|
"valid_targets_mean": 5584.8,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 4.7063492063492065,
|
|
"grad_norm": 0.4419975398132066,
|
|
"learning_rate": 1.173099197677992e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430945247411728,
|
|
"step": 2965,
|
|
"valid_targets_mean": 5749.8,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 4.714285714285714,
|
|
"grad_norm": 0.43503178571092443,
|
|
"learning_rate": 1.1658985777342458e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14752447605133057,
|
|
"step": 2970,
|
|
"valid_targets_mean": 5720.0,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 4.722222222222222,
|
|
"grad_norm": 0.42023994518841934,
|
|
"learning_rate": 1.1587110223824874e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15875454246997833,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5639.4,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 4.73015873015873,
|
|
"grad_norm": 0.37363144454032915,
|
|
"learning_rate": 1.151536644201925e-05,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11643218994140625,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5878.9,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 4.738095238095238,
|
|
"grad_norm": 0.44030969849082424,
|
|
"learning_rate": 1.1443755555653751e-05,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15411710739135742,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4914.2,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 4.746031746031746,
|
|
"grad_norm": 0.41262993153981603,
|
|
"learning_rate": 1.1372278686374935e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1678827404975891,
|
|
"step": 2990,
|
|
"valid_targets_mean": 5873.3,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 4.753968253968254,
|
|
"grad_norm": 0.4311749330104684,
|
|
"learning_rate": 1.1300936953730273e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143174409866333,
|
|
"step": 2995,
|
|
"valid_targets_mean": 5119.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 4.761904761904762,
|
|
"grad_norm": 0.361954540458806,
|
|
"learning_rate": 1.1229731475150594e-05,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13922378420829773,
|
|
"step": 3000,
|
|
"valid_targets_mean": 6611.1,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 4.76984126984127,
|
|
"grad_norm": 0.3966366790173581,
|
|
"learning_rate": 1.1158663365932529e-05,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15058225393295288,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5934.8,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 4.777777777777778,
|
|
"grad_norm": 0.4612543471363477,
|
|
"learning_rate": 1.1087733739221109e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15629416704177856,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4712.2,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 4.785714285714286,
|
|
"grad_norm": 0.38989272621213816,
|
|
"learning_rate": 1.1016943705992311e-05,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1398439258337021,
|
|
"step": 3015,
|
|
"valid_targets_mean": 6174.9,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 4.7936507936507935,
|
|
"grad_norm": 0.4371800342017838,
|
|
"learning_rate": 1.0946294375035639e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522529125213623,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4937.9,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 4.801587301587301,
|
|
"grad_norm": 0.5205141553470364,
|
|
"learning_rate": 1.087578685293674e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15628096461296082,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5111.9,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 4.809523809523809,
|
|
"grad_norm": 0.4077302471049976,
|
|
"learning_rate": 1.080542224406015e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14736653864383698,
|
|
"step": 3030,
|
|
"valid_targets_mean": 5978.1,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 4.817460317460317,
|
|
"grad_norm": 0.4091248100895989,
|
|
"learning_rate": 1.0735201650531915e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352832168340683,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5664.2,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 4.825396825396825,
|
|
"grad_norm": 0.3968540500697524,
|
|
"learning_rate": 1.066512617222235e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12752054631710052,
|
|
"step": 3040,
|
|
"valid_targets_mean": 6093.6,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 4.833333333333333,
|
|
"grad_norm": 0.3642854376590935,
|
|
"learning_rate": 1.059519690672884e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15932929515838623,
|
|
"step": 3045,
|
|
"valid_targets_mean": 7128.1,
|
|
"valid_targets_min": 2558
|
|
},
|
|
{
|
|
"epoch": 4.841269841269841,
|
|
"grad_norm": 0.38602348967427114,
|
|
"learning_rate": 1.0525414949358614e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1197904720902443,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5888.0,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 4.849206349206349,
|
|
"grad_norm": 0.510022835675847,
|
|
"learning_rate": 1.0455781393111613e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14147284626960754,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5739.3,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.6751258587414498,
|
|
"learning_rate": 1.0386297328663353e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16436898708343506,
|
|
"step": 3060,
|
|
"valid_targets_mean": 6290.2,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 4.865079365079366,
|
|
"grad_norm": 0.4424084854510193,
|
|
"learning_rate": 1.0316963844347843e-05,
|
|
"loss": 0.1522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15425124764442444,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5844.6,
|
|
"valid_targets_min": 2170
|
|
},
|
|
{
|
|
"epoch": 4.8730158730158735,
|
|
"grad_norm": 0.4168794838811193,
|
|
"learning_rate": 1.0247782026140576e-05,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13188251852989197,
|
|
"step": 3070,
|
|
"valid_targets_mean": 5396.5,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 4.880952380952381,
|
|
"grad_norm": 0.49315142334278483,
|
|
"learning_rate": 1.017875295764144e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13740713894367218,
|
|
"step": 3075,
|
|
"valid_targets_mean": 5882.2,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 4.888888888888889,
|
|
"grad_norm": 0.4543689569217225,
|
|
"learning_rate": 1.0109877720057818e-05,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16015523672103882,
|
|
"step": 3080,
|
|
"valid_targets_mean": 5451.6,
|
|
"valid_targets_min": 2056
|
|
},
|
|
{
|
|
"epoch": 4.896825396825397,
|
|
"grad_norm": 0.36802504019714555,
|
|
"learning_rate": 1.0041157392187651e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11829912662506104,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5831.1,
|
|
"valid_targets_min": 2830
|
|
},
|
|
{
|
|
"epoch": 4.904761904761905,
|
|
"grad_norm": 0.3966134482676806,
|
|
"learning_rate": 9.972593050402471e-06,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14243818819522858,
|
|
"step": 3090,
|
|
"valid_targets_mean": 6406.8,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 4.912698412698413,
|
|
"grad_norm": 0.4183961944811586,
|
|
"learning_rate": 9.904185768630612e-06,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16794633865356445,
|
|
"step": 3095,
|
|
"valid_targets_mean": 6009.8,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 4.920634920634921,
|
|
"grad_norm": 0.44212723748685295,
|
|
"learning_rate": 9.835936618340377e-06,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14964662492275238,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4590.1,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 4.928571428571429,
|
|
"grad_norm": 0.41070340267056166,
|
|
"learning_rate": 9.76784666852323e-06,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12387900054454803,
|
|
"step": 3105,
|
|
"valid_targets_mean": 6402.7,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 4.936507936507937,
|
|
"grad_norm": 0.44233711675764437,
|
|
"learning_rate": 9.699916985677062e-06,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574784517288208,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4382.4,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 4.944444444444445,
|
|
"grad_norm": 0.46887051592133433,
|
|
"learning_rate": 9.6321486337895e-06,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15827694535255432,
|
|
"step": 3115,
|
|
"valid_targets_mean": 5222.6,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 4.9523809523809526,
|
|
"grad_norm": 0.41511980613828253,
|
|
"learning_rate": 9.564542674321228e-06,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14313764870166779,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5663.2,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.9603174603174605,
|
|
"grad_norm": 0.4621864142346036,
|
|
"learning_rate": 9.49710016618937e-06,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15151920914649963,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5380.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 4.968253968253968,
|
|
"grad_norm": 0.48527019717345327,
|
|
"learning_rate": 9.429822165750893e-06,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14661577343940735,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4398.2,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.976190476190476,
|
|
"grad_norm": 0.3831996919641483,
|
|
"learning_rate": 9.36270972678607e-06,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13461735844612122,
|
|
"step": 3135,
|
|
"valid_targets_mean": 6052.1,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 4.984126984126984,
|
|
"grad_norm": 0.4100072649060219,
|
|
"learning_rate": 9.295763900481977e-06,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12445595860481262,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5427.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 4.992063492063492,
|
|
"grad_norm": 0.46254552204793853,
|
|
"learning_rate": 9.22898573541602e-06,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16388916969299316,
|
|
"step": 3145,
|
|
"valid_targets_mean": 5861.9,
|
|
"valid_targets_min": 2467
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.40795669884699387,
|
|
"learning_rate": 9.162376277539513e-06,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15090236067771912,
|
|
"step": 3150,
|
|
"valid_targets_mean": 5592.5,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 5.007936507936508,
|
|
"grad_norm": 0.3857823068068869,
|
|
"learning_rate": 9.095936570161301e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14320944249629974,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5639.6,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 5.015873015873016,
|
|
"grad_norm": 0.5435449453461013,
|
|
"learning_rate": 9.029667653931411e-06,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401306390762329,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5760.2,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 5.023809523809524,
|
|
"grad_norm": 0.6789857642195729,
|
|
"learning_rate": 8.96357056682475e-06,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12801998853683472,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5610.3,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 5.031746031746032,
|
|
"grad_norm": 0.38873846063838485,
|
|
"learning_rate": 8.897646344124882e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11531123518943787,
|
|
"step": 3170,
|
|
"valid_targets_mean": 6343.7,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 5.0396825396825395,
|
|
"grad_norm": 0.4632360566976383,
|
|
"learning_rate": 8.83189601840773e-06,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14523249864578247,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5589.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 5.0476190476190474,
|
|
"grad_norm": 0.44480606265136036,
|
|
"learning_rate": 8.766320619525511e-06,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437515914440155,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5137.6,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 5.055555555555555,
|
|
"grad_norm": 0.40620681435947337,
|
|
"learning_rate": 8.700921174590525e-06,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13447782397270203,
|
|
"step": 3185,
|
|
"valid_targets_mean": 6346.1,
|
|
"valid_targets_min": 4150
|
|
},
|
|
{
|
|
"epoch": 5.063492063492063,
|
|
"grad_norm": 0.46662620571823504,
|
|
"learning_rate": 8.63569870795907e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14662285149097443,
|
|
"step": 3190,
|
|
"valid_targets_mean": 6060.2,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 5.071428571428571,
|
|
"grad_norm": 0.4654130995805731,
|
|
"learning_rate": 8.570654241215466e-06,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14438563585281372,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5318.1,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 5.079365079365079,
|
|
"grad_norm": 0.40240677129462593,
|
|
"learning_rate": 8.505788793155978e-06,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11840318888425827,
|
|
"step": 3200,
|
|
"valid_targets_mean": 6147.7,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 5.087301587301587,
|
|
"grad_norm": 0.5552645220273652,
|
|
"learning_rate": 8.441103379772893e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15520301461219788,
|
|
"step": 3205,
|
|
"valid_targets_mean": 5761.9,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 5.095238095238095,
|
|
"grad_norm": 0.4847744229611747,
|
|
"learning_rate": 8.376599014238605e-06,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12781260907649994,
|
|
"step": 3210,
|
|
"valid_targets_mean": 5049.5,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 5.103174603174603,
|
|
"grad_norm": 0.4448058887555173,
|
|
"learning_rate": 8.312276706889738e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14810214936733246,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4893.2,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 5.111111111111111,
|
|
"grad_norm": 0.3769447002552102,
|
|
"learning_rate": 8.24813746521133e-06,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11988615989685059,
|
|
"step": 3220,
|
|
"valid_targets_mean": 6354.1,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 5.119047619047619,
|
|
"grad_norm": 0.40775763269992454,
|
|
"learning_rate": 8.184182293821046e-06,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13774478435516357,
|
|
"step": 3225,
|
|
"valid_targets_mean": 6155.4,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 5.1269841269841265,
|
|
"grad_norm": 0.4621791306803943,
|
|
"learning_rate": 8.120412194453442e-06,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13131381571292877,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5662.6,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 5.134920634920635,
|
|
"grad_norm": 0.47477190317994294,
|
|
"learning_rate": 8.056828165944282e-06,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11765207350254059,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4793.4,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 5.142857142857143,
|
|
"grad_norm": 0.4211817590749248,
|
|
"learning_rate": 7.993431204214883e-06,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.140112042427063,
|
|
"step": 3240,
|
|
"valid_targets_mean": 6042.8,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 5.150793650793651,
|
|
"grad_norm": 0.4100200812051378,
|
|
"learning_rate": 7.93022230225652e-06,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1306135654449463,
|
|
"step": 3245,
|
|
"valid_targets_mean": 5446.9,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 5.158730158730159,
|
|
"grad_norm": 0.4654477832902761,
|
|
"learning_rate": 7.867202450114892e-06,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1470104455947876,
|
|
"step": 3250,
|
|
"valid_targets_mean": 4906.9,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 5.166666666666667,
|
|
"grad_norm": 0.523468892447528,
|
|
"learning_rate": 7.804372634874582e-06,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13795553147792816,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4284.5,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 5.174603174603175,
|
|
"grad_norm": 0.6078996381505655,
|
|
"learning_rate": 7.74173384064359e-06,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1462775468826294,
|
|
"step": 3260,
|
|
"valid_targets_mean": 5041.4,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 5.182539682539683,
|
|
"grad_norm": 0.4607675067323951,
|
|
"learning_rate": 7.679287048537987e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391090750694275,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5745.2,
|
|
"valid_targets_min": 336
|
|
},
|
|
{
|
|
"epoch": 5.190476190476191,
|
|
"grad_norm": 0.4923341770949091,
|
|
"learning_rate": 7.617033236666469e-06,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1512683480978012,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4856.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.198412698412699,
|
|
"grad_norm": 0.4823935955118556,
|
|
"learning_rate": 7.55497338011506e-06,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12928101420402527,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5083.1,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 5.2063492063492065,
|
|
"grad_norm": 0.4571859304803118,
|
|
"learning_rate": 7.493108450931879e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.147546648979187,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5298.1,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 5.214285714285714,
|
|
"grad_norm": 0.4656323081387472,
|
|
"learning_rate": 7.4314394181118636e-06,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13368697464466095,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5323.8,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 5.222222222222222,
|
|
"grad_norm": 0.47778320363976773,
|
|
"learning_rate": 7.369967247581611e-06,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14050470292568207,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4988.6,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 5.23015873015873,
|
|
"grad_norm": 0.47185902185121587,
|
|
"learning_rate": 7.3086929021842575e-06,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14221060276031494,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4921.3,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 5.238095238095238,
|
|
"grad_norm": 0.40677949622794063,
|
|
"learning_rate": 7.247617341664384e-06,
|
|
"loss": 0.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15191778540611267,
|
|
"step": 3300,
|
|
"valid_targets_mean": 6783.2,
|
|
"valid_targets_min": 2108
|
|
},
|
|
{
|
|
"epoch": 5.246031746031746,
|
|
"grad_norm": 0.5597275095189963,
|
|
"learning_rate": 7.186741522652994e-06,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1412215381860733,
|
|
"step": 3305,
|
|
"valid_targets_mean": 5306.6,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 5.253968253968254,
|
|
"grad_norm": 0.4136667615907917,
|
|
"learning_rate": 7.12606639865252e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13404718041419983,
|
|
"step": 3310,
|
|
"valid_targets_mean": 5412.3,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 5.261904761904762,
|
|
"grad_norm": 0.5072282174178516,
|
|
"learning_rate": 7.065592920021893e-06,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1653522104024887,
|
|
"step": 3315,
|
|
"valid_targets_mean": 5170.6,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 5.26984126984127,
|
|
"grad_norm": 0.43649425013960674,
|
|
"learning_rate": 7.005322033961679e-06,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14359837770462036,
|
|
"step": 3320,
|
|
"valid_targets_mean": 5731.9,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 5.277777777777778,
|
|
"grad_norm": 0.4609844808793299,
|
|
"learning_rate": 6.945254684499185e-06,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14887037873268127,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5072.8,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 5.285714285714286,
|
|
"grad_norm": 0.4423681058460165,
|
|
"learning_rate": 6.8853918124737274e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1400473713874817,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5419.5,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 5.2936507936507935,
|
|
"grad_norm": 0.4833740463102983,
|
|
"learning_rate": 6.825734355521898e-06,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1373916119337082,
|
|
"step": 3335,
|
|
"valid_targets_mean": 5413.9,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 5.301587301587301,
|
|
"grad_norm": 0.46607539050074326,
|
|
"learning_rate": 6.766283248062817e-06,
|
|
"loss": 0.1444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14503991603851318,
|
|
"step": 3340,
|
|
"valid_targets_mean": 5005.1,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 5.309523809523809,
|
|
"grad_norm": 0.46602580090763035,
|
|
"learning_rate": 6.707039421283559e-06,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1407843828201294,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4867.7,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 5.317460317460317,
|
|
"grad_norm": 0.4977380166852099,
|
|
"learning_rate": 6.648003803124559e-06,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18442052602767944,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4517.1,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 5.325396825396825,
|
|
"grad_norm": 0.45300213871690015,
|
|
"learning_rate": 6.589177318265047e-06,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13406887650489807,
|
|
"step": 3355,
|
|
"valid_targets_mean": 5391.9,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 5.333333333333333,
|
|
"grad_norm": 0.4255524573366613,
|
|
"learning_rate": 6.53056088810857e-06,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14955641329288483,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5542.8,
|
|
"valid_targets_min": 2458
|
|
},
|
|
{
|
|
"epoch": 5.341269841269841,
|
|
"grad_norm": 0.5534775234199173,
|
|
"learning_rate": 6.472155430768608e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12088638544082642,
|
|
"step": 3365,
|
|
"valid_targets_mean": 6366.4,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 5.349206349206349,
|
|
"grad_norm": 0.4364437395513567,
|
|
"learning_rate": 6.413961861054132e-06,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15652699768543243,
|
|
"step": 3370,
|
|
"valid_targets_mean": 5471.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.357142857142857,
|
|
"grad_norm": 0.45770813360412227,
|
|
"learning_rate": 6.3559810904553095e-06,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14036116003990173,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5520.9,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 5.365079365079365,
|
|
"grad_norm": 0.4153555346953972,
|
|
"learning_rate": 6.298214027129219e-06,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13151246309280396,
|
|
"step": 3380,
|
|
"valid_targets_mean": 6010.9,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 5.3730158730158735,
|
|
"grad_norm": 0.4451350722143499,
|
|
"learning_rate": 6.240661575885629e-06,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1685609072446823,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 5.380952380952381,
|
|
"grad_norm": 0.46292596088927307,
|
|
"learning_rate": 6.183324638172819e-06,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15203514695167542,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5266.0,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 5.388888888888889,
|
|
"grad_norm": 0.4676464235157195,
|
|
"learning_rate": 6.126204112063463e-06,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13088193535804749,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4737.1,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 5.396825396825397,
|
|
"grad_norm": 0.6797962177962652,
|
|
"learning_rate": 6.069300892240564e-06,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16591420769691467,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4606.3,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 5.404761904761905,
|
|
"grad_norm": 0.47267093685643496,
|
|
"learning_rate": 6.0126158699834625e-06,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13353607058525085,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5052.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 5.412698412698413,
|
|
"grad_norm": 0.428442687908954,
|
|
"learning_rate": 5.956149933153816e-06,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12273760139942169,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5588.0,
|
|
"valid_targets_min": 2053
|
|
},
|
|
{
|
|
"epoch": 5.420634920634921,
|
|
"grad_norm": 0.4412367255518039,
|
|
"learning_rate": 5.899903966181751e-06,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14523178339004517,
|
|
"step": 3415,
|
|
"valid_targets_mean": 5505.2,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 5.428571428571429,
|
|
"grad_norm": 0.43781568976305363,
|
|
"learning_rate": 5.843878850052007e-06,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14099499583244324,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5586.9,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 5.436507936507937,
|
|
"grad_norm": 0.46059817758019467,
|
|
"learning_rate": 5.788075462290084e-06,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1432957500219345,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4740.2,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 5.444444444444445,
|
|
"grad_norm": 0.415134511085044,
|
|
"learning_rate": 5.732494676948554e-06,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12942150235176086,
|
|
"step": 3430,
|
|
"valid_targets_mean": 6054.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 5.4523809523809526,
|
|
"grad_norm": 0.41001838037825894,
|
|
"learning_rate": 5.677137364593363e-06,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14375941455364227,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5782.4,
|
|
"valid_targets_min": 2362
|
|
},
|
|
{
|
|
"epoch": 5.4603174603174605,
|
|
"grad_norm": 0.4462203270648477,
|
|
"learning_rate": 5.622004392290163e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13974609971046448,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5166.1,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 5.468253968253968,
|
|
"grad_norm": 0.5621286314149347,
|
|
"learning_rate": 5.567096623590758e-06,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14251437783241272,
|
|
"step": 3445,
|
|
"valid_targets_mean": 5521.8,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 5.476190476190476,
|
|
"grad_norm": 0.3951253389087575,
|
|
"learning_rate": 5.512414918519573e-06,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138666570186615,
|
|
"step": 3450,
|
|
"valid_targets_mean": 6410.9,
|
|
"valid_targets_min": 2903
|
|
},
|
|
{
|
|
"epoch": 5.484126984126984,
|
|
"grad_norm": 0.42978600879401685,
|
|
"learning_rate": 5.457960133560179e-06,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13873498141765594,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5563.0,
|
|
"valid_targets_min": 2443
|
|
},
|
|
{
|
|
"epoch": 5.492063492063492,
|
|
"grad_norm": 0.4763755663272107,
|
|
"learning_rate": 5.403733121641883e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14260432124137878,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5387.6,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 5.5,
|
|
"grad_norm": 0.45865214891995293,
|
|
"learning_rate": 5.349734732126366e-06,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15055881440639496,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5297.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 5.507936507936508,
|
|
"grad_norm": 0.4739801326379873,
|
|
"learning_rate": 5.295965810794376e-06,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341158002614975,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4671.8,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 5.515873015873016,
|
|
"grad_norm": 0.4054624348563083,
|
|
"learning_rate": 5.2424271998324895e-06,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13239628076553345,
|
|
"step": 3475,
|
|
"valid_targets_mean": 5538.2,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 5.523809523809524,
|
|
"grad_norm": 0.47822327586671515,
|
|
"learning_rate": 5.189119737819912e-06,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13307338953018188,
|
|
"step": 3480,
|
|
"valid_targets_mean": 5326.6,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 5.531746031746032,
|
|
"grad_norm": 0.45640673731159975,
|
|
"learning_rate": 5.136044259715342e-06,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569247543811798,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5192.1,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 5.5396825396825395,
|
|
"grad_norm": 0.47606206142121826,
|
|
"learning_rate": 5.083201596843905e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13270564377307892,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5579.8,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 5.5476190476190474,
|
|
"grad_norm": 0.48207429223420034,
|
|
"learning_rate": 5.030592576884117e-06,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1469019204378128,
|
|
"step": 3495,
|
|
"valid_targets_mean": 5300.8,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 0.46920831785621236,
|
|
"learning_rate": 4.978218023854928e-06,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14404523372650146,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4518.9,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 5.563492063492063,
|
|
"grad_norm": 0.42912173167963297,
|
|
"learning_rate": 4.926078758102834e-06,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1521531045436859,
|
|
"step": 3505,
|
|
"valid_targets_mean": 5723.4,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 5.571428571428571,
|
|
"grad_norm": 0.503182430334019,
|
|
"learning_rate": 4.87417559628897e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13956725597381592,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5755.2,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 5.579365079365079,
|
|
"grad_norm": 0.483615956667945,
|
|
"learning_rate": 4.822509351376399e-06,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15917867422103882,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4745.8,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 5.587301587301587,
|
|
"grad_norm": 0.44843746346045477,
|
|
"learning_rate": 4.7710808326173115e-06,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14553232491016388,
|
|
"step": 3520,
|
|
"valid_targets_mean": 5322.7,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 5.595238095238095,
|
|
"grad_norm": 0.4446428825967545,
|
|
"learning_rate": 4.719890845540385e-06,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446400284767151,
|
|
"step": 3525,
|
|
"valid_targets_mean": 5464.6,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 5.603174603174603,
|
|
"grad_norm": 0.44461133087445714,
|
|
"learning_rate": 4.668940191938156e-06,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12743496894836426,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5617.1,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 5.611111111111111,
|
|
"grad_norm": 0.46893046808084937,
|
|
"learning_rate": 4.618229669854464e-06,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406087577342987,
|
|
"step": 3535,
|
|
"valid_targets_mean": 5965.8,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 5.619047619047619,
|
|
"grad_norm": 0.49547441030459144,
|
|
"learning_rate": 4.567760073571947e-06,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14891457557678223,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4223.9,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 5.6269841269841265,
|
|
"grad_norm": 0.48451923979691575,
|
|
"learning_rate": 4.51753219359961e-06,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14008775353431702,
|
|
"step": 3545,
|
|
"valid_targets_mean": 5037.8,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 5.634920634920634,
|
|
"grad_norm": 0.4290885471458542,
|
|
"learning_rate": 4.467546816660433e-06,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1463758647441864,
|
|
"step": 3550,
|
|
"valid_targets_mean": 6069.6,
|
|
"valid_targets_min": 2782
|
|
},
|
|
{
|
|
"epoch": 5.642857142857143,
|
|
"grad_norm": 0.4422867985916354,
|
|
"learning_rate": 4.417804725679058e-06,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522657573223114,
|
|
"step": 3555,
|
|
"valid_targets_mean": 7388.6,
|
|
"valid_targets_min": 2410
|
|
},
|
|
{
|
|
"epoch": 5.650793650793651,
|
|
"grad_norm": 0.4326771572943575,
|
|
"learning_rate": 4.368306699769518e-06,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13901498913764954,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5687.3,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 5.658730158730159,
|
|
"grad_norm": 0.5184808560608285,
|
|
"learning_rate": 4.319053514223033e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12393245846033096,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4805.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 5.666666666666667,
|
|
"grad_norm": 0.4747228961326593,
|
|
"learning_rate": 4.270045940495879e-06,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519310176372528,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4909.6,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 5.674603174603175,
|
|
"grad_norm": 0.4817480251050087,
|
|
"learning_rate": 4.221284746197292e-06,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14879171550273895,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5205.3,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 5.682539682539683,
|
|
"grad_norm": 0.5206783763038453,
|
|
"learning_rate": 4.172770695077437e-06,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16022273898124695,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4187.7,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 5.690476190476191,
|
|
"grad_norm": 0.42530942051464743,
|
|
"learning_rate": 4.124504547015487e-06,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1300957351922989,
|
|
"step": 3585,
|
|
"valid_targets_mean": 6382.3,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 5.698412698412699,
|
|
"grad_norm": 0.4773846019671863,
|
|
"learning_rate": 4.0764870580076675e-06,
|
|
"loss": 0.1527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17422491312026978,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5224.0,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.7063492063492065,
|
|
"grad_norm": 0.3861656168881239,
|
|
"learning_rate": 4.0287189801554304e-06,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12745806574821472,
|
|
"step": 3595,
|
|
"valid_targets_mean": 6368.2,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 0.41849044348707654,
|
|
"learning_rate": 3.98120106165371e-06,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13698044419288635,
|
|
"step": 3600,
|
|
"valid_targets_mean": 5723.8,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 5.722222222222222,
|
|
"grad_norm": 0.5006534631008478,
|
|
"learning_rate": 3.933934046779164e-06,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15041181445121765,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4726.2,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 5.73015873015873,
|
|
"grad_norm": 0.5390021957931794,
|
|
"learning_rate": 3.886918675878513e-06,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15215769410133362,
|
|
"step": 3610,
|
|
"valid_targets_mean": 5078.9,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 5.738095238095238,
|
|
"grad_norm": 0.4242218655638763,
|
|
"learning_rate": 3.840155685356983e-06,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12816202640533447,
|
|
"step": 3615,
|
|
"valid_targets_mean": 5724.1,
|
|
"valid_targets_min": 2290
|
|
},
|
|
{
|
|
"epoch": 5.746031746031746,
|
|
"grad_norm": 0.40905901981368425,
|
|
"learning_rate": 3.793645807666735e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12471656501293182,
|
|
"step": 3620,
|
|
"valid_targets_mean": 6163.4,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 5.753968253968254,
|
|
"grad_norm": 0.5134066612062572,
|
|
"learning_rate": 3.747389771295411e-06,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13810807466506958,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5215.5,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 5.761904761904762,
|
|
"grad_norm": 0.44176531273164726,
|
|
"learning_rate": 3.701388300754709e-06,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15570278465747833,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5831.4,
|
|
"valid_targets_min": 1928
|
|
},
|
|
{
|
|
"epoch": 5.76984126984127,
|
|
"grad_norm": 0.4552213038987976,
|
|
"learning_rate": 3.6556421165690516e-06,
|
|
"loss": 0.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13112439215183258,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5127.6,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 5.777777777777778,
|
|
"grad_norm": 0.5475459293945621,
|
|
"learning_rate": 3.610151935264288e-06,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13923147320747375,
|
|
"step": 3640,
|
|
"valid_targets_mean": 5058.1,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 5.785714285714286,
|
|
"grad_norm": 0.4062383720325422,
|
|
"learning_rate": 3.5649184693564797e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12178117781877518,
|
|
"step": 3645,
|
|
"valid_targets_mean": 6188.1,
|
|
"valid_targets_min": 2820
|
|
},
|
|
{
|
|
"epoch": 5.7936507936507935,
|
|
"grad_norm": 0.41210477208428037,
|
|
"learning_rate": 3.5199424273407277e-06,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11573171615600586,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5608.1,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 5.801587301587301,
|
|
"grad_norm": 0.39026902409095005,
|
|
"learning_rate": 3.4752245136801065e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11422628164291382,
|
|
"step": 3655,
|
|
"valid_targets_mean": 6121.1,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 5.809523809523809,
|
|
"grad_norm": 0.44136839976284375,
|
|
"learning_rate": 3.430765428794569e-06,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15087169408798218,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5458.3,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 5.817460317460317,
|
|
"grad_norm": 0.47272488901004456,
|
|
"learning_rate": 3.3865658690500424e-06,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12730719149112701,
|
|
"step": 3665,
|
|
"valid_targets_mean": 5489.8,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 5.825396825396825,
|
|
"grad_norm": 0.4286583074647361,
|
|
"learning_rate": 3.34262652674749e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13272640109062195,
|
|
"step": 3670,
|
|
"valid_targets_mean": 5855.3,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.4961674424548416,
|
|
"learning_rate": 3.2989480901120684e-06,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14517953991889954,
|
|
"step": 3675,
|
|
"valid_targets_mean": 5103.6,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 5.841269841269841,
|
|
"grad_norm": 0.5038921133963972,
|
|
"learning_rate": 3.2555312432823283e-06,
|
|
"loss": 0.1381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14607466757297516,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4695.1,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 5.849206349206349,
|
|
"grad_norm": 0.4516355692524723,
|
|
"learning_rate": 3.2123766662995572e-06,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13942056894302368,
|
|
"step": 3685,
|
|
"valid_targets_mean": 5577.4,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 5.857142857142857,
|
|
"grad_norm": 0.43990400419076664,
|
|
"learning_rate": 3.1694850350970686e-06,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14388282597064972,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5896.1,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 5.865079365079366,
|
|
"grad_norm": 0.4430961156309604,
|
|
"learning_rate": 3.1268570214896265e-06,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16903355717658997,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5768.0,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 5.8730158730158735,
|
|
"grad_norm": 0.4687413821050724,
|
|
"learning_rate": 3.0844932931629602e-06,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12664330005645752,
|
|
"step": 3700,
|
|
"valid_targets_mean": 5469.9,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.880952380952381,
|
|
"grad_norm": 0.4251478406093825,
|
|
"learning_rate": 3.0423945136632626e-06,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14676138758659363,
|
|
"step": 3705,
|
|
"valid_targets_mean": 6292.8,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 5.888888888888889,
|
|
"grad_norm": 0.4418913815124218,
|
|
"learning_rate": 3.000561342386814e-06,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1596686840057373,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5276.0,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 5.896825396825397,
|
|
"grad_norm": 0.46832139188754646,
|
|
"learning_rate": 2.9589944345696596e-06,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12101615965366364,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4913.4,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 5.904761904761905,
|
|
"grad_norm": 0.4657929056680499,
|
|
"learning_rate": 2.9176944412773322e-06,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17338143289089203,
|
|
"step": 3720,
|
|
"valid_targets_mean": 5412.4,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 5.912698412698413,
|
|
"grad_norm": 0.4449595143164407,
|
|
"learning_rate": 2.876662009394673e-06,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15727762877941132,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5328.9,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 5.920634920634921,
|
|
"grad_norm": 0.39696814679103,
|
|
"learning_rate": 2.8358977816156796e-06,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11638738214969635,
|
|
"step": 3730,
|
|
"valid_targets_mean": 6867.2,
|
|
"valid_targets_min": 3676
|
|
},
|
|
{
|
|
"epoch": 5.928571428571429,
|
|
"grad_norm": 0.47195292364800606,
|
|
"learning_rate": 2.7954023964334485e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505923867225647,
|
|
"step": 3735,
|
|
"valid_targets_mean": 5757.2,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 5.936507936507937,
|
|
"grad_norm": 0.4486759780229958,
|
|
"learning_rate": 2.7551764881301955e-06,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13487672805786133,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5926.0,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 5.944444444444445,
|
|
"grad_norm": 0.45150456684299833,
|
|
"learning_rate": 2.715220686767268e-06,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13316667079925537,
|
|
"step": 3745,
|
|
"valid_targets_mean": 5609.4,
|
|
"valid_targets_min": 2528
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 0.4612924420161714,
|
|
"learning_rate": 2.6755356181753247e-06,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17791566252708435,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5493.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 5.9603174603174605,
|
|
"grad_norm": 0.4466679622369211,
|
|
"learning_rate": 2.6361219039445328e-06,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417991816997528,
|
|
"step": 3755,
|
|
"valid_targets_mean": 5531.8,
|
|
"valid_targets_min": 3560
|
|
},
|
|
{
|
|
"epoch": 5.968253968253968,
|
|
"grad_norm": 0.46290951572004435,
|
|
"learning_rate": 2.5969801614147838e-06,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1562907099723816,
|
|
"step": 3760,
|
|
"valid_targets_mean": 5726.4,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 5.976190476190476,
|
|
"grad_norm": 0.41130105503656605,
|
|
"learning_rate": 2.558111003666075e-06,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13213077187538147,
|
|
"step": 3765,
|
|
"valid_targets_mean": 5965.7,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 5.984126984126984,
|
|
"grad_norm": 0.4305429547314029,
|
|
"learning_rate": 2.519515039508893e-06,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11605265736579895,
|
|
"step": 3770,
|
|
"valid_targets_mean": 5509.0,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 5.992063492063492,
|
|
"grad_norm": 0.43615187162481,
|
|
"learning_rate": 2.481192873474667e-06,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13950824737548828,
|
|
"step": 3775,
|
|
"valid_targets_mean": 5780.9,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.39743183112610925,
|
|
"learning_rate": 2.4431451058062928e-06,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1134757548570633,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5882.9,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 6.007936507936508,
|
|
"grad_norm": 0.4320579605085731,
|
|
"learning_rate": 2.4053723324487677e-06,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12739768624305725,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4568.0,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 6.015873015873016,
|
|
"grad_norm": 0.45806291042546377,
|
|
"learning_rate": 2.3678751450398196e-06,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1493530422449112,
|
|
"step": 3790,
|
|
"valid_targets_mean": 5264.6,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 6.023809523809524,
|
|
"grad_norm": 0.4440865317653884,
|
|
"learning_rate": 2.330654130900656e-06,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13022762537002563,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4942.4,
|
|
"valid_targets_min": 2199
|
|
},
|
|
{
|
|
"epoch": 6.031746031746032,
|
|
"grad_norm": 0.4289001750038118,
|
|
"learning_rate": 2.2937098730267572e-06,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11798284202814102,
|
|
"step": 3800,
|
|
"valid_targets_mean": 5962.4,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 6.0396825396825395,
|
|
"grad_norm": 0.4804416865960379,
|
|
"learning_rate": 2.2570429500787604e-06,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13769641518592834,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4876.4,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 6.0476190476190474,
|
|
"grad_norm": 0.4248137219988032,
|
|
"learning_rate": 2.2206539363733738e-06,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09792445600032806,
|
|
"step": 3810,
|
|
"valid_targets_mean": 6735.6,
|
|
"valid_targets_min": 2875
|
|
},
|
|
{
|
|
"epoch": 6.055555555555555,
|
|
"grad_norm": 0.4458277839797382,
|
|
"learning_rate": 2.1845434018744038e-06,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13032405078411102,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5100.5,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 6.063492063492063,
|
|
"grad_norm": 0.46870997489479654,
|
|
"learning_rate": 2.148711912183803e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340586543083191,
|
|
"step": 3820,
|
|
"valid_targets_mean": 6257.2,
|
|
"valid_targets_min": 3713
|
|
},
|
|
{
|
|
"epoch": 6.071428571428571,
|
|
"grad_norm": 0.43220936389411174,
|
|
"learning_rate": 2.1131600285328458e-06,
|
|
"loss": 0.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13491111993789673,
|
|
"step": 3825,
|
|
"valid_targets_mean": 5652.9,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 6.079365079365079,
|
|
"grad_norm": 0.4938619349035986,
|
|
"learning_rate": 2.0778883077732903e-06,
|
|
"loss": 0.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12590594589710236,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5837.1,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 6.087301587301587,
|
|
"grad_norm": 0.4461842204628299,
|
|
"learning_rate": 2.0428973023686983e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12995757162570953,
|
|
"step": 3835,
|
|
"valid_targets_mean": 5839.1,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 6.095238095238095,
|
|
"grad_norm": 0.4514403709392358,
|
|
"learning_rate": 2.0081875603857726e-06,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11519353836774826,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5285.4,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 6.103174603174603,
|
|
"grad_norm": 0.5872250143318944,
|
|
"learning_rate": 1.973759625485743e-06,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12013471126556396,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3976.3,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 6.111111111111111,
|
|
"grad_norm": 0.5053201570927205,
|
|
"learning_rate": 1.9396140369159e-06,
|
|
"loss": 0.1314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14312024414539337,
|
|
"step": 3850,
|
|
"valid_targets_mean": 5034.4,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 6.119047619047619,
|
|
"grad_norm": 0.4254491047353564,
|
|
"learning_rate": 1.9057513295011087e-06,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11980589479207993,
|
|
"step": 3855,
|
|
"valid_targets_mean": 5230.5,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 6.1269841269841265,
|
|
"grad_norm": 0.45350019220553667,
|
|
"learning_rate": 1.8721720336354487e-06,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1343933343887329,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5310.0,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 6.134920634920635,
|
|
"grad_norm": 0.44908680436264514,
|
|
"learning_rate": 1.8388766752739017e-06,
|
|
"loss": 0.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13214536011219025,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4819.6,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 6.142857142857143,
|
|
"grad_norm": 0.43728782320626974,
|
|
"learning_rate": 1.805865775924116e-06,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12990088760852814,
|
|
"step": 3870,
|
|
"valid_targets_mean": 5668.4,
|
|
"valid_targets_min": 2900
|
|
},
|
|
{
|
|
"epoch": 6.150793650793651,
|
|
"grad_norm": 0.46855550237931853,
|
|
"learning_rate": 1.7731398526382416e-06,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11653086543083191,
|
|
"step": 3875,
|
|
"valid_targets_mean": 5082.9,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 6.158730158730159,
|
|
"grad_norm": 0.4633118493365201,
|
|
"learning_rate": 1.7406994180048231e-06,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1214178055524826,
|
|
"step": 3880,
|
|
"valid_targets_mean": 5079.4,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 6.166666666666667,
|
|
"grad_norm": 0.4324845887042187,
|
|
"learning_rate": 1.7085449801407783e-06,
|
|
"loss": 0.1242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11891638487577438,
|
|
"step": 3885,
|
|
"valid_targets_mean": 5482.2,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 6.174603174603175,
|
|
"grad_norm": 0.4593597812117515,
|
|
"learning_rate": 1.67667704268343e-06,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12844383716583252,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5304.3,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 6.182539682539683,
|
|
"grad_norm": 1.0409294151451973,
|
|
"learning_rate": 1.6450961047826353e-06,
|
|
"loss": 0.127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14141665399074554,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4083.4,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 6.190476190476191,
|
|
"grad_norm": 0.4147176475336051,
|
|
"learning_rate": 1.6138026610929446e-06,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12204622477293015,
|
|
"step": 3900,
|
|
"valid_targets_mean": 6026.9,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 6.198412698412699,
|
|
"grad_norm": 0.7688468165385852,
|
|
"learning_rate": 1.5827972017658732e-06,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14009518921375275,
|
|
"step": 3905,
|
|
"valid_targets_mean": 5232.4,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 6.2063492063492065,
|
|
"grad_norm": 0.5742634408323547,
|
|
"learning_rate": 1.5520802124422108e-06,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11271405220031738,
|
|
"step": 3910,
|
|
"valid_targets_mean": 5412.1,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 6.214285714285714,
|
|
"grad_norm": 0.4434637011482024,
|
|
"learning_rate": 1.5216521742444236e-06,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15959657728672028,
|
|
"step": 3915,
|
|
"valid_targets_mean": 5545.5,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 6.222222222222222,
|
|
"grad_norm": 0.4473257762920866,
|
|
"learning_rate": 1.491513563769118e-06,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13596297800540924,
|
|
"step": 3920,
|
|
"valid_targets_mean": 6079.2,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 6.23015873015873,
|
|
"grad_norm": 0.40218956890406166,
|
|
"learning_rate": 1.4616648530795673e-06,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13123834133148193,
|
|
"step": 3925,
|
|
"valid_targets_mean": 6588.4,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 6.238095238095238,
|
|
"grad_norm": 0.4665917089898694,
|
|
"learning_rate": 1.432106509698319e-06,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14458967745304108,
|
|
"step": 3930,
|
|
"valid_targets_mean": 5620.0,
|
|
"valid_targets_min": 2860
|
|
},
|
|
{
|
|
"epoch": 6.246031746031746,
|
|
"grad_norm": 0.4804278259544632,
|
|
"learning_rate": 1.4028389965998867e-06,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13646945357322693,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4918.8,
|
|
"valid_targets_min": 2109
|
|
},
|
|
{
|
|
"epoch": 6.253968253968254,
|
|
"grad_norm": 0.4573452936493578,
|
|
"learning_rate": 1.3738627722034848e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12479956448078156,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5664.0,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 6.261904761904762,
|
|
"grad_norm": 0.4112194821979111,
|
|
"learning_rate": 1.345178290365845e-06,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12601318955421448,
|
|
"step": 3945,
|
|
"valid_targets_mean": 5897.8,
|
|
"valid_targets_min": 2441
|
|
},
|
|
{
|
|
"epoch": 6.26984126984127,
|
|
"grad_norm": 0.43770788181094994,
|
|
"learning_rate": 1.3167860003741218e-06,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13687068223953247,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5294.8,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 6.277777777777778,
|
|
"grad_norm": 0.43651984734406524,
|
|
"learning_rate": 1.2886863469388389e-06,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13095057010650635,
|
|
"step": 3955,
|
|
"valid_targets_mean": 5041.6,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 6.285714285714286,
|
|
"grad_norm": 0.4407006248372022,
|
|
"learning_rate": 1.2608797701869425e-06,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13327287137508392,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5152.8,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 6.2936507936507935,
|
|
"grad_norm": 0.4830750093647113,
|
|
"learning_rate": 1.2333667056548881e-06,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12351542711257935,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4494.3,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 6.301587301587301,
|
|
"grad_norm": 0.4721717415354756,
|
|
"learning_rate": 1.2061475842818337e-06,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15659388899803162,
|
|
"step": 3970,
|
|
"valid_targets_mean": 5026.2,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 6.309523809523809,
|
|
"grad_norm": 0.42311493337432965,
|
|
"learning_rate": 1.1792228324028776e-06,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15567630529403687,
|
|
"step": 3975,
|
|
"valid_targets_mean": 5791.2,
|
|
"valid_targets_min": 3515
|
|
},
|
|
{
|
|
"epoch": 6.317460317460317,
|
|
"grad_norm": 1.4585742478929205,
|
|
"learning_rate": 1.152592871742395e-06,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13825181126594543,
|
|
"step": 3980,
|
|
"valid_targets_mean": 5244.9,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 6.325396825396825,
|
|
"grad_norm": 0.461117951811656,
|
|
"learning_rate": 1.1262581194074152e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11796639859676361,
|
|
"step": 3985,
|
|
"valid_targets_mean": 5554.2,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 6.333333333333333,
|
|
"grad_norm": 0.47633924217416224,
|
|
"learning_rate": 1.100218987881112e-06,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12955845892429352,
|
|
"step": 3990,
|
|
"valid_targets_mean": 6029.9,
|
|
"valid_targets_min": 2670
|
|
},
|
|
{
|
|
"epoch": 6.341269841269841,
|
|
"grad_norm": 0.45154703338066743,
|
|
"learning_rate": 1.0744758850163085e-06,
|
|
"loss": 0.1408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13291522860527039,
|
|
"step": 3995,
|
|
"valid_targets_mean": 5422.9,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 6.349206349206349,
|
|
"grad_norm": 0.5119900863580499,
|
|
"learning_rate": 1.0490292140291247e-06,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13096119463443756,
|
|
"step": 4000,
|
|
"valid_targets_mean": 5480.8,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 6.357142857142857,
|
|
"grad_norm": 0.5126206204332201,
|
|
"learning_rate": 1.0238793734926467e-06,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14911425113677979,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4797.0,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 6.365079365079365,
|
|
"grad_norm": 0.4880357246785836,
|
|
"learning_rate": 9.990267573306745e-07,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1387421041727066,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4544.9,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.3730158730158735,
|
|
"grad_norm": 0.40558138018757806,
|
|
"learning_rate": 9.744717548115613e-07,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12570391595363617,
|
|
"step": 4015,
|
|
"valid_targets_mean": 6268.6,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 6.380952380952381,
|
|
"grad_norm": 0.41810620070165866,
|
|
"learning_rate": 9.502147505421244e-07,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1270408034324646,
|
|
"step": 4020,
|
|
"valid_targets_mean": 6078.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.388888888888889,
|
|
"grad_norm": 0.4496644146071595,
|
|
"learning_rate": 9.262561244616108e-07,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14739742875099182,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5835.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.396825396825397,
|
|
"grad_norm": 0.45373871441149133,
|
|
"learning_rate": 9.025962518357323e-07,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11742935329675674,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4566.4,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 6.404761904761905,
|
|
"grad_norm": 0.4617395683182264,
|
|
"learning_rate": 8.792355032508282e-07,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12306074798107147,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5038.2,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 6.412698412698413,
|
|
"grad_norm": 0.4644079416505595,
|
|
"learning_rate": 8.561742446080168e-07,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280827522277832,
|
|
"step": 4040,
|
|
"valid_targets_mean": 5057.6,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.420634920634921,
|
|
"grad_norm": 0.425213082782362,
|
|
"learning_rate": 8.334128371174955e-07,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14657798409461975,
|
|
"step": 4045,
|
|
"valid_targets_mean": 6001.6,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 6.428571428571429,
|
|
"grad_norm": 0.428425934530618,
|
|
"learning_rate": 8.109516372928605e-07,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13139371573925018,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5651.9,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 6.436507936507937,
|
|
"grad_norm": 0.4405821827658464,
|
|
"learning_rate": 7.887909969455366e-07,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12469398230314255,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5376.2,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 6.444444444444445,
|
|
"grad_norm": 0.5057128403035726,
|
|
"learning_rate": 7.669312631792758e-07,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14254549145698547,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5674.9,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 6.4523809523809526,
|
|
"grad_norm": 0.615770615958985,
|
|
"learning_rate": 7.453727783846876e-07,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313585788011551,
|
|
"step": 4065,
|
|
"valid_targets_mean": 5325.8,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 6.4603174603174605,
|
|
"grad_norm": 0.4640004582080486,
|
|
"learning_rate": 7.241158802339065e-07,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14738494157791138,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5742.9,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 6.468253968253968,
|
|
"grad_norm": 0.44707812759358856,
|
|
"learning_rate": 7.031609016753016e-07,
|
|
"loss": 0.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1283096969127655,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5941.6,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 6.476190476190476,
|
|
"grad_norm": 0.5025741831025621,
|
|
"learning_rate": 6.825081709282377e-07,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14924189448356628,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5481.9,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 6.484126984126984,
|
|
"grad_norm": 0.4928322290987037,
|
|
"learning_rate": 6.62158011477958e-07,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14254453778266907,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5396.4,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 6.492063492063492,
|
|
"grad_norm": 0.4463090392869717,
|
|
"learning_rate": 6.421107420705097e-07,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291046142578125,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5304.8,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"grad_norm": 0.41940627052314144,
|
|
"learning_rate": 6.223666767077508e-07,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1368085891008377,
|
|
"step": 4095,
|
|
"valid_targets_mean": 5791.9,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 6.507936507936508,
|
|
"grad_norm": 0.38633976220944566,
|
|
"learning_rate": 6.029261246424267e-07,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11674937605857849,
|
|
"step": 4100,
|
|
"valid_targets_mean": 7173.1,
|
|
"valid_targets_min": 3321
|
|
},
|
|
{
|
|
"epoch": 6.515873015873016,
|
|
"grad_norm": 0.5710755866104514,
|
|
"learning_rate": 5.837893903733394e-07,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14703723788261414,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4992.6,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 6.523809523809524,
|
|
"grad_norm": 0.42790485901259206,
|
|
"learning_rate": 5.649567736405681e-07,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11689166724681854,
|
|
"step": 4110,
|
|
"valid_targets_mean": 5996.3,
|
|
"valid_targets_min": 3073
|
|
},
|
|
{
|
|
"epoch": 6.531746031746032,
|
|
"grad_norm": 0.3978542753238711,
|
|
"learning_rate": 5.464285694207672e-07,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12328889966011047,
|
|
"step": 4115,
|
|
"valid_targets_mean": 6366.9,
|
|
"valid_targets_min": 2921
|
|
},
|
|
{
|
|
"epoch": 6.5396825396825395,
|
|
"grad_norm": 0.4803261314786915,
|
|
"learning_rate": 5.282050679225714e-07,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13136357069015503,
|
|
"step": 4120,
|
|
"valid_targets_mean": 5456.8,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 6.5476190476190474,
|
|
"grad_norm": 0.3906973340150284,
|
|
"learning_rate": 5.102865545820245e-07,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12256291508674622,
|
|
"step": 4125,
|
|
"valid_targets_mean": 6359.6,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 6.555555555555555,
|
|
"grad_norm": 0.4059594097865994,
|
|
"learning_rate": 4.926733100581182e-07,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11908716708421707,
|
|
"step": 4130,
|
|
"valid_targets_mean": 6355.2,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 6.563492063492063,
|
|
"grad_norm": 0.5832130007483879,
|
|
"learning_rate": 4.7536561022840213e-07,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13830284774303436,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4896.4,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 6.571428571428571,
|
|
"grad_norm": 0.4059800621113446,
|
|
"learning_rate": 4.5836372618464964e-07,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12423016875982285,
|
|
"step": 4140,
|
|
"valid_targets_mean": 6594.7,
|
|
"valid_targets_min": 3811
|
|
},
|
|
{
|
|
"epoch": 6.579365079365079,
|
|
"grad_norm": 0.4185374589094962,
|
|
"learning_rate": 4.416679242286215e-07,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13419991731643677,
|
|
"step": 4145,
|
|
"valid_targets_mean": 6087.1,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 6.587301587301587,
|
|
"grad_norm": 0.5103870835544566,
|
|
"learning_rate": 4.2527846586789547e-07,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14000031352043152,
|
|
"step": 4150,
|
|
"valid_targets_mean": 4625.3,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 6.595238095238095,
|
|
"grad_norm": 0.5765993905041488,
|
|
"learning_rate": 4.0919560781176317e-07,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13032563030719757,
|
|
"step": 4155,
|
|
"valid_targets_mean": 5195.4,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 6.603174603174603,
|
|
"grad_norm": 0.48593436180980204,
|
|
"learning_rate": 3.934196019672176e-07,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13097088038921356,
|
|
"step": 4160,
|
|
"valid_targets_mean": 4735.8,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 6.611111111111111,
|
|
"grad_norm": 0.4395609496182445,
|
|
"learning_rate": 3.779506954349965e-07,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12398695945739746,
|
|
"step": 4165,
|
|
"valid_targets_mean": 6392.1,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 6.619047619047619,
|
|
"grad_norm": 0.47600560032316025,
|
|
"learning_rate": 3.6278913050572076e-07,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410660743713379,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4714.9,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 6.6269841269841265,
|
|
"grad_norm": 0.4626328635010138,
|
|
"learning_rate": 3.4793514465610414e-07,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12245401740074158,
|
|
"step": 4175,
|
|
"valid_targets_mean": 5662.9,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 6.634920634920634,
|
|
"grad_norm": 0.4856477089680751,
|
|
"learning_rate": 3.3338897054521205e-07,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12981770932674408,
|
|
"step": 4180,
|
|
"valid_targets_mean": 6161.9,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 6.642857142857143,
|
|
"grad_norm": 0.4382225877662096,
|
|
"learning_rate": 3.191508360108464e-07,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12940582633018494,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5541.3,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 6.650793650793651,
|
|
"grad_norm": 0.4467797690671947,
|
|
"learning_rate": 3.0522096406595536e-07,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12461797893047333,
|
|
"step": 4190,
|
|
"valid_targets_mean": 5350.9,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 6.658730158730159,
|
|
"grad_norm": 0.4315640371338101,
|
|
"learning_rate": 2.9159957289514926e-07,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1185673251748085,
|
|
"step": 4195,
|
|
"valid_targets_mean": 5560.6,
|
|
"valid_targets_min": 2700
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.48404703695546003,
|
|
"learning_rate": 2.782868758512791e-07,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13700520992279053,
|
|
"step": 4200,
|
|
"valid_targets_mean": 5088.4,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 6.674603174603175,
|
|
"grad_norm": 0.4561326307439442,
|
|
"learning_rate": 2.6528308145210125e-07,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14443397521972656,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5388.9,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 6.682539682539683,
|
|
"grad_norm": 0.4432184683642521,
|
|
"learning_rate": 2.525883933770046e-07,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13129255175590515,
|
|
"step": 4210,
|
|
"valid_targets_mean": 6560.8,
|
|
"valid_targets_min": 2872
|
|
},
|
|
{
|
|
"epoch": 6.690476190476191,
|
|
"grad_norm": 0.4428498025907347,
|
|
"learning_rate": 2.402030104638198e-07,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1193118616938591,
|
|
"step": 4215,
|
|
"valid_targets_mean": 5178.1,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 6.698412698412699,
|
|
"grad_norm": 0.4342898053436231,
|
|
"learning_rate": 2.2812712670571502e-07,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13040482997894287,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5622.1,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 6.7063492063492065,
|
|
"grad_norm": 2.0764732196910196,
|
|
"learning_rate": 2.1636093124814738e-07,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12159843742847443,
|
|
"step": 4225,
|
|
"valid_targets_mean": 5518.6,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 6.714285714285714,
|
|
"grad_norm": 0.4586433877189939,
|
|
"learning_rate": 2.0490460838589855e-07,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12682956457138062,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4948.1,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 6.722222222222222,
|
|
"grad_norm": 0.47132990810816733,
|
|
"learning_rate": 1.9375833756019923e-07,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15487657487392426,
|
|
"step": 4235,
|
|
"valid_targets_mean": 4774.9,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 6.73015873015873,
|
|
"grad_norm": 0.4124840094163903,
|
|
"learning_rate": 1.8292229335590716e-07,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11652678996324539,
|
|
"step": 4240,
|
|
"valid_targets_mean": 6393.1,
|
|
"valid_targets_min": 3197
|
|
},
|
|
{
|
|
"epoch": 6.738095238095238,
|
|
"grad_norm": 0.4350958770760526,
|
|
"learning_rate": 1.7239664549878688e-07,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278618425130844,
|
|
"step": 4245,
|
|
"valid_targets_mean": 5711.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 6.746031746031746,
|
|
"grad_norm": 0.4380389060922931,
|
|
"learning_rate": 1.6218155885283192e-07,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1238991990685463,
|
|
"step": 4250,
|
|
"valid_targets_mean": 5243.3,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 6.753968253968254,
|
|
"grad_norm": 0.45207585431063574,
|
|
"learning_rate": 1.5227719341769364e-07,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12772828340530396,
|
|
"step": 4255,
|
|
"valid_targets_mean": 6135.6,
|
|
"valid_targets_min": 2455
|
|
},
|
|
{
|
|
"epoch": 6.761904761904762,
|
|
"grad_norm": 0.48236608264345426,
|
|
"learning_rate": 1.4268370432618306e-07,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14505012333393097,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4687.0,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 6.76984126984127,
|
|
"grad_norm": 0.4233867675421516,
|
|
"learning_rate": 1.3340124184182178e-07,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12908919155597687,
|
|
"step": 4265,
|
|
"valid_targets_mean": 6025.6,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 6.777777777777778,
|
|
"grad_norm": 0.4877416792035469,
|
|
"learning_rate": 1.2442995135650393e-07,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15760302543640137,
|
|
"step": 4270,
|
|
"valid_targets_mean": 4828.7,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 6.785714285714286,
|
|
"grad_norm": 0.6506490177567913,
|
|
"learning_rate": 1.1576997338821339e-07,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13028597831726074,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5155.9,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 6.7936507936507935,
|
|
"grad_norm": 0.4477970591498348,
|
|
"learning_rate": 1.0742144357882567e-07,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1585049331188202,
|
|
"step": 4280,
|
|
"valid_targets_mean": 5534.2,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 6.801587301587301,
|
|
"grad_norm": 0.49507868999640176,
|
|
"learning_rate": 9.938449269197181e-08,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410304307937622,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4884.3,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 6.809523809523809,
|
|
"grad_norm": 0.4842993936056907,
|
|
"learning_rate": 9.165924661100889e-08,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12479911744594574,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5132.9,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 6.817460317460317,
|
|
"grad_norm": 0.43326494078340105,
|
|
"learning_rate": 8.424582633703493e-08,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13402575254440308,
|
|
"step": 4295,
|
|
"valid_targets_mean": 5267.1,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 6.825396825396825,
|
|
"grad_norm": 0.39800154553874223,
|
|
"learning_rate": 7.714434798699933e-08,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12014246731996536,
|
|
"step": 4300,
|
|
"valid_targets_mean": 6144.5,
|
|
"valid_targets_min": 2815
|
|
},
|
|
{
|
|
"epoch": 6.833333333333333,
|
|
"grad_norm": 0.42208852936776803,
|
|
"learning_rate": 7.035492279187538e-08,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12092911452054977,
|
|
"step": 4305,
|
|
"valid_targets_mean": 5618.4,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 6.841269841269841,
|
|
"grad_norm": 0.5061623759467122,
|
|
"learning_rate": 6.387765709493288e-08,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13144025206565857,
|
|
"step": 4310,
|
|
"valid_targets_mean": 5299.0,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 6.849206349206349,
|
|
"grad_norm": 0.4703474965866674,
|
|
"learning_rate": 5.7712652350061515e-08,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14938583970069885,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4459.2,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 6.857142857142857,
|
|
"grad_norm": 0.4544153706399285,
|
|
"learning_rate": 5.186000512018341e-08,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13210490345954895,
|
|
"step": 4320,
|
|
"valid_targets_mean": 5968.1,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 6.865079365079366,
|
|
"grad_norm": 0.3951693224390717,
|
|
"learning_rate": 4.631980707574535e-08,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10764377564191818,
|
|
"step": 4325,
|
|
"valid_targets_mean": 6102.2,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 6.8730158730158735,
|
|
"grad_norm": 0.47764153403255666,
|
|
"learning_rate": 4.10921449932733e-08,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13427546620368958,
|
|
"step": 4330,
|
|
"valid_targets_mean": 4686.6,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 6.880952380952381,
|
|
"grad_norm": 0.4686718928282114,
|
|
"learning_rate": 3.61771007540268e-08,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12770965695381165,
|
|
"step": 4335,
|
|
"valid_targets_mean": 5458.8,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 6.888888888888889,
|
|
"grad_norm": 0.45215811836727576,
|
|
"learning_rate": 3.157475134270227e-08,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14995113015174866,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5664.1,
|
|
"valid_targets_min": 3600
|
|
},
|
|
{
|
|
"epoch": 6.896825396825397,
|
|
"grad_norm": 0.49008507070012214,
|
|
"learning_rate": 2.728516884624277e-08,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13054564595222473,
|
|
"step": 4345,
|
|
"valid_targets_mean": 5089.6,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 6.904761904761905,
|
|
"grad_norm": 0.48404319287292985,
|
|
"learning_rate": 2.3308420452690106e-08,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13617941737174988,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4722.2,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 6.912698412698413,
|
|
"grad_norm": 0.40638533094484175,
|
|
"learning_rate": 1.9644568450147837e-08,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14109522104263306,
|
|
"step": 4355,
|
|
"valid_targets_mean": 6445.0,
|
|
"valid_targets_min": 3553
|
|
},
|
|
{
|
|
"epoch": 6.920634920634921,
|
|
"grad_norm": 0.4629617085369057,
|
|
"learning_rate": 1.6293670225799864e-08,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260548233985901,
|
|
"step": 4360,
|
|
"valid_targets_mean": 5373.1,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 6.928571428571429,
|
|
"grad_norm": 0.45402888146937276,
|
|
"learning_rate": 1.3255778265013342e-08,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14494284987449646,
|
|
"step": 4365,
|
|
"valid_targets_mean": 5725.3,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 6.936507936507937,
|
|
"grad_norm": 0.4086709277572136,
|
|
"learning_rate": 1.0530940150512703e-08,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1179996132850647,
|
|
"step": 4370,
|
|
"valid_targets_mean": 5990.4,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 6.944444444444445,
|
|
"grad_norm": 0.42662410297325026,
|
|
"learning_rate": 8.119198561638009e-09,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14371398091316223,
|
|
"step": 4375,
|
|
"valid_targets_mean": 5997.8,
|
|
"valid_targets_min": 3437
|
|
},
|
|
{
|
|
"epoch": 6.9523809523809526,
|
|
"grad_norm": 0.42639557312324744,
|
|
"learning_rate": 6.020591273674381e-09,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1339547336101532,
|
|
"step": 4380,
|
|
"valid_targets_mean": 6416.2,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 6.9603174603174605,
|
|
"grad_norm": 0.4092765044556627,
|
|
"learning_rate": 4.2351511572635835e-09,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10867651551961899,
|
|
"step": 4385,
|
|
"valid_targets_mean": 6632.2,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 6.968253968253968,
|
|
"grad_norm": 0.4951156195990528,
|
|
"learning_rate": 2.7629061778866597e-09,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1670551598072052,
|
|
"step": 4390,
|
|
"valid_targets_mean": 5063.4,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 6.976190476190476,
|
|
"grad_norm": 0.5279789366650457,
|
|
"learning_rate": 1.603879395422059e-09,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14463752508163452,
|
|
"step": 4395,
|
|
"valid_targets_mean": 4726.2,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 6.984126984126984,
|
|
"grad_norm": 0.43870727411718197,
|
|
"learning_rate": 7.580889637925914e-10,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14193710684776306,
|
|
"step": 4400,
|
|
"valid_targets_mean": 5203.9,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 6.992063492063492,
|
|
"grad_norm": 0.4604903629839532,
|
|
"learning_rate": 2.2554813067676705e-10,
|
|
"loss": 0.1268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437646746635437,
|
|
"step": 4405,
|
|
"valid_targets_mean": 5522.9,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.4923560486700678,
|
|
"learning_rate": 6.265237300073778e-12,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15846839547157288,
|
|
"step": 4410,
|
|
"valid_targets_mean": 5295.1,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15846839547157288,
|
|
"step": 4410,
|
|
"total_flos": 1765744004825088.0,
|
|
"train_loss": 0.17469855595608147,
|
|
"train_runtime": 27741.8701,
|
|
"train_samples_per_second": 2.541,
|
|
"train_steps_per_second": 0.159,
|
|
"valid_targets_mean": 5295.1,
|
|
"valid_targets_min": 1258
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4410,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1765744004825088.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|