11506 lines
311 KiB
JSON
11506 lines
311 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 5.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 5210,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0048,
|
||
|
|
"grad_norm": 10.636854244202556,
|
||
|
|
"learning_rate": 3.071017274472169e-07,
|
||
|
|
"loss": 0.9544,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3281858265399933,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 10263.1,
|
||
|
|
"valid_targets_min": 1622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0096,
|
||
|
|
"grad_norm": 8.705611775216052,
|
||
|
|
"learning_rate": 6.909788867562381e-07,
|
||
|
|
"loss": 0.9596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33007457852363586,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 12424.5,
|
||
|
|
"valid_targets_min": 2066
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0144,
|
||
|
|
"grad_norm": 7.625331316286804,
|
||
|
|
"learning_rate": 1.074856046065259e-06,
|
||
|
|
"loss": 0.9453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30262356996536255,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 10207.7,
|
||
|
|
"valid_targets_min": 1992
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0192,
|
||
|
|
"grad_norm": 5.53278243942875,
|
||
|
|
"learning_rate": 1.4587332053742803e-06,
|
||
|
|
"loss": 0.9187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32896602153778076,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 12274.8,
|
||
|
|
"valid_targets_min": 1016
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.024,
|
||
|
|
"grad_norm": 2.925539335155935,
|
||
|
|
"learning_rate": 1.8426103646833015e-06,
|
||
|
|
"loss": 0.8497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3109387755393982,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 12681.8,
|
||
|
|
"valid_targets_min": 1505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0288,
|
||
|
|
"grad_norm": 1.8422083015449056,
|
||
|
|
"learning_rate": 2.2264875239923228e-06,
|
||
|
|
"loss": 0.8137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27611488103866577,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 12296.9,
|
||
|
|
"valid_targets_min": 1668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0336,
|
||
|
|
"grad_norm": 1.5441559016293185,
|
||
|
|
"learning_rate": 2.6103646833013433e-06,
|
||
|
|
"loss": 0.7877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30650392174720764,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 12300.1,
|
||
|
|
"valid_targets_min": 1479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0384,
|
||
|
|
"grad_norm": 1.1621222136438034,
|
||
|
|
"learning_rate": 2.9942418426103648e-06,
|
||
|
|
"loss": 0.7642,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2344341278076172,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 10216.6,
|
||
|
|
"valid_targets_min": 3242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0432,
|
||
|
|
"grad_norm": 0.7785445251214029,
|
||
|
|
"learning_rate": 3.378119001919386e-06,
|
||
|
|
"loss": 0.7259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22525399923324585,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 9436.8,
|
||
|
|
"valid_targets_min": 1761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.048,
|
||
|
|
"grad_norm": 0.6851609239554484,
|
||
|
|
"learning_rate": 3.761996161228407e-06,
|
||
|
|
"loss": 0.7095,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22096958756446838,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 8893.3,
|
||
|
|
"valid_targets_min": 1935
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0528,
|
||
|
|
"grad_norm": 0.47600710001942337,
|
||
|
|
"learning_rate": 4.145873320537428e-06,
|
||
|
|
"loss": 0.6931,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22855256497859955,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 10395.9,
|
||
|
|
"valid_targets_min": 1283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0576,
|
||
|
|
"grad_norm": 0.4444504600275988,
|
||
|
|
"learning_rate": 4.52975047984645e-06,
|
||
|
|
"loss": 0.6716,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20247305929660797,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 9470.6,
|
||
|
|
"valid_targets_min": 2067
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0624,
|
||
|
|
"grad_norm": 0.3880399717802379,
|
||
|
|
"learning_rate": 4.91362763915547e-06,
|
||
|
|
"loss": 0.6494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2117888331413269,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 10791.5,
|
||
|
|
"valid_targets_min": 4875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0672,
|
||
|
|
"grad_norm": 0.3032827664070752,
|
||
|
|
"learning_rate": 5.297504798464492e-06,
|
||
|
|
"loss": 0.6435,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20708134770393372,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 10440.2,
|
||
|
|
"valid_targets_min": 2638
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.072,
|
||
|
|
"grad_norm": 0.28159068782522545,
|
||
|
|
"learning_rate": 5.681381957773513e-06,
|
||
|
|
"loss": 0.6349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23973067104816437,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 12223.6,
|
||
|
|
"valid_targets_min": 1619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0768,
|
||
|
|
"grad_norm": 0.2719001663622767,
|
||
|
|
"learning_rate": 6.065259117082534e-06,
|
||
|
|
"loss": 0.613,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19408732652664185,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 10384.3,
|
||
|
|
"valid_targets_min": 1608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0816,
|
||
|
|
"grad_norm": 0.23703867977781148,
|
||
|
|
"learning_rate": 6.449136276391556e-06,
|
||
|
|
"loss": 0.6057,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19000616669654846,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 11075.1,
|
||
|
|
"valid_targets_min": 2097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0864,
|
||
|
|
"grad_norm": 0.22992604502698233,
|
||
|
|
"learning_rate": 6.833013435700576e-06,
|
||
|
|
"loss": 0.5832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20729880034923553,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 10286.8,
|
||
|
|
"valid_targets_min": 253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0912,
|
||
|
|
"grad_norm": 0.22062818252236446,
|
||
|
|
"learning_rate": 7.216890595009598e-06,
|
||
|
|
"loss": 0.5787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2066301703453064,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 11090.3,
|
||
|
|
"valid_targets_min": 2975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.096,
|
||
|
|
"grad_norm": 0.24007612411052925,
|
||
|
|
"learning_rate": 7.600767754318619e-06,
|
||
|
|
"loss": 0.5724,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1960715353488922,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 10440.2,
|
||
|
|
"valid_targets_min": 1703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1008,
|
||
|
|
"grad_norm": 0.2364846800541795,
|
||
|
|
"learning_rate": 7.98464491362764e-06,
|
||
|
|
"loss": 0.5643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.183235764503479,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 10720.9,
|
||
|
|
"valid_targets_min": 1279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1056,
|
||
|
|
"grad_norm": 0.22116407241128727,
|
||
|
|
"learning_rate": 8.368522072936662e-06,
|
||
|
|
"loss": 0.5524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15857258439064026,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 8602.8,
|
||
|
|
"valid_targets_min": 1405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1104,
|
||
|
|
"grad_norm": 0.24251627783577479,
|
||
|
|
"learning_rate": 8.752399232245682e-06,
|
||
|
|
"loss": 0.5528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20528832077980042,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 11334.2,
|
||
|
|
"valid_targets_min": 2159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1152,
|
||
|
|
"grad_norm": 0.22397511123215,
|
||
|
|
"learning_rate": 9.136276391554704e-06,
|
||
|
|
"loss": 0.5436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1935373693704605,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 11852.4,
|
||
|
|
"valid_targets_min": 2174
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12,
|
||
|
|
"grad_norm": 0.24271342022992784,
|
||
|
|
"learning_rate": 9.520153550863724e-06,
|
||
|
|
"loss": 0.5472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19181078672409058,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 10768.0,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1248,
|
||
|
|
"grad_norm": 0.2172967997455667,
|
||
|
|
"learning_rate": 9.904030710172746e-06,
|
||
|
|
"loss": 0.5468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1923825442790985,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 11106.3,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1296,
|
||
|
|
"grad_norm": 0.2357587155660315,
|
||
|
|
"learning_rate": 1.0287907869481766e-05,
|
||
|
|
"loss": 0.5344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16063986718654633,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 9855.5,
|
||
|
|
"valid_targets_min": 2389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1344,
|
||
|
|
"grad_norm": 0.22906584573702501,
|
||
|
|
"learning_rate": 1.067178502879079e-05,
|
||
|
|
"loss": 0.5344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19566833972930908,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 11456.1,
|
||
|
|
"valid_targets_min": 2336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1392,
|
||
|
|
"grad_norm": 0.25114235107383526,
|
||
|
|
"learning_rate": 1.105566218809981e-05,
|
||
|
|
"loss": 0.5285,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19265873730182648,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 9929.8,
|
||
|
|
"valid_targets_min": 2020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.144,
|
||
|
|
"grad_norm": 0.23750880102943095,
|
||
|
|
"learning_rate": 1.143953934740883e-05,
|
||
|
|
"loss": 0.5231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20383387804031372,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 11054.2,
|
||
|
|
"valid_targets_min": 1049
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1488,
|
||
|
|
"grad_norm": 0.23692985297514224,
|
||
|
|
"learning_rate": 1.182341650671785e-05,
|
||
|
|
"loss": 0.5245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17097380757331848,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 11116.3,
|
||
|
|
"valid_targets_min": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1536,
|
||
|
|
"grad_norm": 0.24339076517066405,
|
||
|
|
"learning_rate": 1.2207293666026872e-05,
|
||
|
|
"loss": 0.5104,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17967820167541504,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 11142.9,
|
||
|
|
"valid_targets_min": 2725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1584,
|
||
|
|
"grad_norm": 0.2558192460223346,
|
||
|
|
"learning_rate": 1.2591170825335894e-05,
|
||
|
|
"loss": 0.5194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16853728890419006,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 10722.9,
|
||
|
|
"valid_targets_min": 3225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1632,
|
||
|
|
"grad_norm": 0.24528680520963095,
|
||
|
|
"learning_rate": 1.2975047984644915e-05,
|
||
|
|
"loss": 0.517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16261857748031616,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 9597.3,
|
||
|
|
"valid_targets_min": 2714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.168,
|
||
|
|
"grad_norm": 0.2330765401016573,
|
||
|
|
"learning_rate": 1.3358925143953936e-05,
|
||
|
|
"loss": 0.5123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1625608503818512,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 9343.4,
|
||
|
|
"valid_targets_min": 1455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1728,
|
||
|
|
"grad_norm": 0.2662782596501721,
|
||
|
|
"learning_rate": 1.3742802303262956e-05,
|
||
|
|
"loss": 0.5099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15402890741825104,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 9405.9,
|
||
|
|
"valid_targets_min": 2750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1776,
|
||
|
|
"grad_norm": 0.23548854171410669,
|
||
|
|
"learning_rate": 1.4126679462571978e-05,
|
||
|
|
"loss": 0.5082,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15284034609794617,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 10169.3,
|
||
|
|
"valid_targets_min": 2559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1824,
|
||
|
|
"grad_norm": 0.2493807137284219,
|
||
|
|
"learning_rate": 1.4510556621881e-05,
|
||
|
|
"loss": 0.5078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.224237859249115,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 13936.2,
|
||
|
|
"valid_targets_min": 1967
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1872,
|
||
|
|
"grad_norm": 0.23080076777747327,
|
||
|
|
"learning_rate": 1.4894433781190021e-05,
|
||
|
|
"loss": 0.4977,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18542718887329102,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 11386.7,
|
||
|
|
"valid_targets_min": 1049
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.192,
|
||
|
|
"grad_norm": 0.24356875674311323,
|
||
|
|
"learning_rate": 1.527831094049904e-05,
|
||
|
|
"loss": 0.4938,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15462642908096313,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 9353.9,
|
||
|
|
"valid_targets_min": 1881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1968,
|
||
|
|
"grad_norm": 0.21231400448563495,
|
||
|
|
"learning_rate": 1.566218809980806e-05,
|
||
|
|
"loss": 0.497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19362452626228333,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 13543.5,
|
||
|
|
"valid_targets_min": 3151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2016,
|
||
|
|
"grad_norm": 0.22632075151001788,
|
||
|
|
"learning_rate": 1.6046065259117082e-05,
|
||
|
|
"loss": 0.4923,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17729297280311584,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 11808.5,
|
||
|
|
"valid_targets_min": 3102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2064,
|
||
|
|
"grad_norm": 0.27758108971137807,
|
||
|
|
"learning_rate": 1.6429942418426105e-05,
|
||
|
|
"loss": 0.4899,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17895910143852234,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 11082.8,
|
||
|
|
"valid_targets_min": 3656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2112,
|
||
|
|
"grad_norm": 0.28957622378218517,
|
||
|
|
"learning_rate": 1.6813819577735126e-05,
|
||
|
|
"loss": 0.4913,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14970815181732178,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 9667.3,
|
||
|
|
"valid_targets_min": 1173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.216,
|
||
|
|
"grad_norm": 0.27937879612714905,
|
||
|
|
"learning_rate": 1.7197696737044146e-05,
|
||
|
|
"loss": 0.5047,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14511623978614807,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 9465.0,
|
||
|
|
"valid_targets_min": 2634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2208,
|
||
|
|
"grad_norm": 0.24089811245385753,
|
||
|
|
"learning_rate": 1.758157389635317e-05,
|
||
|
|
"loss": 0.4966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16545206308364868,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 11997.9,
|
||
|
|
"valid_targets_min": 2677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2256,
|
||
|
|
"grad_norm": 0.2578591546428462,
|
||
|
|
"learning_rate": 1.796545105566219e-05,
|
||
|
|
"loss": 0.4873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2065517008304596,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 13886.1,
|
||
|
|
"valid_targets_min": 2298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2304,
|
||
|
|
"grad_norm": 0.24203958143555748,
|
||
|
|
"learning_rate": 1.8349328214971213e-05,
|
||
|
|
"loss": 0.4971,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17147715389728546,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 11135.5,
|
||
|
|
"valid_targets_min": 2340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2352,
|
||
|
|
"grad_norm": 0.23970277281734395,
|
||
|
|
"learning_rate": 1.8733205374280233e-05,
|
||
|
|
"loss": 0.4825,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1593427062034607,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 9909.9,
|
||
|
|
"valid_targets_min": 4293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24,
|
||
|
|
"grad_norm": 0.31598621631855395,
|
||
|
|
"learning_rate": 1.9117082533589253e-05,
|
||
|
|
"loss": 0.4898,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18410634994506836,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 12043.8,
|
||
|
|
"valid_targets_min": 1907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2448,
|
||
|
|
"grad_norm": 0.27841537067755107,
|
||
|
|
"learning_rate": 1.9500959692898273e-05,
|
||
|
|
"loss": 0.4934,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15667122602462769,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 9791.7,
|
||
|
|
"valid_targets_min": 1688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2496,
|
||
|
|
"grad_norm": 0.27493246532697335,
|
||
|
|
"learning_rate": 1.9884836852207294e-05,
|
||
|
|
"loss": 0.5018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15299372375011444,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 9514.1,
|
||
|
|
"valid_targets_min": 301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2544,
|
||
|
|
"grad_norm": 0.266140461511119,
|
||
|
|
"learning_rate": 2.0268714011516314e-05,
|
||
|
|
"loss": 0.4811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15985918045043945,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 10259.5,
|
||
|
|
"valid_targets_min": 2847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2592,
|
||
|
|
"grad_norm": 0.2544591628433433,
|
||
|
|
"learning_rate": 2.0652591170825337e-05,
|
||
|
|
"loss": 0.4853,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17325814068317413,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 10599.2,
|
||
|
|
"valid_targets_min": 1957
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.264,
|
||
|
|
"grad_norm": 0.2550761566659248,
|
||
|
|
"learning_rate": 2.103646833013436e-05,
|
||
|
|
"loss": 0.4838,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18994951248168945,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 12406.7,
|
||
|
|
"valid_targets_min": 1763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2688,
|
||
|
|
"grad_norm": 0.2648060038919729,
|
||
|
|
"learning_rate": 2.142034548944338e-05,
|
||
|
|
"loss": 0.4877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17854821681976318,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 12377.0,
|
||
|
|
"valid_targets_min": 3205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2736,
|
||
|
|
"grad_norm": 0.2892489787345957,
|
||
|
|
"learning_rate": 2.18042226487524e-05,
|
||
|
|
"loss": 0.485,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15922245383262634,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 10091.9,
|
||
|
|
"valid_targets_min": 1776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2784,
|
||
|
|
"grad_norm": 0.27182050087807624,
|
||
|
|
"learning_rate": 2.218809980806142e-05,
|
||
|
|
"loss": 0.4832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18173915147781372,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 11485.0,
|
||
|
|
"valid_targets_min": 2412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2832,
|
||
|
|
"grad_norm": 0.29753560881114305,
|
||
|
|
"learning_rate": 2.2571976967370445e-05,
|
||
|
|
"loss": 0.4876,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15241312980651855,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 10455.7,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.288,
|
||
|
|
"grad_norm": 0.2920243091279055,
|
||
|
|
"learning_rate": 2.2955854126679465e-05,
|
||
|
|
"loss": 0.4809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17634551227092743,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 11262.6,
|
||
|
|
"valid_targets_min": 2395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2928,
|
||
|
|
"grad_norm": 0.3068931126861716,
|
||
|
|
"learning_rate": 2.3339731285988485e-05,
|
||
|
|
"loss": 0.4787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16455751657485962,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 10402.1,
|
||
|
|
"valid_targets_min": 1814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2976,
|
||
|
|
"grad_norm": 0.29189757761278545,
|
||
|
|
"learning_rate": 2.372360844529751e-05,
|
||
|
|
"loss": 0.4849,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14844027161598206,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 10187.0,
|
||
|
|
"valid_targets_min": 1855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3024,
|
||
|
|
"grad_norm": 0.29114274622108255,
|
||
|
|
"learning_rate": 2.4107485604606525e-05,
|
||
|
|
"loss": 0.4819,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1699836403131485,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 11114.6,
|
||
|
|
"valid_targets_min": 1042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3072,
|
||
|
|
"grad_norm": 0.2954997654982877,
|
||
|
|
"learning_rate": 2.449136276391555e-05,
|
||
|
|
"loss": 0.4742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16469603776931763,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 9133.3,
|
||
|
|
"valid_targets_min": 1297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.312,
|
||
|
|
"grad_norm": 0.2773131204544206,
|
||
|
|
"learning_rate": 2.4875239923224573e-05,
|
||
|
|
"loss": 0.4866,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15901392698287964,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 10840.6,
|
||
|
|
"valid_targets_min": 1563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3168,
|
||
|
|
"grad_norm": 0.25978690369987806,
|
||
|
|
"learning_rate": 2.525911708253359e-05,
|
||
|
|
"loss": 0.483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17150971293449402,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 12225.6,
|
||
|
|
"valid_targets_min": 2997
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3216,
|
||
|
|
"grad_norm": 0.2534243499422545,
|
||
|
|
"learning_rate": 2.5642994241842613e-05,
|
||
|
|
"loss": 0.4759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1597476452589035,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 10479.3,
|
||
|
|
"valid_targets_min": 979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3264,
|
||
|
|
"grad_norm": 0.2609983072842597,
|
||
|
|
"learning_rate": 2.6026871401151633e-05,
|
||
|
|
"loss": 0.4812,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16786131262779236,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 10247.2,
|
||
|
|
"valid_targets_min": 3687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3312,
|
||
|
|
"grad_norm": 0.2539569867124708,
|
||
|
|
"learning_rate": 2.6410748560460657e-05,
|
||
|
|
"loss": 0.4854,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17365418374538422,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 11493.6,
|
||
|
|
"valid_targets_min": 1776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.336,
|
||
|
|
"grad_norm": 0.24499201124304537,
|
||
|
|
"learning_rate": 2.6794625719769677e-05,
|
||
|
|
"loss": 0.4805,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1610392928123474,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 11080.4,
|
||
|
|
"valid_targets_min": 885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3408,
|
||
|
|
"grad_norm": 0.3168783571987516,
|
||
|
|
"learning_rate": 2.7178502879078697e-05,
|
||
|
|
"loss": 0.4777,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1885453164577484,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 11284.8,
|
||
|
|
"valid_targets_min": 2053
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3456,
|
||
|
|
"grad_norm": 0.29757777058122253,
|
||
|
|
"learning_rate": 2.756238003838772e-05,
|
||
|
|
"loss": 0.4757,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12922048568725586,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 9008.3,
|
||
|
|
"valid_targets_min": 2069
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3504,
|
||
|
|
"grad_norm": 0.29845542522302926,
|
||
|
|
"learning_rate": 2.7946257197696737e-05,
|
||
|
|
"loss": 0.4738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1480693519115448,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 9970.8,
|
||
|
|
"valid_targets_min": 2240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3552,
|
||
|
|
"grad_norm": 0.2607162119403624,
|
||
|
|
"learning_rate": 2.833013435700576e-05,
|
||
|
|
"loss": 0.4573,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15535274147987366,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 11197.2,
|
||
|
|
"valid_targets_min": 2704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36,
|
||
|
|
"grad_norm": 0.2899828540897501,
|
||
|
|
"learning_rate": 2.8714011516314784e-05,
|
||
|
|
"loss": 0.4684,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18022534251213074,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 10962.7,
|
||
|
|
"valid_targets_min": 1224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3648,
|
||
|
|
"grad_norm": 0.26481660377161126,
|
||
|
|
"learning_rate": 2.90978886756238e-05,
|
||
|
|
"loss": 0.4698,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13400977849960327,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 9079.6,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3696,
|
||
|
|
"grad_norm": 0.2617162429432194,
|
||
|
|
"learning_rate": 2.9481765834932825e-05,
|
||
|
|
"loss": 0.4707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13027355074882507,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 8412.9,
|
||
|
|
"valid_targets_min": 1498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3744,
|
||
|
|
"grad_norm": 0.2701862190450275,
|
||
|
|
"learning_rate": 2.9865642994241845e-05,
|
||
|
|
"loss": 0.4708,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16891103982925415,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 11449.3,
|
||
|
|
"valid_targets_min": 1279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3792,
|
||
|
|
"grad_norm": 0.29379480481092074,
|
||
|
|
"learning_rate": 3.0249520153550865e-05,
|
||
|
|
"loss": 0.4707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14961659908294678,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 8604.1,
|
||
|
|
"valid_targets_min": 1994
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.384,
|
||
|
|
"grad_norm": 0.27150103043577656,
|
||
|
|
"learning_rate": 3.063339731285989e-05,
|
||
|
|
"loss": 0.4753,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1796588897705078,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 11844.7,
|
||
|
|
"valid_targets_min": 2622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3888,
|
||
|
|
"grad_norm": 0.29573597688488124,
|
||
|
|
"learning_rate": 3.101727447216891e-05,
|
||
|
|
"loss": 0.4705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20640979707241058,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 14013.4,
|
||
|
|
"valid_targets_min": 2984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3936,
|
||
|
|
"grad_norm": 0.25248186174434967,
|
||
|
|
"learning_rate": 3.140115163147793e-05,
|
||
|
|
"loss": 0.4614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16427947580814362,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 11053.4,
|
||
|
|
"valid_targets_min": 2699
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3984,
|
||
|
|
"grad_norm": 0.3765832037408977,
|
||
|
|
"learning_rate": 3.178502879078695e-05,
|
||
|
|
"loss": 0.4713,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14113269746303558,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 9704.3,
|
||
|
|
"valid_targets_min": 3191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4032,
|
||
|
|
"grad_norm": 0.29914718763920833,
|
||
|
|
"learning_rate": 3.216890595009597e-05,
|
||
|
|
"loss": 0.4694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15573322772979736,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 10724.8,
|
||
|
|
"valid_targets_min": 1674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.408,
|
||
|
|
"grad_norm": 0.28083425113360516,
|
||
|
|
"learning_rate": 3.2552783109404996e-05,
|
||
|
|
"loss": 0.4588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17997148633003235,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 12482.6,
|
||
|
|
"valid_targets_min": 2166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4128,
|
||
|
|
"grad_norm": 0.2976974373969709,
|
||
|
|
"learning_rate": 3.2936660268714016e-05,
|
||
|
|
"loss": 0.4657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15475307404994965,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 10911.6,
|
||
|
|
"valid_targets_min": 2009
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4176,
|
||
|
|
"grad_norm": 0.24542082786514977,
|
||
|
|
"learning_rate": 3.3320537428023036e-05,
|
||
|
|
"loss": 0.4645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1643528938293457,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 12095.1,
|
||
|
|
"valid_targets_min": 4705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4224,
|
||
|
|
"grad_norm": 0.29303842872775576,
|
||
|
|
"learning_rate": 3.3704414587332056e-05,
|
||
|
|
"loss": 0.4647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13531912863254547,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 9222.8,
|
||
|
|
"valid_targets_min": 2233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4272,
|
||
|
|
"grad_norm": 0.2828869010226847,
|
||
|
|
"learning_rate": 3.4088291746641077e-05,
|
||
|
|
"loss": 0.474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14599958062171936,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 11280.1,
|
||
|
|
"valid_targets_min": 2279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.432,
|
||
|
|
"grad_norm": 0.29431208133429276,
|
||
|
|
"learning_rate": 3.4472168905950104e-05,
|
||
|
|
"loss": 0.4621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14398963749408722,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 10311.7,
|
||
|
|
"valid_targets_min": 1728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4368,
|
||
|
|
"grad_norm": 0.2585227807649877,
|
||
|
|
"learning_rate": 3.485604606525912e-05,
|
||
|
|
"loss": 0.461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1372634768486023,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 8504.6,
|
||
|
|
"valid_targets_min": 1508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4416,
|
||
|
|
"grad_norm": 0.29941740189685506,
|
||
|
|
"learning_rate": 3.5239923224568144e-05,
|
||
|
|
"loss": 0.4618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13782289624214172,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 9918.3,
|
||
|
|
"valid_targets_min": 2310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4464,
|
||
|
|
"grad_norm": 0.29746723988147145,
|
||
|
|
"learning_rate": 3.5623800383877164e-05,
|
||
|
|
"loss": 0.4669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12251640856266022,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 8674.2,
|
||
|
|
"valid_targets_min": 1732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4512,
|
||
|
|
"grad_norm": 0.3516933086901854,
|
||
|
|
"learning_rate": 3.6007677543186184e-05,
|
||
|
|
"loss": 0.4707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18415644764900208,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 12712.0,
|
||
|
|
"valid_targets_min": 1951
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.456,
|
||
|
|
"grad_norm": 0.2704944515833381,
|
||
|
|
"learning_rate": 3.6391554702495204e-05,
|
||
|
|
"loss": 0.4571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15503855049610138,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 10973.3,
|
||
|
|
"valid_targets_min": 2568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4608,
|
||
|
|
"grad_norm": 0.2579883782673916,
|
||
|
|
"learning_rate": 3.6775431861804224e-05,
|
||
|
|
"loss": 0.4611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15346036851406097,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 9707.8,
|
||
|
|
"valid_targets_min": 3027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4656,
|
||
|
|
"grad_norm": 0.28982082854321284,
|
||
|
|
"learning_rate": 3.7159309021113245e-05,
|
||
|
|
"loss": 0.4471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12727707624435425,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 9593.2,
|
||
|
|
"valid_targets_min": 2884
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4704,
|
||
|
|
"grad_norm": 0.2646662239031736,
|
||
|
|
"learning_rate": 3.7543186180422265e-05,
|
||
|
|
"loss": 0.459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16413524746894836,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 11934.7,
|
||
|
|
"valid_targets_min": 3560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4752,
|
||
|
|
"grad_norm": 0.266665098284034,
|
||
|
|
"learning_rate": 3.792706333973129e-05,
|
||
|
|
"loss": 0.4616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1243312731385231,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 8476.5,
|
||
|
|
"valid_targets_min": 2652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48,
|
||
|
|
"grad_norm": 0.28190317886063865,
|
||
|
|
"learning_rate": 3.831094049904031e-05,
|
||
|
|
"loss": 0.4639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16007450222969055,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 9943.0,
|
||
|
|
"valid_targets_min": 2220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4848,
|
||
|
|
"grad_norm": 0.29271223570474775,
|
||
|
|
"learning_rate": 3.869481765834933e-05,
|
||
|
|
"loss": 0.4679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1724342703819275,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 11642.2,
|
||
|
|
"valid_targets_min": 1354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4896,
|
||
|
|
"grad_norm": 0.26903639753216824,
|
||
|
|
"learning_rate": 3.907869481765835e-05,
|
||
|
|
"loss": 0.449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14683669805526733,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 10091.7,
|
||
|
|
"valid_targets_min": 2840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4944,
|
||
|
|
"grad_norm": 0.2559878906486554,
|
||
|
|
"learning_rate": 3.946257197696737e-05,
|
||
|
|
"loss": 0.4672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18146558105945587,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 11818.8,
|
||
|
|
"valid_targets_min": 939
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4992,
|
||
|
|
"grad_norm": 0.3302113861791353,
|
||
|
|
"learning_rate": 3.984644913627639e-05,
|
||
|
|
"loss": 0.4534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13053454458713531,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 8831.3,
|
||
|
|
"valid_targets_min": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.504,
|
||
|
|
"grad_norm": 0.26972775533524873,
|
||
|
|
"learning_rate": 3.999995959997414e-05,
|
||
|
|
"loss": 0.4643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16849611699581146,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 10903.1,
|
||
|
|
"valid_targets_min": 1886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5088,
|
||
|
|
"grad_norm": 0.2603085017747878,
|
||
|
|
"learning_rate": 3.999971271151827e-05,
|
||
|
|
"loss": 0.4612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1302165389060974,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 8817.5,
|
||
|
|
"valid_targets_min": 1843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5136,
|
||
|
|
"grad_norm": 0.2858496522578758,
|
||
|
|
"learning_rate": 3.9999241381832614e-05,
|
||
|
|
"loss": 0.4637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14295253157615662,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 10053.8,
|
||
|
|
"valid_targets_min": 2740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5184,
|
||
|
|
"grad_norm": 0.26152174514020043,
|
||
|
|
"learning_rate": 3.999854561620655e-05,
|
||
|
|
"loss": 0.4595,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14226208627223969,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 11106.9,
|
||
|
|
"valid_targets_min": 1576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5232,
|
||
|
|
"grad_norm": 0.26028352862388787,
|
||
|
|
"learning_rate": 3.9997625422448114e-05,
|
||
|
|
"loss": 0.4545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15191048383712769,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 9975.0,
|
||
|
|
"valid_targets_min": 3918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.528,
|
||
|
|
"grad_norm": 0.252965467893726,
|
||
|
|
"learning_rate": 3.999648081088391e-05,
|
||
|
|
"loss": 0.4534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13953734934329987,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 9347.6,
|
||
|
|
"valid_targets_min": 2024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5328,
|
||
|
|
"grad_norm": 0.25661462861347195,
|
||
|
|
"learning_rate": 3.999511179435905e-05,
|
||
|
|
"loss": 0.4592,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15799963474273682,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 10477.4,
|
||
|
|
"valid_targets_min": 3319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5376,
|
||
|
|
"grad_norm": 0.270288528995941,
|
||
|
|
"learning_rate": 3.999351838823691e-05,
|
||
|
|
"loss": 0.4602,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15654206275939941,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 10189.6,
|
||
|
|
"valid_targets_min": 1777
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5424,
|
||
|
|
"grad_norm": 0.35156440192336935,
|
||
|
|
"learning_rate": 3.999170061039908e-05,
|
||
|
|
"loss": 0.4535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1559731662273407,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 9539.7,
|
||
|
|
"valid_targets_min": 2028
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5472,
|
||
|
|
"grad_norm": 0.2539828685860791,
|
||
|
|
"learning_rate": 3.998965848124505e-05,
|
||
|
|
"loss": 0.4551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13896337151527405,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 10557.0,
|
||
|
|
"valid_targets_min": 2887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.552,
|
||
|
|
"grad_norm": 0.2598033898247074,
|
||
|
|
"learning_rate": 3.998739202369205e-05,
|
||
|
|
"loss": 0.4584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1374547779560089,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 10081.8,
|
||
|
|
"valid_targets_min": 1494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5568,
|
||
|
|
"grad_norm": 0.30314441059434327,
|
||
|
|
"learning_rate": 3.998490126317477e-05,
|
||
|
|
"loss": 0.4483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16131776571273804,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 10033.4,
|
||
|
|
"valid_targets_min": 1706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5616,
|
||
|
|
"grad_norm": 0.22031863670397067,
|
||
|
|
"learning_rate": 3.9982186227645085e-05,
|
||
|
|
"loss": 0.4571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1602936089038849,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 11657.8,
|
||
|
|
"valid_targets_min": 1006
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5664,
|
||
|
|
"grad_norm": 0.26943631448171856,
|
||
|
|
"learning_rate": 3.9979246947571724e-05,
|
||
|
|
"loss": 0.4517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16911017894744873,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 12263.9,
|
||
|
|
"valid_targets_min": 1702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5712,
|
||
|
|
"grad_norm": 0.25061495037382847,
|
||
|
|
"learning_rate": 3.9976083455939945e-05,
|
||
|
|
"loss": 0.4532,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15989217162132263,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 11708.3,
|
||
|
|
"valid_targets_min": 1871
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.576,
|
||
|
|
"grad_norm": 0.261621992497104,
|
||
|
|
"learning_rate": 3.9972695788251155e-05,
|
||
|
|
"loss": 0.4401,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1598036289215088,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 10135.6,
|
||
|
|
"valid_targets_min": 2569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5808,
|
||
|
|
"grad_norm": 0.2808004182619828,
|
||
|
|
"learning_rate": 3.996908398252251e-05,
|
||
|
|
"loss": 0.4541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1376391500234604,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 8971.0,
|
||
|
|
"valid_targets_min": 1025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5856,
|
||
|
|
"grad_norm": 0.25161025940924875,
|
||
|
|
"learning_rate": 3.9965248079286505e-05,
|
||
|
|
"loss": 0.4419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15694254636764526,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 11790.6,
|
||
|
|
"valid_targets_min": 2403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5904,
|
||
|
|
"grad_norm": 0.391406643033309,
|
||
|
|
"learning_rate": 3.99611881215905e-05,
|
||
|
|
"loss": 0.4492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13714802265167236,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 8819.1,
|
||
|
|
"valid_targets_min": 1645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5952,
|
||
|
|
"grad_norm": 0.3027879951842276,
|
||
|
|
"learning_rate": 3.995690415499624e-05,
|
||
|
|
"loss": 0.4483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15102067589759827,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 10631.6,
|
||
|
|
"valid_targets_min": 2143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6,
|
||
|
|
"grad_norm": 0.25471127728925685,
|
||
|
|
"learning_rate": 3.995239622757936e-05,
|
||
|
|
"loss": 0.4476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15196582674980164,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 10613.7,
|
||
|
|
"valid_targets_min": 2445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6048,
|
||
|
|
"grad_norm": 0.2491408938992097,
|
||
|
|
"learning_rate": 3.994766438992882e-05,
|
||
|
|
"loss": 0.4545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15728193521499634,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 11646.2,
|
||
|
|
"valid_targets_min": 3345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6096,
|
||
|
|
"grad_norm": 0.2718517988380648,
|
||
|
|
"learning_rate": 3.994270869514635e-05,
|
||
|
|
"loss": 0.4479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17402216792106628,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 10870.4,
|
||
|
|
"valid_targets_min": 2319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6144,
|
||
|
|
"grad_norm": 0.2626639858806631,
|
||
|
|
"learning_rate": 3.9937529198845864e-05,
|
||
|
|
"loss": 0.4471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15539997816085815,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 9441.2,
|
||
|
|
"valid_targets_min": 2925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6192,
|
||
|
|
"grad_norm": 0.249237014812052,
|
||
|
|
"learning_rate": 3.9932125959152833e-05,
|
||
|
|
"loss": 0.4508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13891427218914032,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 8957.0,
|
||
|
|
"valid_targets_min": 861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.624,
|
||
|
|
"grad_norm": 0.25544849188790725,
|
||
|
|
"learning_rate": 3.9926499036703607e-05,
|
||
|
|
"loss": 0.4544,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14250892400741577,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 9748.9,
|
||
|
|
"valid_targets_min": 1507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6288,
|
||
|
|
"grad_norm": 0.2577063394014419,
|
||
|
|
"learning_rate": 3.992064849464476e-05,
|
||
|
|
"loss": 0.4533,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.156438410282135,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 10886.6,
|
||
|
|
"valid_targets_min": 1328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6336,
|
||
|
|
"grad_norm": 0.26664954451504175,
|
||
|
|
"learning_rate": 3.991457439863238e-05,
|
||
|
|
"loss": 0.4544,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16429439187049866,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 11470.5,
|
||
|
|
"valid_targets_min": 1398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6384,
|
||
|
|
"grad_norm": 0.2585738675100044,
|
||
|
|
"learning_rate": 3.990827681683133e-05,
|
||
|
|
"loss": 0.4511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1722051352262497,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 11573.3,
|
||
|
|
"valid_targets_min": 274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6432,
|
||
|
|
"grad_norm": 0.24473615866589019,
|
||
|
|
"learning_rate": 3.990175581991448e-05,
|
||
|
|
"loss": 0.4536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16524508595466614,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 11921.1,
|
||
|
|
"valid_targets_min": 2460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.648,
|
||
|
|
"grad_norm": 0.2588924362804296,
|
||
|
|
"learning_rate": 3.989501148106189e-05,
|
||
|
|
"loss": 0.4416,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12152011692523956,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 8583.5,
|
||
|
|
"valid_targets_min": 1520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6528,
|
||
|
|
"grad_norm": 0.2906148484888001,
|
||
|
|
"learning_rate": 3.988804387596005e-05,
|
||
|
|
"loss": 0.4543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1639477014541626,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 10996.6,
|
||
|
|
"valid_targets_min": 1719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6576,
|
||
|
|
"grad_norm": 0.2269520950972862,
|
||
|
|
"learning_rate": 3.9880853082800965e-05,
|
||
|
|
"loss": 0.4538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14809130132198334,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 10731.2,
|
||
|
|
"valid_targets_min": 1983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6624,
|
||
|
|
"grad_norm": 0.2878053426516386,
|
||
|
|
"learning_rate": 3.987343918228133e-05,
|
||
|
|
"loss": 0.4439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15128569304943085,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 11356.6,
|
||
|
|
"valid_targets_min": 4547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6672,
|
||
|
|
"grad_norm": 0.24198387949329253,
|
||
|
|
"learning_rate": 3.9865802257601584e-05,
|
||
|
|
"loss": 0.4475,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1544976532459259,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 10080.2,
|
||
|
|
"valid_targets_min": 1962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.672,
|
||
|
|
"grad_norm": 0.28830935840143584,
|
||
|
|
"learning_rate": 3.9857942394464976e-05,
|
||
|
|
"loss": 0.4534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13071781396865845,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 8885.0,
|
||
|
|
"valid_targets_min": 1579
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6768,
|
||
|
|
"grad_norm": 0.2662828860280673,
|
||
|
|
"learning_rate": 3.984985968107667e-05,
|
||
|
|
"loss": 0.4491,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15589673817157745,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 11279.5,
|
||
|
|
"valid_targets_min": 1977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6816,
|
||
|
|
"grad_norm": 0.2606792021323181,
|
||
|
|
"learning_rate": 3.984155420814266e-05,
|
||
|
|
"loss": 0.4422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14421959221363068,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 10752.7,
|
||
|
|
"valid_targets_min": 2087
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6864,
|
||
|
|
"grad_norm": 0.28331509780837333,
|
||
|
|
"learning_rate": 3.9833026068868814e-05,
|
||
|
|
"loss": 0.4399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16088783740997314,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 11895.5,
|
||
|
|
"valid_targets_min": 2299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6912,
|
||
|
|
"grad_norm": 0.2401398741428507,
|
||
|
|
"learning_rate": 3.982427535895982e-05,
|
||
|
|
"loss": 0.4493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12636323273181915,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 8741.3,
|
||
|
|
"valid_targets_min": 2064
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.696,
|
||
|
|
"grad_norm": 0.24623098098235072,
|
||
|
|
"learning_rate": 3.9815302176618076e-05,
|
||
|
|
"loss": 0.4372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14504778385162354,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 9609.4,
|
||
|
|
"valid_targets_min": 2910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7008,
|
||
|
|
"grad_norm": 0.27011416048003173,
|
||
|
|
"learning_rate": 3.980610662254264e-05,
|
||
|
|
"loss": 0.4452,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1622447371482849,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 11419.7,
|
||
|
|
"valid_targets_min": 2401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7056,
|
||
|
|
"grad_norm": 0.29719134463341074,
|
||
|
|
"learning_rate": 3.9796688799928075e-05,
|
||
|
|
"loss": 0.444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13660922646522522,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 10302.8,
|
||
|
|
"valid_targets_min": 2296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7104,
|
||
|
|
"grad_norm": 0.24542996505841894,
|
||
|
|
"learning_rate": 3.978704881446327e-05,
|
||
|
|
"loss": 0.4418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17318297922611237,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 13040.1,
|
||
|
|
"valid_targets_min": 2918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7152,
|
||
|
|
"grad_norm": 0.23791161407706576,
|
||
|
|
"learning_rate": 3.9777186774330304e-05,
|
||
|
|
"loss": 0.4415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14803707599639893,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 10797.8,
|
||
|
|
"valid_targets_min": 2483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.72,
|
||
|
|
"grad_norm": 0.25166109994851354,
|
||
|
|
"learning_rate": 3.976710279020318e-05,
|
||
|
|
"loss": 0.4447,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17335036396980286,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 11030.3,
|
||
|
|
"valid_targets_min": 2140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7248,
|
||
|
|
"grad_norm": 0.28361399295331674,
|
||
|
|
"learning_rate": 3.975679697524661e-05,
|
||
|
|
"loss": 0.4451,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.148225799202919,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 11292.8,
|
||
|
|
"valid_targets_min": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7296,
|
||
|
|
"grad_norm": 0.24746500944939762,
|
||
|
|
"learning_rate": 3.974626944511475e-05,
|
||
|
|
"loss": 0.4438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16841234266757965,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 12154.3,
|
||
|
|
"valid_targets_min": 2203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7344,
|
||
|
|
"grad_norm": 0.2241960168441248,
|
||
|
|
"learning_rate": 3.973552031794988e-05,
|
||
|
|
"loss": 0.4427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14119960367679596,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 9363.3,
|
||
|
|
"valid_targets_min": 2701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7392,
|
||
|
|
"grad_norm": 0.23458423848116103,
|
||
|
|
"learning_rate": 3.9724549714381106e-05,
|
||
|
|
"loss": 0.4472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13517552614212036,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 10275.2,
|
||
|
|
"valid_targets_min": 2268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.744,
|
||
|
|
"grad_norm": 0.24225388329162625,
|
||
|
|
"learning_rate": 3.971335775752298e-05,
|
||
|
|
"loss": 0.4472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17425012588500977,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 11925.4,
|
||
|
|
"valid_targets_min": 3083
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7488,
|
||
|
|
"grad_norm": 0.2297680649270912,
|
||
|
|
"learning_rate": 3.970194457297414e-05,
|
||
|
|
"loss": 0.4468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15473422408103943,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 10975.8,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7536,
|
||
|
|
"grad_norm": 0.27742186379777073,
|
||
|
|
"learning_rate": 3.9690310288815876e-05,
|
||
|
|
"loss": 0.4381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1480221152305603,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 10486.0,
|
||
|
|
"valid_targets_min": 1116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7584,
|
||
|
|
"grad_norm": 0.27198081218563974,
|
||
|
|
"learning_rate": 3.967845503561073e-05,
|
||
|
|
"loss": 0.4552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1705826222896576,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 11610.0,
|
||
|
|
"valid_targets_min": 3639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7632,
|
||
|
|
"grad_norm": 0.2102169384572373,
|
||
|
|
"learning_rate": 3.9666378946400974e-05,
|
||
|
|
"loss": 0.4484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1382056474685669,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 10808.0,
|
||
|
|
"valid_targets_min": 1630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.768,
|
||
|
|
"grad_norm": 0.23391900735891194,
|
||
|
|
"learning_rate": 3.965408215670719e-05,
|
||
|
|
"loss": 0.4495,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15888884663581848,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 10829.6,
|
||
|
|
"valid_targets_min": 3350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7728,
|
||
|
|
"grad_norm": 0.22879398550516797,
|
||
|
|
"learning_rate": 3.964156480452667e-05,
|
||
|
|
"loss": 0.4371,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15504437685012817,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 11066.4,
|
||
|
|
"valid_targets_min": 2243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7776,
|
||
|
|
"grad_norm": 0.24667088526976355,
|
||
|
|
"learning_rate": 3.962882703033195e-05,
|
||
|
|
"loss": 0.4391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14036577939987183,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 10358.6,
|
||
|
|
"valid_targets_min": 2893
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7824,
|
||
|
|
"grad_norm": 0.23114425024656984,
|
||
|
|
"learning_rate": 3.961586897706915e-05,
|
||
|
|
"loss": 0.4349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13787609338760376,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 9714.0,
|
||
|
|
"valid_targets_min": 2687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7872,
|
||
|
|
"grad_norm": 0.23473702906122673,
|
||
|
|
"learning_rate": 3.960269079015643e-05,
|
||
|
|
"loss": 0.4449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15032735466957092,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 9774.9,
|
||
|
|
"valid_targets_min": 2028
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.792,
|
||
|
|
"grad_norm": 0.2162385966678345,
|
||
|
|
"learning_rate": 3.958929261748236e-05,
|
||
|
|
"loss": 0.4405,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14596378803253174,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 10958.2,
|
||
|
|
"valid_targets_min": 2010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7968,
|
||
|
|
"grad_norm": 0.24222519458371453,
|
||
|
|
"learning_rate": 3.957567460940419e-05,
|
||
|
|
"loss": 0.4394,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12375211715698242,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 8580.5,
|
||
|
|
"valid_targets_min": 1205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8016,
|
||
|
|
"grad_norm": 0.2933772610237654,
|
||
|
|
"learning_rate": 3.9561836918746256e-05,
|
||
|
|
"loss": 0.4398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15357926487922668,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 11393.8,
|
||
|
|
"valid_targets_min": 2154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8064,
|
||
|
|
"grad_norm": 0.2531199798856708,
|
||
|
|
"learning_rate": 3.95477797007982e-05,
|
||
|
|
"loss": 0.4379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15377768874168396,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 10993.3,
|
||
|
|
"valid_targets_min": 2009
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8112,
|
||
|
|
"grad_norm": 0.24295989807739177,
|
||
|
|
"learning_rate": 3.953350311331325e-05,
|
||
|
|
"loss": 0.4436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11503560096025467,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 9558.3,
|
||
|
|
"valid_targets_min": 3104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.816,
|
||
|
|
"grad_norm": 0.3035777048117532,
|
||
|
|
"learning_rate": 3.951900731650645e-05,
|
||
|
|
"loss": 0.4324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1553291380405426,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 9700.8,
|
||
|
|
"valid_targets_min": 1456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8208,
|
||
|
|
"grad_norm": 0.2708795633114363,
|
||
|
|
"learning_rate": 3.950429247305286e-05,
|
||
|
|
"loss": 0.4363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15550169348716736,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 11636.9,
|
||
|
|
"valid_targets_min": 1244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8256,
|
||
|
|
"grad_norm": 0.2719042040403292,
|
||
|
|
"learning_rate": 3.9489358748085737e-05,
|
||
|
|
"loss": 0.4371,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10961782932281494,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 8280.6,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8304,
|
||
|
|
"grad_norm": 0.24090820427969437,
|
||
|
|
"learning_rate": 3.947420630919466e-05,
|
||
|
|
"loss": 0.4395,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1432057023048401,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 10116.8,
|
||
|
|
"valid_targets_min": 3043
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8352,
|
||
|
|
"grad_norm": 0.27574496492858264,
|
||
|
|
"learning_rate": 3.9458835326423674e-05,
|
||
|
|
"loss": 0.4404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11399415135383606,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 8621.7,
|
||
|
|
"valid_targets_min": 2606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.84,
|
||
|
|
"grad_norm": 0.23514909460962988,
|
||
|
|
"learning_rate": 3.9443245972269376e-05,
|
||
|
|
"loss": 0.4385,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16591641306877136,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 11010.5,
|
||
|
|
"valid_targets_min": 1678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8448,
|
||
|
|
"grad_norm": 0.248267579212623,
|
||
|
|
"learning_rate": 3.942743842167896e-05,
|
||
|
|
"loss": 0.4349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14936822652816772,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 9566.6,
|
||
|
|
"valid_targets_min": 1706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8496,
|
||
|
|
"grad_norm": 0.22128202143359202,
|
||
|
|
"learning_rate": 3.941141285204829e-05,
|
||
|
|
"loss": 0.4381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11705336719751358,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 8686.6,
|
||
|
|
"valid_targets_min": 1291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8544,
|
||
|
|
"grad_norm": 0.24239550122658543,
|
||
|
|
"learning_rate": 3.939516944321986e-05,
|
||
|
|
"loss": 0.4412,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16379308700561523,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 10584.9,
|
||
|
|
"valid_targets_min": 1898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8592,
|
||
|
|
"grad_norm": 0.21297377581920748,
|
||
|
|
"learning_rate": 3.937870837748085e-05,
|
||
|
|
"loss": 0.4425,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15646252036094666,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 10218.3,
|
||
|
|
"valid_targets_min": 2225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.864,
|
||
|
|
"grad_norm": 0.271473838990007,
|
||
|
|
"learning_rate": 3.936202983956098e-05,
|
||
|
|
"loss": 0.4401,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18577039241790771,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 12629.7,
|
||
|
|
"valid_targets_min": 2401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8688,
|
||
|
|
"grad_norm": 0.2347160954091539,
|
||
|
|
"learning_rate": 3.934513401663052e-05,
|
||
|
|
"loss": 0.4322,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14884625375270844,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 10616.7,
|
||
|
|
"valid_targets_min": 2416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8736,
|
||
|
|
"grad_norm": 0.23617851555656597,
|
||
|
|
"learning_rate": 3.9328021098298164e-05,
|
||
|
|
"loss": 0.4458,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18026357889175415,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 13847.2,
|
||
|
|
"valid_targets_min": 2601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8784,
|
||
|
|
"grad_norm": 0.23774940308190678,
|
||
|
|
"learning_rate": 3.9310691276608894e-05,
|
||
|
|
"loss": 0.443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1618087887763977,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 9275.4,
|
||
|
|
"valid_targets_min": 1435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8832,
|
||
|
|
"grad_norm": 0.2299724409741465,
|
||
|
|
"learning_rate": 3.9293144746041824e-05,
|
||
|
|
"loss": 0.4363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1557924896478653,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 10640.8,
|
||
|
|
"valid_targets_min": 2643
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.888,
|
||
|
|
"grad_norm": 0.20806032544047204,
|
||
|
|
"learning_rate": 3.9275381703508034e-05,
|
||
|
|
"loss": 0.434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14001302421092987,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 10628.8,
|
||
|
|
"valid_targets_min": 4377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8928,
|
||
|
|
"grad_norm": 0.20442904327913097,
|
||
|
|
"learning_rate": 3.925740234834833e-05,
|
||
|
|
"loss": 0.4428,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1151759922504425,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 9097.5,
|
||
|
|
"valid_targets_min": 2592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8976,
|
||
|
|
"grad_norm": 0.2779844590431934,
|
||
|
|
"learning_rate": 3.9239206882331045e-05,
|
||
|
|
"loss": 0.4426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1592455804347992,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 10262.3,
|
||
|
|
"valid_targets_min": 1553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9024,
|
||
|
|
"grad_norm": 0.2376479952598136,
|
||
|
|
"learning_rate": 3.922079550964976e-05,
|
||
|
|
"loss": 0.4377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17629006505012512,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 11368.9,
|
||
|
|
"valid_targets_min": 1441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9072,
|
||
|
|
"grad_norm": 0.2610581487793073,
|
||
|
|
"learning_rate": 3.920216843692099e-05,
|
||
|
|
"loss": 0.4376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1378932148218155,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 10733.7,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.912,
|
||
|
|
"grad_norm": 0.23384408666927872,
|
||
|
|
"learning_rate": 3.918332587318189e-05,
|
||
|
|
"loss": 0.4332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14836661517620087,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 10466.8,
|
||
|
|
"valid_targets_min": 1466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9168,
|
||
|
|
"grad_norm": 0.24291701861640366,
|
||
|
|
"learning_rate": 3.916426802988791e-05,
|
||
|
|
"loss": 0.4426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14258360862731934,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 10359.5,
|
||
|
|
"valid_targets_min": 2176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9216,
|
||
|
|
"grad_norm": 0.25156841355285575,
|
||
|
|
"learning_rate": 3.9144995120910414e-05,
|
||
|
|
"loss": 0.4328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13940885663032532,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 9649.2,
|
||
|
|
"valid_targets_min": 1351
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9264,
|
||
|
|
"grad_norm": 0.23893594978350297,
|
||
|
|
"learning_rate": 3.912550736253428e-05,
|
||
|
|
"loss": 0.4315,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14351066946983337,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 9654.1,
|
||
|
|
"valid_targets_min": 2523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9312,
|
||
|
|
"grad_norm": 0.21816935655353778,
|
||
|
|
"learning_rate": 3.9105804973455466e-05,
|
||
|
|
"loss": 0.4282,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15850083529949188,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 9993.9,
|
||
|
|
"valid_targets_min": 2456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.936,
|
||
|
|
"grad_norm": 0.21964927011900748,
|
||
|
|
"learning_rate": 3.908588817477858e-05,
|
||
|
|
"loss": 0.4372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1543029099702835,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 10524.8,
|
||
|
|
"valid_targets_min": 2105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9408,
|
||
|
|
"grad_norm": 0.2520400609926727,
|
||
|
|
"learning_rate": 3.9065757190014356e-05,
|
||
|
|
"loss": 0.4309,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1531248688697815,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 10672.6,
|
||
|
|
"valid_targets_min": 1765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9456,
|
||
|
|
"grad_norm": 0.22766144278022285,
|
||
|
|
"learning_rate": 3.90454122450772e-05,
|
||
|
|
"loss": 0.4256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17786508798599243,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 14325.0,
|
||
|
|
"valid_targets_min": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9504,
|
||
|
|
"grad_norm": 0.21863446792464766,
|
||
|
|
"learning_rate": 3.9024853568282615e-05,
|
||
|
|
"loss": 0.4301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14357662200927734,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 11311.1,
|
||
|
|
"valid_targets_min": 2214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9552,
|
||
|
|
"grad_norm": 0.2722964437044844,
|
||
|
|
"learning_rate": 3.900408139034464e-05,
|
||
|
|
"loss": 0.4387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15044580399990082,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 10024.0,
|
||
|
|
"valid_targets_min": 2247
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.96,
|
||
|
|
"grad_norm": 0.22324695304768527,
|
||
|
|
"learning_rate": 3.89830959443733e-05,
|
||
|
|
"loss": 0.4401,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11514655500650406,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 8407.6,
|
||
|
|
"valid_targets_min": 883
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9648,
|
||
|
|
"grad_norm": 0.21542830139427063,
|
||
|
|
"learning_rate": 3.896189746587192e-05,
|
||
|
|
"loss": 0.4278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14129936695098877,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 10078.5,
|
||
|
|
"valid_targets_min": 1519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9696,
|
||
|
|
"grad_norm": 0.2051780043293131,
|
||
|
|
"learning_rate": 3.894048619273457e-05,
|
||
|
|
"loss": 0.4368,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14626026153564453,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 11932.6,
|
||
|
|
"valid_targets_min": 1957
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9744,
|
||
|
|
"grad_norm": 0.2504604799803636,
|
||
|
|
"learning_rate": 3.89188623652433e-05,
|
||
|
|
"loss": 0.4333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1564769744873047,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 10482.3,
|
||
|
|
"valid_targets_min": 1443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9792,
|
||
|
|
"grad_norm": 0.21386579709839965,
|
||
|
|
"learning_rate": 3.889702622606553e-05,
|
||
|
|
"loss": 0.4358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14831697940826416,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 10429.1,
|
||
|
|
"valid_targets_min": 1235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.984,
|
||
|
|
"grad_norm": 0.21260607427844888,
|
||
|
|
"learning_rate": 3.887497802025129e-05,
|
||
|
|
"loss": 0.4304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13159194588661194,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 10932.1,
|
||
|
|
"valid_targets_min": 2067
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9888,
|
||
|
|
"grad_norm": 0.21391124051203142,
|
||
|
|
"learning_rate": 3.885271799523043e-05,
|
||
|
|
"loss": 0.4355,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13605813682079315,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 10411.8,
|
||
|
|
"valid_targets_min": 1926
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9936,
|
||
|
|
"grad_norm": 0.23829142954877147,
|
||
|
|
"learning_rate": 3.8830246400809925e-05,
|
||
|
|
"loss": 0.4333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17297562956809998,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 12221.7,
|
||
|
|
"valid_targets_min": 4900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9984,
|
||
|
|
"grad_norm": 0.2160903034725571,
|
||
|
|
"learning_rate": 3.880756348917101e-05,
|
||
|
|
"loss": 0.4387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15260297060012817,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 11269.4,
|
||
|
|
"valid_targets_min": 1774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.00288,
|
||
|
|
"grad_norm": 0.2273858654830621,
|
||
|
|
"learning_rate": 3.8784669514866365e-05,
|
||
|
|
"loss": 0.4294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1466825157403946,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 10658.0,
|
||
|
|
"valid_targets_min": 3956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.00768,
|
||
|
|
"grad_norm": 0.2315486695024205,
|
||
|
|
"learning_rate": 3.876156473481727e-05,
|
||
|
|
"loss": 0.4192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1460821032524109,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 10631.7,
|
||
|
|
"valid_targets_min": 2487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.01248,
|
||
|
|
"grad_norm": 0.20848555300232693,
|
||
|
|
"learning_rate": 3.8738249408310716e-05,
|
||
|
|
"loss": 0.4181,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15337705612182617,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 11120.5,
|
||
|
|
"valid_targets_min": 2295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.01728,
|
||
|
|
"grad_norm": 0.2340676151350968,
|
||
|
|
"learning_rate": 3.871472379699648e-05,
|
||
|
|
"loss": 0.415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12295055389404297,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 9224.1,
|
||
|
|
"valid_targets_min": 1906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.02208,
|
||
|
|
"grad_norm": 0.2432845446630706,
|
||
|
|
"learning_rate": 3.869098816488422e-05,
|
||
|
|
"loss": 0.424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16106806695461273,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 12667.0,
|
||
|
|
"valid_targets_min": 176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.02688,
|
||
|
|
"grad_norm": 0.22947146647749891,
|
||
|
|
"learning_rate": 3.866704277834049e-05,
|
||
|
|
"loss": 0.4142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11316105723381042,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 9858.5,
|
||
|
|
"valid_targets_min": 2145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.03168,
|
||
|
|
"grad_norm": 0.24700109955600275,
|
||
|
|
"learning_rate": 3.864288790608573e-05,
|
||
|
|
"loss": 0.4215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16960284113883972,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 12659.7,
|
||
|
|
"valid_targets_min": 3683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.03648,
|
||
|
|
"grad_norm": 0.2935586573382416,
|
||
|
|
"learning_rate": 3.861852381919132e-05,
|
||
|
|
"loss": 0.4162,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12964990735054016,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 10707.2,
|
||
|
|
"valid_targets_min": 1762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.04128,
|
||
|
|
"grad_norm": 0.2292250396833103,
|
||
|
|
"learning_rate": 3.8593950791076446e-05,
|
||
|
|
"loss": 0.4173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1273222416639328,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 10119.5,
|
||
|
|
"valid_targets_min": 2996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.04608,
|
||
|
|
"grad_norm": 0.23498404250581678,
|
||
|
|
"learning_rate": 3.856916909750512e-05,
|
||
|
|
"loss": 0.4237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13587495684623718,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 9965.6,
|
||
|
|
"valid_targets_min": 2608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.05088,
|
||
|
|
"grad_norm": 0.2553523699964282,
|
||
|
|
"learning_rate": 3.854417901658301e-05,
|
||
|
|
"loss": 0.4144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1318424940109253,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 8775.3,
|
||
|
|
"valid_targets_min": 1827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.05568,
|
||
|
|
"grad_norm": 0.28410804952434837,
|
||
|
|
"learning_rate": 3.851898082875438e-05,
|
||
|
|
"loss": 0.4127,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11669185757637024,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 9423.7,
|
||
|
|
"valid_targets_min": 1411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.06048,
|
||
|
|
"grad_norm": 0.21362583920709594,
|
||
|
|
"learning_rate": 3.849357481679891e-05,
|
||
|
|
"loss": 0.4175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13717737793922424,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 11139.1,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.06528,
|
||
|
|
"grad_norm": 0.21752994977677992,
|
||
|
|
"learning_rate": 3.846796126582851e-05,
|
||
|
|
"loss": 0.4145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13477230072021484,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 9537.8,
|
||
|
|
"valid_targets_min": 1859
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.07008,
|
||
|
|
"grad_norm": 0.21257443662623238,
|
||
|
|
"learning_rate": 3.844214046328416e-05,
|
||
|
|
"loss": 0.4178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14064884185791016,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 10565.9,
|
||
|
|
"valid_targets_min": 1678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.07488,
|
||
|
|
"grad_norm": 0.21862660809429926,
|
||
|
|
"learning_rate": 3.841611269893266e-05,
|
||
|
|
"loss": 0.4145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1589120626449585,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 12369.5,
|
||
|
|
"valid_targets_min": 2869
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.07968,
|
||
|
|
"grad_norm": 0.23792509429969855,
|
||
|
|
"learning_rate": 3.8389878264863364e-05,
|
||
|
|
"loss": 0.421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15697196125984192,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 11654.8,
|
||
|
|
"valid_targets_min": 2508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.08448,
|
||
|
|
"grad_norm": 0.21339081246699299,
|
||
|
|
"learning_rate": 3.836343745548495e-05,
|
||
|
|
"loss": 0.4216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14548815786838531,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 10870.5,
|
||
|
|
"valid_targets_min": 1455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.08928,
|
||
|
|
"grad_norm": 0.23060046636231427,
|
||
|
|
"learning_rate": 3.833679056752205e-05,
|
||
|
|
"loss": 0.4127,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1275004744529724,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 10123.6,
|
||
|
|
"valid_targets_min": 1678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.09408,
|
||
|
|
"grad_norm": 0.2636941558147357,
|
||
|
|
"learning_rate": 3.8309937900012e-05,
|
||
|
|
"loss": 0.4058,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12546730041503906,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 9488.0,
|
||
|
|
"valid_targets_min": 3625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.09888,
|
||
|
|
"grad_norm": 0.2915428207378038,
|
||
|
|
"learning_rate": 3.8282879754301395e-05,
|
||
|
|
"loss": 0.4152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.142788827419281,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 11112.1,
|
||
|
|
"valid_targets_min": 1347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.10368,
|
||
|
|
"grad_norm": 0.21952478308998585,
|
||
|
|
"learning_rate": 3.825561643404277e-05,
|
||
|
|
"loss": 0.4125,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1304585486650467,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 10049.1,
|
||
|
|
"valid_targets_min": 2730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.10848,
|
||
|
|
"grad_norm": 0.25038615090928107,
|
||
|
|
"learning_rate": 3.8228148245191195e-05,
|
||
|
|
"loss": 0.4078,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1327395886182785,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 10158.7,
|
||
|
|
"valid_targets_min": 2185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.11328,
|
||
|
|
"grad_norm": 0.23435295175638923,
|
||
|
|
"learning_rate": 3.820047549600078e-05,
|
||
|
|
"loss": 0.4206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11310620605945587,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 8340.7,
|
||
|
|
"valid_targets_min": 3053
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.11808,
|
||
|
|
"grad_norm": 0.29168324649773325,
|
||
|
|
"learning_rate": 3.8172598497021304e-05,
|
||
|
|
"loss": 0.4194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13903772830963135,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 9254.2,
|
||
|
|
"valid_targets_min": 2835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.12288,
|
||
|
|
"grad_norm": 0.23637632288605004,
|
||
|
|
"learning_rate": 3.8144517561094635e-05,
|
||
|
|
"loss": 0.41,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12995116412639618,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 9100.4,
|
||
|
|
"valid_targets_min": 1907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.12768,
|
||
|
|
"grad_norm": 0.25218695892611065,
|
||
|
|
"learning_rate": 3.811623300335129e-05,
|
||
|
|
"loss": 0.4132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14638108015060425,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 12772.2,
|
||
|
|
"valid_targets_min": 1483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.13248,
|
||
|
|
"grad_norm": 0.2230090673998303,
|
||
|
|
"learning_rate": 3.808774514120689e-05,
|
||
|
|
"loss": 0.4148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12883812189102173,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 10551.2,
|
||
|
|
"valid_targets_min": 3589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.13728,
|
||
|
|
"grad_norm": 0.2309263447970177,
|
||
|
|
"learning_rate": 3.805905429435856e-05,
|
||
|
|
"loss": 0.4188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13861598074436188,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 10784.2,
|
||
|
|
"valid_targets_min": 1672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.14208,
|
||
|
|
"grad_norm": 0.20154037362791907,
|
||
|
|
"learning_rate": 3.803016078478137e-05,
|
||
|
|
"loss": 0.4225,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13277360796928406,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 10616.0,
|
||
|
|
"valid_targets_min": 2123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.14688,
|
||
|
|
"grad_norm": 0.26637911945778125,
|
||
|
|
"learning_rate": 3.800106493672472e-05,
|
||
|
|
"loss": 0.4149,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1266065239906311,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 9035.4,
|
||
|
|
"valid_targets_min": 1418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.15168,
|
||
|
|
"grad_norm": 0.23840346583222496,
|
||
|
|
"learning_rate": 3.7971767076708704e-05,
|
||
|
|
"loss": 0.4139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14263322949409485,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 10720.5,
|
||
|
|
"valid_targets_min": 2768
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.15648,
|
||
|
|
"grad_norm": 0.2464513849806541,
|
||
|
|
"learning_rate": 3.794226753352042e-05,
|
||
|
|
"loss": 0.4169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13991528749465942,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 9465.7,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.16128,
|
||
|
|
"grad_norm": 0.23364677185568442,
|
||
|
|
"learning_rate": 3.791256663821032e-05,
|
||
|
|
"loss": 0.4252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14799943566322327,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 10576.1,
|
||
|
|
"valid_targets_min": 1972
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.16608,
|
||
|
|
"grad_norm": 0.2061601497448703,
|
||
|
|
"learning_rate": 3.788266472408846e-05,
|
||
|
|
"loss": 0.4202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14230754971504211,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 11516.0,
|
||
|
|
"valid_targets_min": 2542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.17088,
|
||
|
|
"grad_norm": 0.25288643954396195,
|
||
|
|
"learning_rate": 3.785256212672077e-05,
|
||
|
|
"loss": 0.4244,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14614298939704895,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 10024.8,
|
||
|
|
"valid_targets_min": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.17568,
|
||
|
|
"grad_norm": 0.2397437276470604,
|
||
|
|
"learning_rate": 3.7822259183925324e-05,
|
||
|
|
"loss": 0.4195,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13796095550060272,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 9419.6,
|
||
|
|
"valid_targets_min": 3489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.18048,
|
||
|
|
"grad_norm": 0.20449254174295872,
|
||
|
|
"learning_rate": 3.7791756235768476e-05,
|
||
|
|
"loss": 0.4152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13413485884666443,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 11258.3,
|
||
|
|
"valid_targets_min": 1243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1852800000000001,
|
||
|
|
"grad_norm": 0.24344645692417433,
|
||
|
|
"learning_rate": 3.7761053624561104e-05,
|
||
|
|
"loss": 0.4119,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1314702332019806,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 8748.3,
|
||
|
|
"valid_targets_min": 2184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.19008,
|
||
|
|
"grad_norm": 0.20991725263743807,
|
||
|
|
"learning_rate": 3.7730151694854757e-05,
|
||
|
|
"loss": 0.4154,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14757773280143738,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 11077.1,
|
||
|
|
"valid_targets_min": 3770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.19488,
|
||
|
|
"grad_norm": 0.21767909667008972,
|
||
|
|
"learning_rate": 3.769905079343777e-05,
|
||
|
|
"loss": 0.4095,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1390220671892166,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 11480.1,
|
||
|
|
"valid_targets_min": 2477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.19968,
|
||
|
|
"grad_norm": 0.2257068986303116,
|
||
|
|
"learning_rate": 3.766775126933138e-05,
|
||
|
|
"loss": 0.4125,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14113309979438782,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 10721.3,
|
||
|
|
"valid_targets_min": 3756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.20448,
|
||
|
|
"grad_norm": 0.21009605592367017,
|
||
|
|
"learning_rate": 3.7636253473785815e-05,
|
||
|
|
"loss": 0.411,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13418659567832947,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 10695.7,
|
||
|
|
"valid_targets_min": 1715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.20928,
|
||
|
|
"grad_norm": 0.21016348721733408,
|
||
|
|
"learning_rate": 3.760455776027636e-05,
|
||
|
|
"loss": 0.4135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13643574714660645,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 9691.2,
|
||
|
|
"valid_targets_min": 2566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.21408,
|
||
|
|
"grad_norm": 0.20524820111349146,
|
||
|
|
"learning_rate": 3.7572664484499365e-05,
|
||
|
|
"loss": 0.4163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1784619688987732,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 13633.3,
|
||
|
|
"valid_targets_min": 4476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.21888,
|
||
|
|
"grad_norm": 0.21123899434747842,
|
||
|
|
"learning_rate": 3.7540574004368264e-05,
|
||
|
|
"loss": 0.4215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1381276249885559,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 10542.8,
|
||
|
|
"valid_targets_min": 929
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2236799999999999,
|
||
|
|
"grad_norm": 0.21233244655622904,
|
||
|
|
"learning_rate": 3.750828668000959e-05,
|
||
|
|
"loss": 0.4131,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13086965680122375,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 10361.3,
|
||
|
|
"valid_targets_min": 1469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.22848,
|
||
|
|
"grad_norm": 0.2103899640818159,
|
||
|
|
"learning_rate": 3.747580287375887e-05,
|
||
|
|
"loss": 0.4176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10956547409296036,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 8526.0,
|
||
|
|
"valid_targets_min": 2046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.23328,
|
||
|
|
"grad_norm": 0.21594844805715224,
|
||
|
|
"learning_rate": 3.744312295015662e-05,
|
||
|
|
"loss": 0.4115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13053172826766968,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 9360.7,
|
||
|
|
"valid_targets_min": 832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.23808,
|
||
|
|
"grad_norm": 0.2109355374562815,
|
||
|
|
"learning_rate": 3.7410247275944223e-05,
|
||
|
|
"loss": 0.4142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14856983721256256,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 11521.5,
|
||
|
|
"valid_targets_min": 831
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.24288,
|
||
|
|
"grad_norm": 0.2243955319900153,
|
||
|
|
"learning_rate": 3.737717622005981e-05,
|
||
|
|
"loss": 0.4163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14200091361999512,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 11954.9,
|
||
|
|
"valid_targets_min": 1765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.24768,
|
||
|
|
"grad_norm": 0.21861344610496983,
|
||
|
|
"learning_rate": 3.734391015363413e-05,
|
||
|
|
"loss": 0.4258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13023564219474792,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 9402.0,
|
||
|
|
"valid_targets_min": 1620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.25248,
|
||
|
|
"grad_norm": 0.2048384270850224,
|
||
|
|
"learning_rate": 3.7310449449986404e-05,
|
||
|
|
"loss": 0.4174,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13601598143577576,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 9952.6,
|
||
|
|
"valid_targets_min": 3102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.25728,
|
||
|
|
"grad_norm": 0.22440521964428292,
|
||
|
|
"learning_rate": 3.727679448462009e-05,
|
||
|
|
"loss": 0.4108,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1419730931520462,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 9742.3,
|
||
|
|
"valid_targets_min": 1035
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.26208,
|
||
|
|
"grad_norm": 0.20132327484258444,
|
||
|
|
"learning_rate": 3.7242945635218696e-05,
|
||
|
|
"loss": 0.4159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16398459672927856,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 13264.6,
|
||
|
|
"valid_targets_min": 3009
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.26688,
|
||
|
|
"grad_norm": 0.21742026246369942,
|
||
|
|
"learning_rate": 3.720890328164156e-05,
|
||
|
|
"loss": 0.4155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13954755663871765,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 11703.3,
|
||
|
|
"valid_targets_min": 2602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.27168,
|
||
|
|
"grad_norm": 0.2232888027873828,
|
||
|
|
"learning_rate": 3.717466780591956e-05,
|
||
|
|
"loss": 0.4116,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14647075533866882,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 11269.9,
|
||
|
|
"valid_targets_min": 2647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.27648,
|
||
|
|
"grad_norm": 0.21225121412651402,
|
||
|
|
"learning_rate": 3.7140239592250804e-05,
|
||
|
|
"loss": 0.4165,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1489778608083725,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 11695.9,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.28128,
|
||
|
|
"grad_norm": 0.20279925882385536,
|
||
|
|
"learning_rate": 3.71056190269964e-05,
|
||
|
|
"loss": 0.4189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15311786532402039,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 12422.6,
|
||
|
|
"valid_targets_min": 2518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2860800000000001,
|
||
|
|
"grad_norm": 0.2005051063097678,
|
||
|
|
"learning_rate": 3.7070806498676025e-05,
|
||
|
|
"loss": 0.4183,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14334993064403534,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 11061.7,
|
||
|
|
"valid_targets_min": 1411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.29088,
|
||
|
|
"grad_norm": 0.21303853310340795,
|
||
|
|
"learning_rate": 3.7035802397963625e-05,
|
||
|
|
"loss": 0.4092,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11744124442338943,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 9264.0,
|
||
|
|
"valid_targets_min": 3090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.29568,
|
||
|
|
"grad_norm": 0.20486927969536597,
|
||
|
|
"learning_rate": 3.700060711768302e-05,
|
||
|
|
"loss": 0.4192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14721707999706268,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 11086.1,
|
||
|
|
"valid_targets_min": 3188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.30048,
|
||
|
|
"grad_norm": 0.22248326527528745,
|
||
|
|
"learning_rate": 3.696522105280348e-05,
|
||
|
|
"loss": 0.422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15628677606582642,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 11830.6,
|
||
|
|
"valid_targets_min": 1827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.30528,
|
||
|
|
"grad_norm": 0.2024567255287873,
|
||
|
|
"learning_rate": 3.6929644600435303e-05,
|
||
|
|
"loss": 0.4112,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11964519321918488,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 9261.3,
|
||
|
|
"valid_targets_min": 2101
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3100800000000001,
|
||
|
|
"grad_norm": 0.2386785505217334,
|
||
|
|
"learning_rate": 3.689387815982536e-05,
|
||
|
|
"loss": 0.4086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1595247983932495,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 11489.6,
|
||
|
|
"valid_targets_min": 1626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.31488,
|
||
|
|
"grad_norm": 0.20397522674415516,
|
||
|
|
"learning_rate": 3.6857922132352617e-05,
|
||
|
|
"loss": 0.4222,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11339862644672394,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 10194.9,
|
||
|
|
"valid_targets_min": 2049
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.31968,
|
||
|
|
"grad_norm": 0.21700887949212488,
|
||
|
|
"learning_rate": 3.6821776921523615e-05,
|
||
|
|
"loss": 0.4119,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13566583395004272,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 10100.3,
|
||
|
|
"valid_targets_min": 2186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3244799999999999,
|
||
|
|
"grad_norm": 0.21424490251596093,
|
||
|
|
"learning_rate": 3.678544293296797e-05,
|
||
|
|
"loss": 0.4181,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13172942399978638,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 10231.2,
|
||
|
|
"valid_targets_min": 1195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.32928,
|
||
|
|
"grad_norm": 0.24557977923755359,
|
||
|
|
"learning_rate": 3.674892057443378e-05,
|
||
|
|
"loss": 0.4143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15770123898983002,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 11602.1,
|
||
|
|
"valid_targets_min": 2966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.33408,
|
||
|
|
"grad_norm": 0.24185457949860034,
|
||
|
|
"learning_rate": 3.671221025578309e-05,
|
||
|
|
"loss": 0.4187,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14546245336532593,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 10557.9,
|
||
|
|
"valid_targets_min": 1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.33888,
|
||
|
|
"grad_norm": 0.1999001199945465,
|
||
|
|
"learning_rate": 3.6675312388987274e-05,
|
||
|
|
"loss": 0.4198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12263347208499908,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 10265.5,
|
||
|
|
"valid_targets_min": 1258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.34368,
|
||
|
|
"grad_norm": 0.2084056987805099,
|
||
|
|
"learning_rate": 3.663822738812241e-05,
|
||
|
|
"loss": 0.4124,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1537923514842987,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 12081.8,
|
||
|
|
"valid_targets_min": 2073
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.34848,
|
||
|
|
"grad_norm": 0.2411579451936407,
|
||
|
|
"learning_rate": 3.660095566936462e-05,
|
||
|
|
"loss": 0.4164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13250426948070526,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 10960.7,
|
||
|
|
"valid_targets_min": 1195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.35328,
|
||
|
|
"grad_norm": 0.229844139672178,
|
||
|
|
"learning_rate": 3.656349765098546e-05,
|
||
|
|
"loss": 0.3969,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10777662694454193,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 8199.8,
|
||
|
|
"valid_targets_min": 1762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.35808,
|
||
|
|
"grad_norm": 0.2087959755159777,
|
||
|
|
"learning_rate": 3.652585375334714e-05,
|
||
|
|
"loss": 0.4057,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17145152390003204,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 12745.7,
|
||
|
|
"valid_targets_min": 1819
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.36288,
|
||
|
|
"grad_norm": 0.22405679106885293,
|
||
|
|
"learning_rate": 3.648802439889785e-05,
|
||
|
|
"loss": 0.4109,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1325543373823166,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 9900.9,
|
||
|
|
"valid_targets_min": 1880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.36768,
|
||
|
|
"grad_norm": 0.2691908667273548,
|
||
|
|
"learning_rate": 3.645001001216705e-05,
|
||
|
|
"loss": 0.4063,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11722728610038757,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 9083.3,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.37248,
|
||
|
|
"grad_norm": 0.19573723387921657,
|
||
|
|
"learning_rate": 3.641181101976065e-05,
|
||
|
|
"loss": 0.4107,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11058799177408218,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 9659.8,
|
||
|
|
"valid_targets_min": 1617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.37728,
|
||
|
|
"grad_norm": 0.2134623022289432,
|
||
|
|
"learning_rate": 3.637342785035624e-05,
|
||
|
|
"loss": 0.4116,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15618430078029633,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 12706.1,
|
||
|
|
"valid_targets_min": 1818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.38208,
|
||
|
|
"grad_norm": 0.22397148122572136,
|
||
|
|
"learning_rate": 3.633486093469829e-05,
|
||
|
|
"loss": 0.4123,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12754711508750916,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 9612.8,
|
||
|
|
"valid_targets_min": 2126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3868800000000001,
|
||
|
|
"grad_norm": 0.22201805653034568,
|
||
|
|
"learning_rate": 3.629611070559333e-05,
|
||
|
|
"loss": 0.4122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17742006480693817,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 14002.2,
|
||
|
|
"valid_targets_min": 3607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.39168,
|
||
|
|
"grad_norm": 0.20713390355062825,
|
||
|
|
"learning_rate": 3.625717759790506e-05,
|
||
|
|
"loss": 0.4046,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13317152857780457,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 10315.7,
|
||
|
|
"valid_targets_min": 2648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.39648,
|
||
|
|
"grad_norm": 0.20581742916420195,
|
||
|
|
"learning_rate": 3.621806204854947e-05,
|
||
|
|
"loss": 0.4057,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12615267932415009,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 9761.5,
|
||
|
|
"valid_targets_min": 2595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.40128,
|
||
|
|
"grad_norm": 0.2253326544939486,
|
||
|
|
"learning_rate": 3.617876449648998e-05,
|
||
|
|
"loss": 0.4156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13411745429039001,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 10907.1,
|
||
|
|
"valid_targets_min": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.40608,
|
||
|
|
"grad_norm": 0.2158021375696706,
|
||
|
|
"learning_rate": 3.613928538273247e-05,
|
||
|
|
"loss": 0.4045,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15256470441818237,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 11172.7,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4108800000000001,
|
||
|
|
"grad_norm": 0.23215844714790715,
|
||
|
|
"learning_rate": 3.609962515032034e-05,
|
||
|
|
"loss": 0.4071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13259349763393402,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 9956.5,
|
||
|
|
"valid_targets_min": 1151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.41568,
|
||
|
|
"grad_norm": 0.20191946885445114,
|
||
|
|
"learning_rate": 3.605978424432954e-05,
|
||
|
|
"loss": 0.4086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14187245070934296,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 11490.5,
|
||
|
|
"valid_targets_min": 2044
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.42048,
|
||
|
|
"grad_norm": 0.2129186932802564,
|
||
|
|
"learning_rate": 3.601976311186361e-05,
|
||
|
|
"loss": 0.4036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15841859579086304,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 11375.0,
|
||
|
|
"valid_targets_min": 2719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4252799999999999,
|
||
|
|
"grad_norm": 0.22986360631556008,
|
||
|
|
"learning_rate": 3.597956220204861e-05,
|
||
|
|
"loss": 0.4159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14085061848163605,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 11803.7,
|
||
|
|
"valid_targets_min": 2323
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.43008,
|
||
|
|
"grad_norm": 0.23033386303941059,
|
||
|
|
"learning_rate": 3.5939181966028084e-05,
|
||
|
|
"loss": 0.4136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1093810647726059,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 8520.8,
|
||
|
|
"valid_targets_min": 2352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.43488,
|
||
|
|
"grad_norm": 0.22690875952678244,
|
||
|
|
"learning_rate": 3.589862285695804e-05,
|
||
|
|
"loss": 0.4072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1403937041759491,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 10915.5,
|
||
|
|
"valid_targets_min": 2952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.43968,
|
||
|
|
"grad_norm": 0.20071387459694237,
|
||
|
|
"learning_rate": 3.585788533000184e-05,
|
||
|
|
"loss": 0.4044,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12689334154129028,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 10028.5,
|
||
|
|
"valid_targets_min": 2214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.44448,
|
||
|
|
"grad_norm": 0.23884170249895356,
|
||
|
|
"learning_rate": 3.581696984232508e-05,
|
||
|
|
"loss": 0.4005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13245141506195068,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 10572.4,
|
||
|
|
"valid_targets_min": 384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.44928,
|
||
|
|
"grad_norm": 0.23906878983233132,
|
||
|
|
"learning_rate": 3.5775876853090465e-05,
|
||
|
|
"loss": 0.4137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13135510683059692,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 10596.6,
|
||
|
|
"valid_targets_min": 2137
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.45408,
|
||
|
|
"grad_norm": 0.21408874742248546,
|
||
|
|
"learning_rate": 3.5734606823452686e-05,
|
||
|
|
"loss": 0.4073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15579737722873688,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 11520.9,
|
||
|
|
"valid_targets_min": 1168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.45888,
|
||
|
|
"grad_norm": 0.23082224637767995,
|
||
|
|
"learning_rate": 3.569316021655319e-05,
|
||
|
|
"loss": 0.4074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12368214875459671,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 9717.6,
|
||
|
|
"valid_targets_min": 3395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.46368,
|
||
|
|
"grad_norm": 0.23974373684279676,
|
||
|
|
"learning_rate": 3.565153749751505e-05,
|
||
|
|
"loss": 0.3944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1179773360490799,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 9593.9,
|
||
|
|
"valid_targets_min": 1719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.46848,
|
||
|
|
"grad_norm": 0.21789884302707357,
|
||
|
|
"learning_rate": 3.5609739133437666e-05,
|
||
|
|
"loss": 0.403,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14593097567558289,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 10919.9,
|
||
|
|
"valid_targets_min": 3251
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.47328,
|
||
|
|
"grad_norm": 0.2684640365660121,
|
||
|
|
"learning_rate": 3.55677655933916e-05,
|
||
|
|
"loss": 0.4021,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12168829143047333,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 9522.3,
|
||
|
|
"valid_targets_min": 724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.47808,
|
||
|
|
"grad_norm": 0.21931539915256654,
|
||
|
|
"learning_rate": 3.5525617348413265e-05,
|
||
|
|
"loss": 0.4072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12263620644807816,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 9469.0,
|
||
|
|
"valid_targets_min": 1597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.48288,
|
||
|
|
"grad_norm": 0.2263527046157457,
|
||
|
|
"learning_rate": 3.5483294871499646e-05,
|
||
|
|
"loss": 0.4099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12820056080818176,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 9946.8,
|
||
|
|
"valid_targets_min": 2444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4876800000000001,
|
||
|
|
"grad_norm": 0.2137941586747305,
|
||
|
|
"learning_rate": 3.544079863760302e-05,
|
||
|
|
"loss": 0.4034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1168578639626503,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 8876.6,
|
||
|
|
"valid_targets_min": 1681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.49248,
|
||
|
|
"grad_norm": 0.22498503782561743,
|
||
|
|
"learning_rate": 3.5398129123625565e-05,
|
||
|
|
"loss": 0.4087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.163137748837471,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 11269.5,
|
||
|
|
"valid_targets_min": 1604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.49728,
|
||
|
|
"grad_norm": 0.2096852182318285,
|
||
|
|
"learning_rate": 3.535528680841408e-05,
|
||
|
|
"loss": 0.4012,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1431254893541336,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 10506.5,
|
||
|
|
"valid_targets_min": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5020799999999999,
|
||
|
|
"grad_norm": 0.19907417765637198,
|
||
|
|
"learning_rate": 3.5312272172754566e-05,
|
||
|
|
"loss": 0.4059,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15149365365505219,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 12395.6,
|
||
|
|
"valid_targets_min": 1800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.50688,
|
||
|
|
"grad_norm": 0.21755973079142352,
|
||
|
|
"learning_rate": 3.5269085699366844e-05,
|
||
|
|
"loss": 0.4053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1263016015291214,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 10658.8,
|
||
|
|
"valid_targets_min": 2443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5116800000000001,
|
||
|
|
"grad_norm": 0.24303549201558827,
|
||
|
|
"learning_rate": 3.5225727872899136e-05,
|
||
|
|
"loss": 0.4114,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12606140971183777,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 9330.3,
|
||
|
|
"valid_targets_min": 1058
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.51648,
|
||
|
|
"grad_norm": 0.2835144410727879,
|
||
|
|
"learning_rate": 3.518219917992262e-05,
|
||
|
|
"loss": 0.3976,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.149481400847435,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 11042.7,
|
||
|
|
"valid_targets_min": 1431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.52128,
|
||
|
|
"grad_norm": 0.2441642529443939,
|
||
|
|
"learning_rate": 3.5138500108926e-05,
|
||
|
|
"loss": 0.4075,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14199629426002502,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 10971.2,
|
||
|
|
"valid_targets_min": 2014
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5260799999999999,
|
||
|
|
"grad_norm": 0.22227721700480615,
|
||
|
|
"learning_rate": 3.509463115030995e-05,
|
||
|
|
"loss": 0.3959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14047616720199585,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 11059.1,
|
||
|
|
"valid_targets_min": 2218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.53088,
|
||
|
|
"grad_norm": 0.22052339326979958,
|
||
|
|
"learning_rate": 3.505059279638172e-05,
|
||
|
|
"loss": 0.4033,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12846125662326813,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 9715.1,
|
||
|
|
"valid_targets_min": 2285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5356800000000002,
|
||
|
|
"grad_norm": 0.2194673984483837,
|
||
|
|
"learning_rate": 3.500638554134952e-05,
|
||
|
|
"loss": 0.4039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10912424325942993,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 9214.8,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.54048,
|
||
|
|
"grad_norm": 0.2177575764877966,
|
||
|
|
"learning_rate": 3.4962009881317005e-05,
|
||
|
|
"loss": 0.4029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14358478784561157,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 11452.6,
|
||
|
|
"valid_targets_min": 2380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.54528,
|
||
|
|
"grad_norm": 0.22537318863877479,
|
||
|
|
"learning_rate": 3.491746631427772e-05,
|
||
|
|
"loss": 0.3994,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14550143480300903,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 10776.5,
|
||
|
|
"valid_targets_min": 2025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.55008,
|
||
|
|
"grad_norm": 0.22553034346150513,
|
||
|
|
"learning_rate": 3.487275534010948e-05,
|
||
|
|
"loss": 0.4053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13195769488811493,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 9417.9,
|
||
|
|
"valid_targets_min": 2104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.55488,
|
||
|
|
"grad_norm": 0.2646860204126771,
|
||
|
|
"learning_rate": 3.482787746056881e-05,
|
||
|
|
"loss": 0.4013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13002309203147888,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 9772.6,
|
||
|
|
"valid_targets_min": 276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.55968,
|
||
|
|
"grad_norm": 0.19817526014002387,
|
||
|
|
"learning_rate": 3.4782833179285256e-05,
|
||
|
|
"loss": 0.3959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12554329633712769,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 10098.5,
|
||
|
|
"valid_targets_min": 3998
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.56448,
|
||
|
|
"grad_norm": 0.23265845390648024,
|
||
|
|
"learning_rate": 3.473762300175578e-05,
|
||
|
|
"loss": 0.4055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10745157301425934,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 8866.5,
|
||
|
|
"valid_targets_min": 2414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.56928,
|
||
|
|
"grad_norm": 0.21681894549050468,
|
||
|
|
"learning_rate": 3.469224743533906e-05,
|
||
|
|
"loss": 0.3997,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14574511349201202,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 10075.9,
|
||
|
|
"valid_targets_min": 2725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.57408,
|
||
|
|
"grad_norm": 0.21721065351913352,
|
||
|
|
"learning_rate": 3.464670698924981e-05,
|
||
|
|
"loss": 0.3959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13828513026237488,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 10433.1,
|
||
|
|
"valid_targets_min": 2115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5788799999999998,
|
||
|
|
"grad_norm": 0.2214508043669377,
|
||
|
|
"learning_rate": 3.4601002174553055e-05,
|
||
|
|
"loss": 0.3944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12395211309194565,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 9867.7,
|
||
|
|
"valid_targets_min": 2175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.58368,
|
||
|
|
"grad_norm": 0.1976456316917021,
|
||
|
|
"learning_rate": 3.45551335041584e-05,
|
||
|
|
"loss": 0.3996,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1447567641735077,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 10812.8,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5884800000000001,
|
||
|
|
"grad_norm": 0.2150887282101477,
|
||
|
|
"learning_rate": 3.4509101492814286e-05,
|
||
|
|
"loss": 0.3936,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11536484211683273,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 8026.8,
|
||
|
|
"valid_targets_min": 1294
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.59328,
|
||
|
|
"grad_norm": 0.2032204949161656,
|
||
|
|
"learning_rate": 3.446290665710219e-05,
|
||
|
|
"loss": 0.3986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13227199018001556,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 10462.0,
|
||
|
|
"valid_targets_min": 3347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.59808,
|
||
|
|
"grad_norm": 0.21431437862340466,
|
||
|
|
"learning_rate": 3.441654951543085e-05,
|
||
|
|
"loss": 0.397,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12322622537612915,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 9954.2,
|
||
|
|
"valid_targets_min": 2445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6028799999999999,
|
||
|
|
"grad_norm": 0.21808100784816117,
|
||
|
|
"learning_rate": 3.4370030588030425e-05,
|
||
|
|
"loss": 0.4059,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1262700855731964,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 9128.3,
|
||
|
|
"valid_targets_min": 2970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.60768,
|
||
|
|
"grad_norm": 0.22192127783593246,
|
||
|
|
"learning_rate": 3.432335039694669e-05,
|
||
|
|
"loss": 0.3985,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1360887736082077,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 10192.9,
|
||
|
|
"valid_targets_min": 2388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6124800000000001,
|
||
|
|
"grad_norm": 0.2105589706564431,
|
||
|
|
"learning_rate": 3.427650946603513e-05,
|
||
|
|
"loss": 0.3968,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14164552092552185,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 11665.5,
|
||
|
|
"valid_targets_min": 1873
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.61728,
|
||
|
|
"grad_norm": 0.20917890739253076,
|
||
|
|
"learning_rate": 3.422950832095511e-05,
|
||
|
|
"loss": 0.4013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14686794579029083,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 11836.5,
|
||
|
|
"valid_targets_min": 1526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.62208,
|
||
|
|
"grad_norm": 0.218689384602607,
|
||
|
|
"learning_rate": 3.418234748916395e-05,
|
||
|
|
"loss": 0.3978,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15382401645183563,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 12671.2,
|
||
|
|
"valid_targets_min": 3689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6268799999999999,
|
||
|
|
"grad_norm": 0.20427477458609572,
|
||
|
|
"learning_rate": 3.4135027499911003e-05,
|
||
|
|
"loss": 0.4039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1121823638677597,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 8827.9,
|
||
|
|
"valid_targets_min": 2336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.63168,
|
||
|
|
"grad_norm": 0.25324102620237127,
|
||
|
|
"learning_rate": 3.408754888423173e-05,
|
||
|
|
"loss": 0.4016,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12924690544605255,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 9792.0,
|
||
|
|
"valid_targets_min": 1363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6364800000000002,
|
||
|
|
"grad_norm": 0.23575060015918897,
|
||
|
|
"learning_rate": 3.403991217494172e-05,
|
||
|
|
"loss": 0.4062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12108415365219116,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 9316.2,
|
||
|
|
"valid_targets_min": 2950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.64128,
|
||
|
|
"grad_norm": 0.19785865556687454,
|
||
|
|
"learning_rate": 3.3992117906630744e-05,
|
||
|
|
"loss": 0.4074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1475123167037964,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 12372.2,
|
||
|
|
"valid_targets_min": 2309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.64608,
|
||
|
|
"grad_norm": 0.21415769305814603,
|
||
|
|
"learning_rate": 3.394416661565671e-05,
|
||
|
|
"loss": 0.3947,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11862894892692566,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 8912.2,
|
||
|
|
"valid_targets_min": 1675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.65088,
|
||
|
|
"grad_norm": 0.23826038703312183,
|
||
|
|
"learning_rate": 3.389605884013969e-05,
|
||
|
|
"loss": 0.4033,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1426897943019867,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 10804.0,
|
||
|
|
"valid_targets_min": 2234
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.65568,
|
||
|
|
"grad_norm": 0.24170373795005826,
|
||
|
|
"learning_rate": 3.384779511995587e-05,
|
||
|
|
"loss": 0.4011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13612665235996246,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 10696.4,
|
||
|
|
"valid_targets_min": 1778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.66048,
|
||
|
|
"grad_norm": 0.23931550486897354,
|
||
|
|
"learning_rate": 3.379937599673144e-05,
|
||
|
|
"loss": 0.3985,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12457090616226196,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 9632.0,
|
||
|
|
"valid_targets_min": 2851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.66528,
|
||
|
|
"grad_norm": 0.2061576914555314,
|
||
|
|
"learning_rate": 3.3750802013836596e-05,
|
||
|
|
"loss": 0.4053,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11049826443195343,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 8703.4,
|
||
|
|
"valid_targets_min": 2022
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.67008,
|
||
|
|
"grad_norm": 0.21374204056001617,
|
||
|
|
"learning_rate": 3.370207371637939e-05,
|
||
|
|
"loss": 0.4011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15568511188030243,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 11633.8,
|
||
|
|
"valid_targets_min": 3684
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.67488,
|
||
|
|
"grad_norm": 0.21337916221726186,
|
||
|
|
"learning_rate": 3.3653191651199635e-05,
|
||
|
|
"loss": 0.4064,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15145465731620789,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 10421.4,
|
||
|
|
"valid_targets_min": 2316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6796799999999998,
|
||
|
|
"grad_norm": 0.23004226232229372,
|
||
|
|
"learning_rate": 3.360415636686274e-05,
|
||
|
|
"loss": 0.4024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12970012426376343,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 9696.3,
|
||
|
|
"valid_targets_min": 2038
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.68448,
|
||
|
|
"grad_norm": 0.20561270288874453,
|
||
|
|
"learning_rate": 3.355496841365359e-05,
|
||
|
|
"loss": 0.3912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12068414688110352,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 9217.6,
|
||
|
|
"valid_targets_min": 3244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6892800000000001,
|
||
|
|
"grad_norm": 0.22563271293316586,
|
||
|
|
"learning_rate": 3.350562834357034e-05,
|
||
|
|
"loss": 0.3937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14101052284240723,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 10387.0,
|
||
|
|
"valid_targets_min": 3548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.69408,
|
||
|
|
"grad_norm": 0.24142812450638393,
|
||
|
|
"learning_rate": 3.345613671031827e-05,
|
||
|
|
"loss": 0.3953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13549157977104187,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 10715.5,
|
||
|
|
"valid_targets_min": 2714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.69888,
|
||
|
|
"grad_norm": 0.21228849973724254,
|
||
|
|
"learning_rate": 3.340649406930349e-05,
|
||
|
|
"loss": 0.3953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12110074609518051,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 11074.2,
|
||
|
|
"valid_targets_min": 1359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7036799999999999,
|
||
|
|
"grad_norm": 0.2156200959319992,
|
||
|
|
"learning_rate": 3.335670097762677e-05,
|
||
|
|
"loss": 0.3992,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1143205314874649,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 9483.5,
|
||
|
|
"valid_targets_min": 1606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.70848,
|
||
|
|
"grad_norm": 0.23246732383901142,
|
||
|
|
"learning_rate": 3.330675799407728e-05,
|
||
|
|
"loss": 0.3956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13464942574501038,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 11230.7,
|
||
|
|
"valid_targets_min": 1710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7132800000000001,
|
||
|
|
"grad_norm": 0.22253356811827898,
|
||
|
|
"learning_rate": 3.32566656791263e-05,
|
||
|
|
"loss": 0.3986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12669792771339417,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 10284.6,
|
||
|
|
"valid_targets_min": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.71808,
|
||
|
|
"grad_norm": 0.20486126933947996,
|
||
|
|
"learning_rate": 3.320642459492095e-05,
|
||
|
|
"loss": 0.3929,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12168687582015991,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 9668.6,
|
||
|
|
"valid_targets_min": 1877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.72288,
|
||
|
|
"grad_norm": 0.2502018008883989,
|
||
|
|
"learning_rate": 3.315603530527785e-05,
|
||
|
|
"loss": 0.3987,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13025622069835663,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 10071.4,
|
||
|
|
"valid_targets_min": 1291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7276799999999999,
|
||
|
|
"grad_norm": 0.22100999377103495,
|
||
|
|
"learning_rate": 3.310549837567685e-05,
|
||
|
|
"loss": 0.4005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13234061002731323,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 10166.8,
|
||
|
|
"valid_targets_min": 1177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.73248,
|
||
|
|
"grad_norm": 0.2129959699349399,
|
||
|
|
"learning_rate": 3.3054814373254615e-05,
|
||
|
|
"loss": 0.3973,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1542513072490692,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 10941.1,
|
||
|
|
"valid_targets_min": 2436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7372800000000002,
|
||
|
|
"grad_norm": 0.20978747808047463,
|
||
|
|
"learning_rate": 3.300398386679831e-05,
|
||
|
|
"loss": 0.3999,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1411202847957611,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 11338.0,
|
||
|
|
"valid_targets_min": 2897
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.74208,
|
||
|
|
"grad_norm": 0.21485236379949604,
|
||
|
|
"learning_rate": 3.2953007426739204e-05,
|
||
|
|
"loss": 0.3985,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13011616468429565,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 9627.8,
|
||
|
|
"valid_targets_min": 2142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.74688,
|
||
|
|
"grad_norm": 0.20840933175004347,
|
||
|
|
"learning_rate": 3.290188562514624e-05,
|
||
|
|
"loss": 0.3981,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1490216851234436,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 11321.6,
|
||
|
|
"valid_targets_min": 2398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.75168,
|
||
|
|
"grad_norm": 0.20026765690303727,
|
||
|
|
"learning_rate": 3.285061903571968e-05,
|
||
|
|
"loss": 0.3957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13110694289207458,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 9812.0,
|
||
|
|
"valid_targets_min": 2429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.75648,
|
||
|
|
"grad_norm": 0.2292840920686858,
|
||
|
|
"learning_rate": 3.27992082337846e-05,
|
||
|
|
"loss": 0.4033,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15019527077674866,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 11044.8,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.76128,
|
||
|
|
"grad_norm": 0.22099436299927055,
|
||
|
|
"learning_rate": 3.274765379628447e-05,
|
||
|
|
"loss": 0.4065,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14847183227539062,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 11189.2,
|
||
|
|
"valid_targets_min": 1796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.76608,
|
||
|
|
"grad_norm": 0.20320537061481944,
|
||
|
|
"learning_rate": 3.2695956301774664e-05,
|
||
|
|
"loss": 0.3973,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13340553641319275,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 10326.3,
|
||
|
|
"valid_targets_min": 1462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.77088,
|
||
|
|
"grad_norm": 0.21167489305289328,
|
||
|
|
"learning_rate": 3.264411633041598e-05,
|
||
|
|
"loss": 0.3987,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1621175855398178,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 11443.7,
|
||
|
|
"valid_targets_min": 1542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.77568,
|
||
|
|
"grad_norm": 0.2186721440660899,
|
||
|
|
"learning_rate": 3.259213446396812e-05,
|
||
|
|
"loss": 0.3917,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1408073753118515,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 11004.1,
|
||
|
|
"valid_targets_min": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7804799999999998,
|
||
|
|
"grad_norm": 0.2163856042786629,
|
||
|
|
"learning_rate": 3.254001128578317e-05,
|
||
|
|
"loss": 0.391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1433507800102234,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 11132.0,
|
||
|
|
"valid_targets_min": 3359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.78528,
|
||
|
|
"grad_norm": 0.20279384664468394,
|
||
|
|
"learning_rate": 3.2487747380799036e-05,
|
||
|
|
"loss": 0.3979,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10963033139705658,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 8810.3,
|
||
|
|
"valid_targets_min": 2143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7900800000000001,
|
||
|
|
"grad_norm": 0.20087820022966488,
|
||
|
|
"learning_rate": 3.243534333553291e-05,
|
||
|
|
"loss": 0.3956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11881513893604279,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 11328.5,
|
||
|
|
"valid_targets_min": 2388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.79488,
|
||
|
|
"grad_norm": 0.21398495522011107,
|
||
|
|
"learning_rate": 3.2382799738074635e-05,
|
||
|
|
"loss": 0.3989,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14420190453529358,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 10098.8,
|
||
|
|
"valid_targets_min": 2226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.79968,
|
||
|
|
"grad_norm": 0.2164572857663917,
|
||
|
|
"learning_rate": 3.2330117178080184e-05,
|
||
|
|
"loss": 0.3961,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12767937779426575,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 9393.9,
|
||
|
|
"valid_targets_min": 2123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8044799999999999,
|
||
|
|
"grad_norm": 0.30012965010498455,
|
||
|
|
"learning_rate": 3.227729624676497e-05,
|
||
|
|
"loss": 0.3918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11639062315225601,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 9530.6,
|
||
|
|
"valid_targets_min": 1401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.80928,
|
||
|
|
"grad_norm": 0.24176122916815607,
|
||
|
|
"learning_rate": 3.222433753689724e-05,
|
||
|
|
"loss": 0.4003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15518411993980408,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 10808.2,
|
||
|
|
"valid_targets_min": 1017
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8140800000000001,
|
||
|
|
"grad_norm": 0.22436718375518214,
|
||
|
|
"learning_rate": 3.2171241642791443e-05,
|
||
|
|
"loss": 0.3946,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14497432112693787,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 11211.7,
|
||
|
|
"valid_targets_min": 1983
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.81888,
|
||
|
|
"grad_norm": 0.23787343944723544,
|
||
|
|
"learning_rate": 3.211800916030152e-05,
|
||
|
|
"loss": 0.3912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1524227112531662,
|
||
|
|
"step": 1895,
|
||
|
|
"valid_targets_mean": 11464.1,
|
||
|
|
"valid_targets_min": 2878
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.82368,
|
||
|
|
"grad_norm": 0.2572060361202662,
|
||
|
|
"learning_rate": 3.206464068681424e-05,
|
||
|
|
"loss": 0.3937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12020900100469589,
|
||
|
|
"step": 1900,
|
||
|
|
"valid_targets_mean": 9636.8,
|
||
|
|
"valid_targets_min": 2548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8284799999999999,
|
||
|
|
"grad_norm": 0.21078383512361049,
|
||
|
|
"learning_rate": 3.20111368212425e-05,
|
||
|
|
"loss": 0.3968,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11897289752960205,
|
||
|
|
"step": 1905,
|
||
|
|
"valid_targets_mean": 11681.4,
|
||
|
|
"valid_targets_min": 3107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.83328,
|
||
|
|
"grad_norm": 0.20294316150292713,
|
||
|
|
"learning_rate": 3.19574981640186e-05,
|
||
|
|
"loss": 0.3959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14064642786979675,
|
||
|
|
"step": 1910,
|
||
|
|
"valid_targets_mean": 11299.5,
|
||
|
|
"valid_targets_min": 2709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8380800000000002,
|
||
|
|
"grad_norm": 0.18972454533577332,
|
||
|
|
"learning_rate": 3.1903725317087495e-05,
|
||
|
|
"loss": 0.3939,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13221979141235352,
|
||
|
|
"step": 1915,
|
||
|
|
"valid_targets_mean": 11891.0,
|
||
|
|
"valid_targets_min": 3651
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.84288,
|
||
|
|
"grad_norm": 0.18943681153553424,
|
||
|
|
"learning_rate": 3.184981888390003e-05,
|
||
|
|
"loss": 0.3957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13814029097557068,
|
||
|
|
"step": 1920,
|
||
|
|
"valid_targets_mean": 11726.5,
|
||
|
|
"valid_targets_min": 2799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.84768,
|
||
|
|
"grad_norm": 0.20435272678520974,
|
||
|
|
"learning_rate": 3.1795779469406226e-05,
|
||
|
|
"loss": 0.3935,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13576734066009521,
|
||
|
|
"step": 1925,
|
||
|
|
"valid_targets_mean": 12009.9,
|
||
|
|
"valid_targets_min": 2840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.85248,
|
||
|
|
"grad_norm": 0.22181511371074905,
|
||
|
|
"learning_rate": 3.174160768004842e-05,
|
||
|
|
"loss": 0.3921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10999979078769684,
|
||
|
|
"step": 1930,
|
||
|
|
"valid_targets_mean": 8584.6,
|
||
|
|
"valid_targets_min": 1345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.85728,
|
||
|
|
"grad_norm": 0.20259428439496813,
|
||
|
|
"learning_rate": 3.168730412375449e-05,
|
||
|
|
"loss": 0.3995,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14881809055805206,
|
||
|
|
"step": 1935,
|
||
|
|
"valid_targets_mean": 11891.6,
|
||
|
|
"valid_targets_min": 1747
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.86208,
|
||
|
|
"grad_norm": 0.20820724171847152,
|
||
|
|
"learning_rate": 3.1632869409931036e-05,
|
||
|
|
"loss": 0.4036,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12483834475278854,
|
||
|
|
"step": 1940,
|
||
|
|
"valid_targets_mean": 10321.6,
|
||
|
|
"valid_targets_min": 2655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.86688,
|
||
|
|
"grad_norm": 0.2273426621166987,
|
||
|
|
"learning_rate": 3.1578304149456544e-05,
|
||
|
|
"loss": 0.3928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1542552411556244,
|
||
|
|
"step": 1945,
|
||
|
|
"valid_targets_mean": 13354.8,
|
||
|
|
"valid_targets_min": 2049
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.87168,
|
||
|
|
"grad_norm": 0.22508915131462948,
|
||
|
|
"learning_rate": 3.1523608954674524e-05,
|
||
|
|
"loss": 0.3969,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14855341613292694,
|
||
|
|
"step": 1950,
|
||
|
|
"valid_targets_mean": 9999.3,
|
||
|
|
"valid_targets_min": 1416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.87648,
|
||
|
|
"grad_norm": 0.21601715129994264,
|
||
|
|
"learning_rate": 3.1468784439386614e-05,
|
||
|
|
"loss": 0.4013,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1591084599494934,
|
||
|
|
"step": 1955,
|
||
|
|
"valid_targets_mean": 11878.5,
|
||
|
|
"valid_targets_min": 719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8812799999999998,
|
||
|
|
"grad_norm": 0.23007947368682746,
|
||
|
|
"learning_rate": 3.141383121884576e-05,
|
||
|
|
"loss": 0.3955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12612038850784302,
|
||
|
|
"step": 1960,
|
||
|
|
"valid_targets_mean": 10162.7,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.88608,
|
||
|
|
"grad_norm": 0.23316297834483646,
|
||
|
|
"learning_rate": 3.1358749909749214e-05,
|
||
|
|
"loss": 0.3952,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11835485696792603,
|
||
|
|
"step": 1965,
|
||
|
|
"valid_targets_mean": 9112.8,
|
||
|
|
"valid_targets_min": 2014
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8908800000000001,
|
||
|
|
"grad_norm": 0.2114115011904651,
|
||
|
|
"learning_rate": 3.1303541130231703e-05,
|
||
|
|
"loss": 0.3911,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13895109295845032,
|
||
|
|
"step": 1970,
|
||
|
|
"valid_targets_mean": 11622.1,
|
||
|
|
"valid_targets_min": 1624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.89568,
|
||
|
|
"grad_norm": 0.22891961421868048,
|
||
|
|
"learning_rate": 3.1248205499858446e-05,
|
||
|
|
"loss": 0.3998,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1330220252275467,
|
||
|
|
"step": 1975,
|
||
|
|
"valid_targets_mean": 11612.4,
|
||
|
|
"valid_targets_min": 3255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.90048,
|
||
|
|
"grad_norm": 0.19639013400517918,
|
||
|
|
"learning_rate": 3.119274363961821e-05,
|
||
|
|
"loss": 0.3964,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11368654668331146,
|
||
|
|
"step": 1980,
|
||
|
|
"valid_targets_mean": 10622.1,
|
||
|
|
"valid_targets_min": 1229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9052799999999999,
|
||
|
|
"grad_norm": 0.21344525100358355,
|
||
|
|
"learning_rate": 3.113715617191634e-05,
|
||
|
|
"loss": 0.3965,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15114393830299377,
|
||
|
|
"step": 1985,
|
||
|
|
"valid_targets_mean": 12539.8,
|
||
|
|
"valid_targets_min": 1786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.91008,
|
||
|
|
"grad_norm": 0.20445368901845684,
|
||
|
|
"learning_rate": 3.1081443720567785e-05,
|
||
|
|
"loss": 0.3953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15004891157150269,
|
||
|
|
"step": 1990,
|
||
|
|
"valid_targets_mean": 11998.1,
|
||
|
|
"valid_targets_min": 3386
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9148800000000001,
|
||
|
|
"grad_norm": 0.19929437058584107,
|
||
|
|
"learning_rate": 3.102560691079007e-05,
|
||
|
|
"loss": 0.3915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14125114679336548,
|
||
|
|
"step": 1995,
|
||
|
|
"valid_targets_mean": 9776.0,
|
||
|
|
"valid_targets_min": 3376
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.91968,
|
||
|
|
"grad_norm": 0.22575168479848515,
|
||
|
|
"learning_rate": 3.0969646369196307e-05,
|
||
|
|
"loss": 0.3937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13779957592487335,
|
||
|
|
"step": 2000,
|
||
|
|
"valid_targets_mean": 10278.4,
|
||
|
|
"valid_targets_min": 2325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.92448,
|
||
|
|
"grad_norm": 0.20409262515285168,
|
||
|
|
"learning_rate": 3.0913562723788174e-05,
|
||
|
|
"loss": 0.3942,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1357709765434265,
|
||
|
|
"step": 2005,
|
||
|
|
"valid_targets_mean": 10191.1,
|
||
|
|
"valid_targets_min": 1573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9292799999999999,
|
||
|
|
"grad_norm": 0.2352276816410141,
|
||
|
|
"learning_rate": 3.085735660394881e-05,
|
||
|
|
"loss": 0.3905,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13631772994995117,
|
||
|
|
"step": 2010,
|
||
|
|
"valid_targets_mean": 10502.3,
|
||
|
|
"valid_targets_min": 322
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.93408,
|
||
|
|
"grad_norm": 0.2109305064204127,
|
||
|
|
"learning_rate": 3.080102864043581e-05,
|
||
|
|
"loss": 0.3884,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.137748584151268,
|
||
|
|
"step": 2015,
|
||
|
|
"valid_targets_mean": 10613.8,
|
||
|
|
"valid_targets_min": 3961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9388800000000002,
|
||
|
|
"grad_norm": 0.2136610461341701,
|
||
|
|
"learning_rate": 3.074457946537413e-05,
|
||
|
|
"loss": 0.3881,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09953026473522186,
|
||
|
|
"step": 2020,
|
||
|
|
"valid_targets_mean": 8608.3,
|
||
|
|
"valid_targets_min": 2488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.94368,
|
||
|
|
"grad_norm": 0.20513829600366332,
|
||
|
|
"learning_rate": 3.068800971224898e-05,
|
||
|
|
"loss": 0.3874,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13289940357208252,
|
||
|
|
"step": 2025,
|
||
|
|
"valid_targets_mean": 9401.7,
|
||
|
|
"valid_targets_min": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.94848,
|
||
|
|
"grad_norm": 0.20197105825927517,
|
||
|
|
"learning_rate": 3.0631320015898735e-05,
|
||
|
|
"loss": 0.3915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12039284408092499,
|
||
|
|
"step": 2030,
|
||
|
|
"valid_targets_mean": 10141.2,
|
||
|
|
"valid_targets_min": 3005
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.95328,
|
||
|
|
"grad_norm": 0.20795752182412858,
|
||
|
|
"learning_rate": 3.057451101250778e-05,
|
||
|
|
"loss": 0.3926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12851007282733917,
|
||
|
|
"step": 2035,
|
||
|
|
"valid_targets_mean": 9688.7,
|
||
|
|
"valid_targets_min": 1591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.95808,
|
||
|
|
"grad_norm": 0.21314778698152376,
|
||
|
|
"learning_rate": 3.051758333959941e-05,
|
||
|
|
"loss": 0.3962,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13587504625320435,
|
||
|
|
"step": 2040,
|
||
|
|
"valid_targets_mean": 9627.9,
|
||
|
|
"valid_targets_min": 2228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.96288,
|
||
|
|
"grad_norm": 0.20482304692000647,
|
||
|
|
"learning_rate": 3.046053763602865e-05,
|
||
|
|
"loss": 0.3932,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12451982498168945,
|
||
|
|
"step": 2045,
|
||
|
|
"valid_targets_mean": 10732.7,
|
||
|
|
"valid_targets_min": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.96768,
|
||
|
|
"grad_norm": 0.22387606059097612,
|
||
|
|
"learning_rate": 3.0403374541975078e-05,
|
||
|
|
"loss": 0.3872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13428357243537903,
|
||
|
|
"step": 2050,
|
||
|
|
"valid_targets_mean": 10365.8,
|
||
|
|
"valid_targets_min": 2933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.97248,
|
||
|
|
"grad_norm": 0.1998358036134206,
|
||
|
|
"learning_rate": 3.034609469893567e-05,
|
||
|
|
"loss": 0.3946,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11720042675733566,
|
||
|
|
"step": 2055,
|
||
|
|
"valid_targets_mean": 9579.6,
|
||
|
|
"valid_targets_min": 1590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.97728,
|
||
|
|
"grad_norm": 0.19831729145649243,
|
||
|
|
"learning_rate": 3.028869874971758e-05,
|
||
|
|
"loss": 0.3948,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12150508165359497,
|
||
|
|
"step": 2060,
|
||
|
|
"valid_targets_mean": 9771.9,
|
||
|
|
"valid_targets_min": 1538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9820799999999998,
|
||
|
|
"grad_norm": 0.21459891896364366,
|
||
|
|
"learning_rate": 3.0231187338430944e-05,
|
||
|
|
"loss": 0.3881,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12095307558774948,
|
||
|
|
"step": 2065,
|
||
|
|
"valid_targets_mean": 8782.9,
|
||
|
|
"valid_targets_min": 1729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.98688,
|
||
|
|
"grad_norm": 0.21508972922553282,
|
||
|
|
"learning_rate": 3.0173561110481606e-05,
|
||
|
|
"loss": 0.3944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.142621248960495,
|
||
|
|
"step": 2070,
|
||
|
|
"valid_targets_mean": 11738.8,
|
||
|
|
"valid_targets_min": 2939
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9916800000000001,
|
||
|
|
"grad_norm": 0.2374644700039593,
|
||
|
|
"learning_rate": 3.011582071256394e-05,
|
||
|
|
"loss": 0.3939,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13136368989944458,
|
||
|
|
"step": 2075,
|
||
|
|
"valid_targets_mean": 9781.7,
|
||
|
|
"valid_targets_min": 1517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.99648,
|
||
|
|
"grad_norm": 0.22812721698709595,
|
||
|
|
"learning_rate": 3.0057966792653547e-05,
|
||
|
|
"loss": 0.3918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16073977947235107,
|
||
|
|
"step": 2080,
|
||
|
|
"valid_targets_mean": 11632.2,
|
||
|
|
"valid_targets_min": 1621
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.00096,
|
||
|
|
"grad_norm": 0.23520090782122327,
|
||
|
|
"learning_rate": 3.0000000000000004e-05,
|
||
|
|
"loss": 0.3982,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1233157068490982,
|
||
|
|
"step": 2085,
|
||
|
|
"valid_targets_mean": 9876.7,
|
||
|
|
"valid_targets_min": 1508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.00576,
|
||
|
|
"grad_norm": 0.2216631791006346,
|
||
|
|
"learning_rate": 2.9941920985119562e-05,
|
||
|
|
"loss": 0.391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13492241501808167,
|
||
|
|
"step": 2090,
|
||
|
|
"valid_targets_mean": 10056.4,
|
||
|
|
"valid_targets_min": 1290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.01056,
|
||
|
|
"grad_norm": 0.2400982673308614,
|
||
|
|
"learning_rate": 2.988373039978786e-05,
|
||
|
|
"loss": 0.3832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1363467276096344,
|
||
|
|
"step": 2095,
|
||
|
|
"valid_targets_mean": 11063.2,
|
||
|
|
"valid_targets_min": 3770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.01536,
|
||
|
|
"grad_norm": 0.23156897134336338,
|
||
|
|
"learning_rate": 2.98254288970326e-05,
|
||
|
|
"loss": 0.3775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09604047238826752,
|
||
|
|
"step": 2100,
|
||
|
|
"valid_targets_mean": 7552.9,
|
||
|
|
"valid_targets_min": 2284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.02016,
|
||
|
|
"grad_norm": 0.20724384057820167,
|
||
|
|
"learning_rate": 2.9767017131126245e-05,
|
||
|
|
"loss": 0.3916,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10275719314813614,
|
||
|
|
"step": 2105,
|
||
|
|
"valid_targets_mean": 9190.3,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.02496,
|
||
|
|
"grad_norm": 0.1961879146915268,
|
||
|
|
"learning_rate": 2.9708495757578633e-05,
|
||
|
|
"loss": 0.3818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14239154756069183,
|
||
|
|
"step": 2110,
|
||
|
|
"valid_targets_mean": 11486.5,
|
||
|
|
"valid_targets_min": 1604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.02976,
|
||
|
|
"grad_norm": 0.2151407272702704,
|
||
|
|
"learning_rate": 2.964986543312964e-05,
|
||
|
|
"loss": 0.3796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1200903132557869,
|
||
|
|
"step": 2115,
|
||
|
|
"valid_targets_mean": 9579.2,
|
||
|
|
"valid_targets_min": 1869
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.03456,
|
||
|
|
"grad_norm": 0.19765475611331146,
|
||
|
|
"learning_rate": 2.9591126815741832e-05,
|
||
|
|
"loss": 0.38,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10982204973697662,
|
||
|
|
"step": 2120,
|
||
|
|
"valid_targets_mean": 9895.5,
|
||
|
|
"valid_targets_min": 1146
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.03936,
|
||
|
|
"grad_norm": 0.2308073882459791,
|
||
|
|
"learning_rate": 2.953228056459305e-05,
|
||
|
|
"loss": 0.3831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13495692610740662,
|
||
|
|
"step": 2125,
|
||
|
|
"valid_targets_mean": 11730.5,
|
||
|
|
"valid_targets_min": 3230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.04416,
|
||
|
|
"grad_norm": 0.2210810128406193,
|
||
|
|
"learning_rate": 2.947332734006903e-05,
|
||
|
|
"loss": 0.3846,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.141627237200737,
|
||
|
|
"step": 2130,
|
||
|
|
"valid_targets_mean": 11017.2,
|
||
|
|
"valid_targets_min": 3168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.04896,
|
||
|
|
"grad_norm": 0.22292894896010498,
|
||
|
|
"learning_rate": 2.9414267803755988e-05,
|
||
|
|
"loss": 0.3744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1303427368402481,
|
||
|
|
"step": 2135,
|
||
|
|
"valid_targets_mean": 10870.1,
|
||
|
|
"valid_targets_min": 1314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.05376,
|
||
|
|
"grad_norm": 0.21603225038870696,
|
||
|
|
"learning_rate": 2.9355102618433197e-05,
|
||
|
|
"loss": 0.3821,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12586045265197754,
|
||
|
|
"step": 2140,
|
||
|
|
"valid_targets_mean": 9648.2,
|
||
|
|
"valid_targets_min": 1780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.05856,
|
||
|
|
"grad_norm": 0.19143697649106933,
|
||
|
|
"learning_rate": 2.929583244806553e-05,
|
||
|
|
"loss": 0.3827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11953039467334747,
|
||
|
|
"step": 2145,
|
||
|
|
"valid_targets_mean": 10312.8,
|
||
|
|
"valid_targets_min": 1286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.06336,
|
||
|
|
"grad_norm": 0.1926989550668403,
|
||
|
|
"learning_rate": 2.9236457957796047e-05,
|
||
|
|
"loss": 0.3949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1427406370639801,
|
||
|
|
"step": 2150,
|
||
|
|
"valid_targets_mean": 12204.4,
|
||
|
|
"valid_targets_min": 3038
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0681599999999998,
|
||
|
|
"grad_norm": 0.20349926888856612,
|
||
|
|
"learning_rate": 2.9176979813938494e-05,
|
||
|
|
"loss": 0.3828,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13766226172447205,
|
||
|
|
"step": 2155,
|
||
|
|
"valid_targets_mean": 11886.8,
|
||
|
|
"valid_targets_min": 1810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.07296,
|
||
|
|
"grad_norm": 0.20794478653140658,
|
||
|
|
"learning_rate": 2.9117398683969857e-05,
|
||
|
|
"loss": 0.3849,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1351398229598999,
|
||
|
|
"step": 2160,
|
||
|
|
"valid_targets_mean": 11531.9,
|
||
|
|
"valid_targets_min": 929
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.07776,
|
||
|
|
"grad_norm": 0.19642909203052808,
|
||
|
|
"learning_rate": 2.9057715236522833e-05,
|
||
|
|
"loss": 0.3783,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13603469729423523,
|
||
|
|
"step": 2165,
|
||
|
|
"valid_targets_mean": 10830.4,
|
||
|
|
"valid_targets_min": 1968
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.08256,
|
||
|
|
"grad_norm": 0.1924151874734592,
|
||
|
|
"learning_rate": 2.899793014137836e-05,
|
||
|
|
"loss": 0.3846,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12676164507865906,
|
||
|
|
"step": 2170,
|
||
|
|
"valid_targets_mean": 10274.2,
|
||
|
|
"valid_targets_min": 1836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.08736,
|
||
|
|
"grad_norm": 0.20744477541749562,
|
||
|
|
"learning_rate": 2.8938044069458094e-05,
|
||
|
|
"loss": 0.3768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1298026740550995,
|
||
|
|
"step": 2175,
|
||
|
|
"valid_targets_mean": 10860.8,
|
||
|
|
"valid_targets_min": 1674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.09216,
|
||
|
|
"grad_norm": 0.21185656425668636,
|
||
|
|
"learning_rate": 2.8878057692816877e-05,
|
||
|
|
"loss": 0.3895,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13175363838672638,
|
||
|
|
"step": 2180,
|
||
|
|
"valid_targets_mean": 11086.3,
|
||
|
|
"valid_targets_min": 2251
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.09696,
|
||
|
|
"grad_norm": 0.2012977437878008,
|
||
|
|
"learning_rate": 2.8817971684635178e-05,
|
||
|
|
"loss": 0.3836,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10911810398101807,
|
||
|
|
"step": 2185,
|
||
|
|
"valid_targets_mean": 9224.0,
|
||
|
|
"valid_targets_min": 1876
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.10176,
|
||
|
|
"grad_norm": 0.20119464174885665,
|
||
|
|
"learning_rate": 2.8757786719211555e-05,
|
||
|
|
"loss": 0.3822,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12016467750072479,
|
||
|
|
"step": 2190,
|
||
|
|
"valid_targets_mean": 9766.5,
|
||
|
|
"valid_targets_min": 2315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.10656,
|
||
|
|
"grad_norm": 0.2103441930729234,
|
||
|
|
"learning_rate": 2.8697503471955106e-05,
|
||
|
|
"loss": 0.3733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10794928669929504,
|
||
|
|
"step": 2195,
|
||
|
|
"valid_targets_mean": 9752.1,
|
||
|
|
"valid_targets_min": 1734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.11136,
|
||
|
|
"grad_norm": 0.1973128218861176,
|
||
|
|
"learning_rate": 2.8637122619377848e-05,
|
||
|
|
"loss": 0.3805,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13639453053474426,
|
||
|
|
"step": 2200,
|
||
|
|
"valid_targets_mean": 10859.4,
|
||
|
|
"valid_targets_min": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.11616,
|
||
|
|
"grad_norm": 0.21955743114852744,
|
||
|
|
"learning_rate": 2.8576644839087152e-05,
|
||
|
|
"loss": 0.3812,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13809733092784882,
|
||
|
|
"step": 2205,
|
||
|
|
"valid_targets_mean": 9867.9,
|
||
|
|
"valid_targets_min": 1767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.12096,
|
||
|
|
"grad_norm": 0.2068750644897939,
|
||
|
|
"learning_rate": 2.8516070809778145e-05,
|
||
|
|
"loss": 0.3814,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10828588157892227,
|
||
|
|
"step": 2210,
|
||
|
|
"valid_targets_mean": 10207.7,
|
||
|
|
"valid_targets_min": 2529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.12576,
|
||
|
|
"grad_norm": 0.20719191927343455,
|
||
|
|
"learning_rate": 2.845540121122607e-05,
|
||
|
|
"loss": 0.385,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12375476211309433,
|
||
|
|
"step": 2215,
|
||
|
|
"valid_targets_mean": 10426.1,
|
||
|
|
"valid_targets_min": 1358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.13056,
|
||
|
|
"grad_norm": 0.2039973480587662,
|
||
|
|
"learning_rate": 2.839463672427867e-05,
|
||
|
|
"loss": 0.3835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1495930552482605,
|
||
|
|
"step": 2220,
|
||
|
|
"valid_targets_mean": 11969.6,
|
||
|
|
"valid_targets_min": 2805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.13536,
|
||
|
|
"grad_norm": 0.18986775376722906,
|
||
|
|
"learning_rate": 2.833377803084855e-05,
|
||
|
|
"loss": 0.3856,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12233369052410126,
|
||
|
|
"step": 2225,
|
||
|
|
"valid_targets_mean": 11053.6,
|
||
|
|
"valid_targets_min": 3083
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.14016,
|
||
|
|
"grad_norm": 0.2278637593281419,
|
||
|
|
"learning_rate": 2.8272825813905522e-05,
|
||
|
|
"loss": 0.3819,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1278565227985382,
|
||
|
|
"step": 2230,
|
||
|
|
"valid_targets_mean": 10910.4,
|
||
|
|
"valid_targets_min": 3721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.14496,
|
||
|
|
"grad_norm": 0.20280471378821244,
|
||
|
|
"learning_rate": 2.8211780757468942e-05,
|
||
|
|
"loss": 0.3809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12041682004928589,
|
||
|
|
"step": 2235,
|
||
|
|
"valid_targets_mean": 10993.0,
|
||
|
|
"valid_targets_min": 2153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.14976,
|
||
|
|
"grad_norm": 0.19991212575067607,
|
||
|
|
"learning_rate": 2.8150643546600012e-05,
|
||
|
|
"loss": 0.3782,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11163853108882904,
|
||
|
|
"step": 2240,
|
||
|
|
"valid_targets_mean": 9899.4,
|
||
|
|
"valid_targets_min": 1843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.15456,
|
||
|
|
"grad_norm": 0.2632475734932872,
|
||
|
|
"learning_rate": 2.808941486739414e-05,
|
||
|
|
"loss": 0.3811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13694193959236145,
|
||
|
|
"step": 2245,
|
||
|
|
"valid_targets_mean": 11136.0,
|
||
|
|
"valid_targets_min": 2839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.15936,
|
||
|
|
"grad_norm": 0.21727180359137785,
|
||
|
|
"learning_rate": 2.80280954069732e-05,
|
||
|
|
"loss": 0.3854,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11582939326763153,
|
||
|
|
"step": 2250,
|
||
|
|
"valid_targets_mean": 8610.1,
|
||
|
|
"valid_targets_min": 2262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.16416,
|
||
|
|
"grad_norm": 0.22481090765530318,
|
||
|
|
"learning_rate": 2.7966685853477828e-05,
|
||
|
|
"loss": 0.3782,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12704181671142578,
|
||
|
|
"step": 2255,
|
||
|
|
"valid_targets_mean": 10174.8,
|
||
|
|
"valid_targets_min": 2123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.16896,
|
||
|
|
"grad_norm": 0.19369274940731798,
|
||
|
|
"learning_rate": 2.790518689605971e-05,
|
||
|
|
"loss": 0.3842,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12195196747779846,
|
||
|
|
"step": 2260,
|
||
|
|
"valid_targets_mean": 10902.6,
|
||
|
|
"valid_targets_min": 2382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.17376,
|
||
|
|
"grad_norm": 0.19672184157117886,
|
||
|
|
"learning_rate": 2.7843599224873833e-05,
|
||
|
|
"loss": 0.3836,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1163254827260971,
|
||
|
|
"step": 2265,
|
||
|
|
"valid_targets_mean": 9992.1,
|
||
|
|
"valid_targets_min": 2825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.17856,
|
||
|
|
"grad_norm": 0.19723613015471497,
|
||
|
|
"learning_rate": 2.7781923531070775e-05,
|
||
|
|
"loss": 0.3809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11363483965396881,
|
||
|
|
"step": 2270,
|
||
|
|
"valid_targets_mean": 9348.0,
|
||
|
|
"valid_targets_min": 2631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.18336,
|
||
|
|
"grad_norm": 0.2116943766512944,
|
||
|
|
"learning_rate": 2.7720160506788896e-05,
|
||
|
|
"loss": 0.3781,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11357665807008743,
|
||
|
|
"step": 2275,
|
||
|
|
"valid_targets_mean": 9558.6,
|
||
|
|
"valid_targets_min": 1989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.18816,
|
||
|
|
"grad_norm": 0.20054780631886765,
|
||
|
|
"learning_rate": 2.7658310845146598e-05,
|
||
|
|
"loss": 0.3808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13914844393730164,
|
||
|
|
"step": 2280,
|
||
|
|
"valid_targets_mean": 11377.4,
|
||
|
|
"valid_targets_min": 4677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.19296,
|
||
|
|
"grad_norm": 0.22068280866813472,
|
||
|
|
"learning_rate": 2.7596375240234574e-05,
|
||
|
|
"loss": 0.3746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12389899790287018,
|
||
|
|
"step": 2285,
|
||
|
|
"valid_targets_mean": 10821.3,
|
||
|
|
"valid_targets_min": 2024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.19776,
|
||
|
|
"grad_norm": 0.18781808934850716,
|
||
|
|
"learning_rate": 2.7534354387107975e-05,
|
||
|
|
"loss": 0.3762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11634353548288345,
|
||
|
|
"step": 2290,
|
||
|
|
"valid_targets_mean": 10232.5,
|
||
|
|
"valid_targets_min": 2110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.20256,
|
||
|
|
"grad_norm": 0.2010597215182537,
|
||
|
|
"learning_rate": 2.747224898177862e-05,
|
||
|
|
"loss": 0.3889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12369689345359802,
|
||
|
|
"step": 2295,
|
||
|
|
"valid_targets_mean": 9952.7,
|
||
|
|
"valid_targets_min": 1838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.20736,
|
||
|
|
"grad_norm": 0.21868349840227744,
|
||
|
|
"learning_rate": 2.7410059721207187e-05,
|
||
|
|
"loss": 0.3768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11973851174116135,
|
||
|
|
"step": 2300,
|
||
|
|
"valid_targets_mean": 9082.8,
|
||
|
|
"valid_targets_min": 2068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.21216,
|
||
|
|
"grad_norm": 0.20931814759974568,
|
||
|
|
"learning_rate": 2.734778730329543e-05,
|
||
|
|
"loss": 0.3826,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11982230842113495,
|
||
|
|
"step": 2305,
|
||
|
|
"valid_targets_mean": 9954.4,
|
||
|
|
"valid_targets_min": 1498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.21696,
|
||
|
|
"grad_norm": 0.23696165400189556,
|
||
|
|
"learning_rate": 2.7285432426878275e-05,
|
||
|
|
"loss": 0.3806,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12035034596920013,
|
||
|
|
"step": 2310,
|
||
|
|
"valid_targets_mean": 10853.3,
|
||
|
|
"valid_targets_min": 1405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.22176,
|
||
|
|
"grad_norm": 0.19598291857765024,
|
||
|
|
"learning_rate": 2.7222995791716034e-05,
|
||
|
|
"loss": 0.3816,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14030274748802185,
|
||
|
|
"step": 2315,
|
||
|
|
"valid_targets_mean": 11407.2,
|
||
|
|
"valid_targets_min": 1318
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.22656,
|
||
|
|
"grad_norm": 0.21215296796191113,
|
||
|
|
"learning_rate": 2.716047809848653e-05,
|
||
|
|
"loss": 0.3775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13955843448638916,
|
||
|
|
"step": 2320,
|
||
|
|
"valid_targets_mean": 10772.2,
|
||
|
|
"valid_targets_min": 1950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.23136,
|
||
|
|
"grad_norm": 0.18886797079153328,
|
||
|
|
"learning_rate": 2.7097880048777238e-05,
|
||
|
|
"loss": 0.3865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12690003216266632,
|
||
|
|
"step": 2325,
|
||
|
|
"valid_targets_mean": 10627.6,
|
||
|
|
"valid_targets_min": 2055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.23616,
|
||
|
|
"grad_norm": 0.19135950944457367,
|
||
|
|
"learning_rate": 2.703520234507742e-05,
|
||
|
|
"loss": 0.3762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1010352373123169,
|
||
|
|
"step": 2330,
|
||
|
|
"valid_targets_mean": 9337.2,
|
||
|
|
"valid_targets_min": 1811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.24096,
|
||
|
|
"grad_norm": 0.2055854776241585,
|
||
|
|
"learning_rate": 2.697244569077021e-05,
|
||
|
|
"loss": 0.374,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13945433497428894,
|
||
|
|
"step": 2335,
|
||
|
|
"valid_targets_mean": 10882.8,
|
||
|
|
"valid_targets_min": 2564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.24576,
|
||
|
|
"grad_norm": 0.190056204731367,
|
||
|
|
"learning_rate": 2.6909610790124772e-05,
|
||
|
|
"loss": 0.3735,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11307604610919952,
|
||
|
|
"step": 2340,
|
||
|
|
"valid_targets_mean": 9797.4,
|
||
|
|
"valid_targets_min": 2176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.25056,
|
||
|
|
"grad_norm": 0.19722871848427742,
|
||
|
|
"learning_rate": 2.684669834828835e-05,
|
||
|
|
"loss": 0.3751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11232331395149231,
|
||
|
|
"step": 2345,
|
||
|
|
"valid_targets_mean": 10427.9,
|
||
|
|
"valid_targets_min": 1359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.25536,
|
||
|
|
"grad_norm": 0.20080194987727418,
|
||
|
|
"learning_rate": 2.6783709071278372e-05,
|
||
|
|
"loss": 0.3808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1365032196044922,
|
||
|
|
"step": 2350,
|
||
|
|
"valid_targets_mean": 10621.3,
|
||
|
|
"valid_targets_min": 1238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.26016,
|
||
|
|
"grad_norm": 0.19001922126213333,
|
||
|
|
"learning_rate": 2.6720643665974522e-05,
|
||
|
|
"loss": 0.3845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12681028246879578,
|
||
|
|
"step": 2355,
|
||
|
|
"valid_targets_mean": 9610.5,
|
||
|
|
"valid_targets_min": 1365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.26496,
|
||
|
|
"grad_norm": 0.19450897157350197,
|
||
|
|
"learning_rate": 2.665750284011085e-05,
|
||
|
|
"loss": 0.3767,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10528913885354996,
|
||
|
|
"step": 2360,
|
||
|
|
"valid_targets_mean": 8842.7,
|
||
|
|
"valid_targets_min": 1482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2697599999999998,
|
||
|
|
"grad_norm": 0.20499479594100586,
|
||
|
|
"learning_rate": 2.6594287302267744e-05,
|
||
|
|
"loss": 0.3815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11198015511035919,
|
||
|
|
"step": 2365,
|
||
|
|
"valid_targets_mean": 8527.8,
|
||
|
|
"valid_targets_min": 1726
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.27456,
|
||
|
|
"grad_norm": 0.21729149125673355,
|
||
|
|
"learning_rate": 2.653099776186405e-05,
|
||
|
|
"loss": 0.3853,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11937613785266876,
|
||
|
|
"step": 2370,
|
||
|
|
"valid_targets_mean": 10422.3,
|
||
|
|
"valid_targets_min": 2676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.27936,
|
||
|
|
"grad_norm": 0.23936814043935242,
|
||
|
|
"learning_rate": 2.646763492914908e-05,
|
||
|
|
"loss": 0.3803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1370958685874939,
|
||
|
|
"step": 2375,
|
||
|
|
"valid_targets_mean": 11603.8,
|
||
|
|
"valid_targets_min": 2639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.28416,
|
||
|
|
"grad_norm": 0.226255238554012,
|
||
|
|
"learning_rate": 2.640419951519467e-05,
|
||
|
|
"loss": 0.3843,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13546521961688995,
|
||
|
|
"step": 2380,
|
||
|
|
"valid_targets_mean": 10853.2,
|
||
|
|
"valid_targets_min": 2467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.28896,
|
||
|
|
"grad_norm": 0.21000459214162845,
|
||
|
|
"learning_rate": 2.634069223188715e-05,
|
||
|
|
"loss": 0.3763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1364605724811554,
|
||
|
|
"step": 2385,
|
||
|
|
"valid_targets_mean": 10490.6,
|
||
|
|
"valid_targets_min": 3184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.29376,
|
||
|
|
"grad_norm": 0.21053819505844917,
|
||
|
|
"learning_rate": 2.627711379191939e-05,
|
||
|
|
"loss": 0.3796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13308705389499664,
|
||
|
|
"step": 2390,
|
||
|
|
"valid_targets_mean": 10670.0,
|
||
|
|
"valid_targets_min": 2594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.29856,
|
||
|
|
"grad_norm": 0.1968976461253721,
|
||
|
|
"learning_rate": 2.621346490878281e-05,
|
||
|
|
"loss": 0.3815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12212714552879333,
|
||
|
|
"step": 2395,
|
||
|
|
"valid_targets_mean": 9727.0,
|
||
|
|
"valid_targets_min": 1864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.30336,
|
||
|
|
"grad_norm": 0.20200418718795113,
|
||
|
|
"learning_rate": 2.614974629675935e-05,
|
||
|
|
"loss": 0.3859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13653194904327393,
|
||
|
|
"step": 2400,
|
||
|
|
"valid_targets_mean": 10758.1,
|
||
|
|
"valid_targets_min": 3469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.30816,
|
||
|
|
"grad_norm": 0.2133100603107007,
|
||
|
|
"learning_rate": 2.608595867091346e-05,
|
||
|
|
"loss": 0.3715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12559379637241364,
|
||
|
|
"step": 2405,
|
||
|
|
"valid_targets_mean": 10498.1,
|
||
|
|
"valid_targets_min": 1046
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.31296,
|
||
|
|
"grad_norm": 0.2033345530571247,
|
||
|
|
"learning_rate": 2.6022102747084084e-05,
|
||
|
|
"loss": 0.3815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1133851408958435,
|
||
|
|
"step": 2410,
|
||
|
|
"valid_targets_mean": 8877.1,
|
||
|
|
"valid_targets_min": 1354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.31776,
|
||
|
|
"grad_norm": 0.2152577606604185,
|
||
|
|
"learning_rate": 2.595817924187663e-05,
|
||
|
|
"loss": 0.3734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1213955357670784,
|
||
|
|
"step": 2415,
|
||
|
|
"valid_targets_mean": 10695.1,
|
||
|
|
"valid_targets_min": 3535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.32256,
|
||
|
|
"grad_norm": 0.22398470485476252,
|
||
|
|
"learning_rate": 2.589418887265489e-05,
|
||
|
|
"loss": 0.3748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11093949526548386,
|
||
|
|
"step": 2420,
|
||
|
|
"valid_targets_mean": 9716.8,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.32736,
|
||
|
|
"grad_norm": 0.1980562737542359,
|
||
|
|
"learning_rate": 2.5830132357533044e-05,
|
||
|
|
"loss": 0.3764,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12728214263916016,
|
||
|
|
"step": 2425,
|
||
|
|
"valid_targets_mean": 10986.3,
|
||
|
|
"valid_targets_min": 2171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.33216,
|
||
|
|
"grad_norm": 0.22230894654473113,
|
||
|
|
"learning_rate": 2.5766010415367567e-05,
|
||
|
|
"loss": 0.3865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11757129430770874,
|
||
|
|
"step": 2430,
|
||
|
|
"valid_targets_mean": 10236.9,
|
||
|
|
"valid_targets_min": 1959
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.33696,
|
||
|
|
"grad_norm": 0.20767824848398078,
|
||
|
|
"learning_rate": 2.5701823765749187e-05,
|
||
|
|
"loss": 0.3733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10535033047199249,
|
||
|
|
"step": 2435,
|
||
|
|
"valid_targets_mean": 10114.5,
|
||
|
|
"valid_targets_min": 3175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.34176,
|
||
|
|
"grad_norm": 0.20058624194170693,
|
||
|
|
"learning_rate": 2.563757312899477e-05,
|
||
|
|
"loss": 0.3757,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11982904374599457,
|
||
|
|
"step": 2440,
|
||
|
|
"valid_targets_mean": 9704.2,
|
||
|
|
"valid_targets_min": 1319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.34656,
|
||
|
|
"grad_norm": 0.21452305848338077,
|
||
|
|
"learning_rate": 2.557325922613926e-05,
|
||
|
|
"loss": 0.3742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.136878103017807,
|
||
|
|
"step": 2445,
|
||
|
|
"valid_targets_mean": 11417.2,
|
||
|
|
"valid_targets_min": 4316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.35136,
|
||
|
|
"grad_norm": 0.20622376969404346,
|
||
|
|
"learning_rate": 2.5508882778927615e-05,
|
||
|
|
"loss": 0.3659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09706181287765503,
|
||
|
|
"step": 2450,
|
||
|
|
"valid_targets_mean": 9376.6,
|
||
|
|
"valid_targets_min": 2567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.35616,
|
||
|
|
"grad_norm": 0.2269780856123984,
|
||
|
|
"learning_rate": 2.5444444509806654e-05,
|
||
|
|
"loss": 0.3619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10020601749420166,
|
||
|
|
"step": 2455,
|
||
|
|
"valid_targets_mean": 8076.2,
|
||
|
|
"valid_targets_min": 2098
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.36096,
|
||
|
|
"grad_norm": 0.20946019964551488,
|
||
|
|
"learning_rate": 2.5379945141916976e-05,
|
||
|
|
"loss": 0.3733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09800328314304352,
|
||
|
|
"step": 2460,
|
||
|
|
"valid_targets_mean": 8551.6,
|
||
|
|
"valid_targets_min": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.36576,
|
||
|
|
"grad_norm": 0.2008350967896554,
|
||
|
|
"learning_rate": 2.531538539908486e-05,
|
||
|
|
"loss": 0.3679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12654729187488556,
|
||
|
|
"step": 2465,
|
||
|
|
"valid_targets_mean": 10316.7,
|
||
|
|
"valid_targets_min": 1557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3705600000000002,
|
||
|
|
"grad_norm": 0.2229363972372055,
|
||
|
|
"learning_rate": 2.5250766005814108e-05,
|
||
|
|
"loss": 0.3686,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12344948947429657,
|
||
|
|
"step": 2470,
|
||
|
|
"valid_targets_mean": 10335.5,
|
||
|
|
"valid_targets_min": 2896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.37536,
|
||
|
|
"grad_norm": 0.21842342622258043,
|
||
|
|
"learning_rate": 2.5186087687277956e-05,
|
||
|
|
"loss": 0.373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11907872557640076,
|
||
|
|
"step": 2475,
|
||
|
|
"valid_targets_mean": 10898.4,
|
||
|
|
"valid_targets_min": 2757
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.38016,
|
||
|
|
"grad_norm": 0.21743760724252226,
|
||
|
|
"learning_rate": 2.5121351169310887e-05,
|
||
|
|
"loss": 0.3763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14003226161003113,
|
||
|
|
"step": 2480,
|
||
|
|
"valid_targets_mean": 11164.4,
|
||
|
|
"valid_targets_min": 2369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.38496,
|
||
|
|
"grad_norm": 0.254434215027223,
|
||
|
|
"learning_rate": 2.505655717840052e-05,
|
||
|
|
"loss": 0.3738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13954034447669983,
|
||
|
|
"step": 2485,
|
||
|
|
"valid_targets_mean": 11127.9,
|
||
|
|
"valid_targets_min": 2012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.38976,
|
||
|
|
"grad_norm": 0.23856981677013078,
|
||
|
|
"learning_rate": 2.499170644167946e-05,
|
||
|
|
"loss": 0.3703,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11923474073410034,
|
||
|
|
"step": 2490,
|
||
|
|
"valid_targets_mean": 9402.1,
|
||
|
|
"valid_targets_min": 2200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3945600000000002,
|
||
|
|
"grad_norm": 0.2042522121463096,
|
||
|
|
"learning_rate": 2.49267996869171e-05,
|
||
|
|
"loss": 0.3666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12613581120967865,
|
||
|
|
"step": 2495,
|
||
|
|
"valid_targets_mean": 11327.1,
|
||
|
|
"valid_targets_min": 2319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.39936,
|
||
|
|
"grad_norm": 0.2169067842374301,
|
||
|
|
"learning_rate": 2.486183764251151e-05,
|
||
|
|
"loss": 0.376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11283647269010544,
|
||
|
|
"step": 2500,
|
||
|
|
"valid_targets_mean": 10245.0,
|
||
|
|
"valid_targets_min": 316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.40416,
|
||
|
|
"grad_norm": 0.20640537266510697,
|
||
|
|
"learning_rate": 2.4796821037481215e-05,
|
||
|
|
"loss": 0.3689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11287974566221237,
|
||
|
|
"step": 2505,
|
||
|
|
"valid_targets_mean": 8545.9,
|
||
|
|
"valid_targets_min": 1182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.40896,
|
||
|
|
"grad_norm": 0.2142294699914048,
|
||
|
|
"learning_rate": 2.473175060145703e-05,
|
||
|
|
"loss": 0.3627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11034555733203888,
|
||
|
|
"step": 2510,
|
||
|
|
"valid_targets_mean": 8637.3,
|
||
|
|
"valid_targets_min": 2107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.41376,
|
||
|
|
"grad_norm": 0.20836784046085266,
|
||
|
|
"learning_rate": 2.4666627064673892e-05,
|
||
|
|
"loss": 0.3766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12005623430013657,
|
||
|
|
"step": 2515,
|
||
|
|
"valid_targets_mean": 10009.1,
|
||
|
|
"valid_targets_min": 2183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.41856,
|
||
|
|
"grad_norm": 0.21472818748145794,
|
||
|
|
"learning_rate": 2.4601451157962616e-05,
|
||
|
|
"loss": 0.3703,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1286231279373169,
|
||
|
|
"step": 2520,
|
||
|
|
"valid_targets_mean": 11829.6,
|
||
|
|
"valid_targets_min": 3279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.42336,
|
||
|
|
"grad_norm": 0.20869745151391522,
|
||
|
|
"learning_rate": 2.4536223612741754e-05,
|
||
|
|
"loss": 0.3701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12529705464839935,
|
||
|
|
"step": 2525,
|
||
|
|
"valid_targets_mean": 10626.5,
|
||
|
|
"valid_targets_min": 279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.42816,
|
||
|
|
"grad_norm": 0.19164417929518518,
|
||
|
|
"learning_rate": 2.447094516100934e-05,
|
||
|
|
"loss": 0.3838,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1397651880979538,
|
||
|
|
"step": 2530,
|
||
|
|
"valid_targets_mean": 12536.2,
|
||
|
|
"valid_targets_min": 1225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.43296,
|
||
|
|
"grad_norm": 0.2036217280117751,
|
||
|
|
"learning_rate": 2.4405616535334695e-05,
|
||
|
|
"loss": 0.3671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12778422236442566,
|
||
|
|
"step": 2535,
|
||
|
|
"valid_targets_mean": 11081.4,
|
||
|
|
"valid_targets_min": 2051
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.43776,
|
||
|
|
"grad_norm": 0.2061723618850725,
|
||
|
|
"learning_rate": 2.43402384688502e-05,
|
||
|
|
"loss": 0.3669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1277051866054535,
|
||
|
|
"step": 2540,
|
||
|
|
"valid_targets_mean": 11323.4,
|
||
|
|
"valid_targets_min": 2471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.44256,
|
||
|
|
"grad_norm": 0.19889048966025744,
|
||
|
|
"learning_rate": 2.4274811695243085e-05,
|
||
|
|
"loss": 0.3601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11608883738517761,
|
||
|
|
"step": 2545,
|
||
|
|
"valid_targets_mean": 9306.0,
|
||
|
|
"valid_targets_min": 2322
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4473599999999998,
|
||
|
|
"grad_norm": 0.24626904970503338,
|
||
|
|
"learning_rate": 2.4209336948747168e-05,
|
||
|
|
"loss": 0.3759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12030565738677979,
|
||
|
|
"step": 2550,
|
||
|
|
"valid_targets_mean": 11547.6,
|
||
|
|
"valid_targets_min": 1259
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.45216,
|
||
|
|
"grad_norm": 0.2306461738859341,
|
||
|
|
"learning_rate": 2.414381496413464e-05,
|
||
|
|
"loss": 0.3813,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10795653611421585,
|
||
|
|
"step": 2555,
|
||
|
|
"valid_targets_mean": 9398.9,
|
||
|
|
"valid_targets_min": 1431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.45696,
|
||
|
|
"grad_norm": 0.2080435328672805,
|
||
|
|
"learning_rate": 2.4078246476707793e-05,
|
||
|
|
"loss": 0.3675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12084488570690155,
|
||
|
|
"step": 2560,
|
||
|
|
"valid_targets_mean": 12174.3,
|
||
|
|
"valid_targets_min": 3205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.46176,
|
||
|
|
"grad_norm": 0.2061173208068871,
|
||
|
|
"learning_rate": 2.4012632222290802e-05,
|
||
|
|
"loss": 0.3625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12514223158359528,
|
||
|
|
"step": 2565,
|
||
|
|
"valid_targets_mean": 9897.4,
|
||
|
|
"valid_targets_min": 1837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.46656,
|
||
|
|
"grad_norm": 0.20556503982483745,
|
||
|
|
"learning_rate": 2.3946972937221444e-05,
|
||
|
|
"loss": 0.36,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11990512907505035,
|
||
|
|
"step": 2570,
|
||
|
|
"valid_targets_mean": 9661.3,
|
||
|
|
"valid_targets_min": 1838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.47136,
|
||
|
|
"grad_norm": 0.19615449425752077,
|
||
|
|
"learning_rate": 2.3881269358342828e-05,
|
||
|
|
"loss": 0.3701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1309790462255478,
|
||
|
|
"step": 2575,
|
||
|
|
"valid_targets_mean": 12405.8,
|
||
|
|
"valid_targets_min": 3413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.47616,
|
||
|
|
"grad_norm": 0.20553075982273802,
|
||
|
|
"learning_rate": 2.3815522222995158e-05,
|
||
|
|
"loss": 0.3682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1276824176311493,
|
||
|
|
"step": 2580,
|
||
|
|
"valid_targets_mean": 10941.6,
|
||
|
|
"valid_targets_min": 2899
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.48096,
|
||
|
|
"grad_norm": 0.20756905659976432,
|
||
|
|
"learning_rate": 2.3749732269007427e-05,
|
||
|
|
"loss": 0.371,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13721966743469238,
|
||
|
|
"step": 2585,
|
||
|
|
"valid_targets_mean": 11002.2,
|
||
|
|
"valid_targets_min": 3037
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.48576,
|
||
|
|
"grad_norm": 0.2089986855086161,
|
||
|
|
"learning_rate": 2.3683900234689142e-05,
|
||
|
|
"loss": 0.3719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10542646795511246,
|
||
|
|
"step": 2590,
|
||
|
|
"valid_targets_mean": 9137.1,
|
||
|
|
"valid_targets_min": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.49056,
|
||
|
|
"grad_norm": 0.19975431503444246,
|
||
|
|
"learning_rate": 2.3618026858822054e-05,
|
||
|
|
"loss": 0.3645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14532935619354248,
|
||
|
|
"step": 2595,
|
||
|
|
"valid_targets_mean": 11967.5,
|
||
|
|
"valid_targets_min": 2433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.49536,
|
||
|
|
"grad_norm": 0.20669335366995573,
|
||
|
|
"learning_rate": 2.355211288065187e-05,
|
||
|
|
"loss": 0.3674,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12231142818927765,
|
||
|
|
"step": 2600,
|
||
|
|
"valid_targets_mean": 10766.8,
|
||
|
|
"valid_targets_min": 2512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.50016,
|
||
|
|
"grad_norm": 0.20403508545321805,
|
||
|
|
"learning_rate": 2.348615903987991e-05,
|
||
|
|
"loss": 0.362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11272934824228287,
|
||
|
|
"step": 2605,
|
||
|
|
"valid_targets_mean": 9697.4,
|
||
|
|
"valid_targets_min": 906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.50496,
|
||
|
|
"grad_norm": 0.19991166546341962,
|
||
|
|
"learning_rate": 2.3420166076654873e-05,
|
||
|
|
"loss": 0.3682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11372819542884827,
|
||
|
|
"step": 2610,
|
||
|
|
"valid_targets_mean": 9772.1,
|
||
|
|
"valid_targets_min": 1562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.50976,
|
||
|
|
"grad_norm": 0.21927831366966014,
|
||
|
|
"learning_rate": 2.335413473156449e-05,
|
||
|
|
"loss": 0.3752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11094687879085541,
|
||
|
|
"step": 2615,
|
||
|
|
"valid_targets_mean": 9214.7,
|
||
|
|
"valid_targets_min": 1582
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.51456,
|
||
|
|
"grad_norm": 0.2055659911333355,
|
||
|
|
"learning_rate": 2.328806574562722e-05,
|
||
|
|
"loss": 0.3653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1251363754272461,
|
||
|
|
"step": 2620,
|
||
|
|
"valid_targets_mean": 10513.7,
|
||
|
|
"valid_targets_min": 2205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.51936,
|
||
|
|
"grad_norm": 0.2025013690361369,
|
||
|
|
"learning_rate": 2.322195986028393e-05,
|
||
|
|
"loss": 0.374,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13312791287899017,
|
||
|
|
"step": 2625,
|
||
|
|
"valid_targets_mean": 9986.3,
|
||
|
|
"valid_targets_min": 1383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.52416,
|
||
|
|
"grad_norm": 0.19390979413109546,
|
||
|
|
"learning_rate": 2.315581781738959e-05,
|
||
|
|
"loss": 0.3608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10706675052642822,
|
||
|
|
"step": 2630,
|
||
|
|
"valid_targets_mean": 10572.9,
|
||
|
|
"valid_targets_min": 3387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.52896,
|
||
|
|
"grad_norm": 0.21748660946987688,
|
||
|
|
"learning_rate": 2.3089640359204937e-05,
|
||
|
|
"loss": 0.3659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12551315128803253,
|
||
|
|
"step": 2635,
|
||
|
|
"valid_targets_mean": 10573.0,
|
||
|
|
"valid_targets_min": 2608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.53376,
|
||
|
|
"grad_norm": 0.19552172792861605,
|
||
|
|
"learning_rate": 2.3023428228388144e-05,
|
||
|
|
"loss": 0.3645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11490911990404129,
|
||
|
|
"step": 2640,
|
||
|
|
"valid_targets_mean": 9536.7,
|
||
|
|
"valid_targets_min": 1710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.53856,
|
||
|
|
"grad_norm": 0.2141206136144874,
|
||
|
|
"learning_rate": 2.2957182167986486e-05,
|
||
|
|
"loss": 0.3696,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11893200129270554,
|
||
|
|
"step": 2645,
|
||
|
|
"valid_targets_mean": 10167.2,
|
||
|
|
"valid_targets_min": 1590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.54336,
|
||
|
|
"grad_norm": 0.20927258265813803,
|
||
|
|
"learning_rate": 2.2890902921428004e-05,
|
||
|
|
"loss": 0.3586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11703682690858841,
|
||
|
|
"step": 2650,
|
||
|
|
"valid_targets_mean": 10636.1,
|
||
|
|
"valid_targets_min": 2815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.54816,
|
||
|
|
"grad_norm": 0.1970167456568604,
|
||
|
|
"learning_rate": 2.2824591232513153e-05,
|
||
|
|
"loss": 0.3678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10874166339635849,
|
||
|
|
"step": 2655,
|
||
|
|
"valid_targets_mean": 10392.5,
|
||
|
|
"valid_targets_min": 2435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.55296,
|
||
|
|
"grad_norm": 0.20392076385963556,
|
||
|
|
"learning_rate": 2.2758247845406495e-05,
|
||
|
|
"loss": 0.3697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12093451619148254,
|
||
|
|
"step": 2660,
|
||
|
|
"valid_targets_mean": 10889.5,
|
||
|
|
"valid_targets_min": 1734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.55776,
|
||
|
|
"grad_norm": 0.18913518596868675,
|
||
|
|
"learning_rate": 2.2691873504628282e-05,
|
||
|
|
"loss": 0.3578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10009606927633286,
|
||
|
|
"step": 2665,
|
||
|
|
"valid_targets_mean": 10179.2,
|
||
|
|
"valid_targets_min": 3734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.56256,
|
||
|
|
"grad_norm": 0.22753404829816876,
|
||
|
|
"learning_rate": 2.2625468955046143e-05,
|
||
|
|
"loss": 0.3733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13020503520965576,
|
||
|
|
"step": 2670,
|
||
|
|
"valid_targets_mean": 10354.9,
|
||
|
|
"valid_targets_min": 2023
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.56736,
|
||
|
|
"grad_norm": 0.2130956888250592,
|
||
|
|
"learning_rate": 2.2559034941866727e-05,
|
||
|
|
"loss": 0.3612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1377410888671875,
|
||
|
|
"step": 2675,
|
||
|
|
"valid_targets_mean": 11961.9,
|
||
|
|
"valid_targets_min": 1854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5721600000000002,
|
||
|
|
"grad_norm": 0.23396720104016538,
|
||
|
|
"learning_rate": 2.2492572210627325e-05,
|
||
|
|
"loss": 0.3622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12554751336574554,
|
||
|
|
"step": 2680,
|
||
|
|
"valid_targets_mean": 11309.3,
|
||
|
|
"valid_targets_min": 3110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.57696,
|
||
|
|
"grad_norm": 0.20949220427342713,
|
||
|
|
"learning_rate": 2.24260815071875e-05,
|
||
|
|
"loss": 0.3553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12479083985090256,
|
||
|
|
"step": 2685,
|
||
|
|
"valid_targets_mean": 11756.3,
|
||
|
|
"valid_targets_min": 2756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.58176,
|
||
|
|
"grad_norm": 0.22023057058606868,
|
||
|
|
"learning_rate": 2.2359563577720743e-05,
|
||
|
|
"loss": 0.3658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12142911553382874,
|
||
|
|
"step": 2690,
|
||
|
|
"valid_targets_mean": 10502.6,
|
||
|
|
"valid_targets_min": 1432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.58656,
|
||
|
|
"grad_norm": 0.1960286783235888,
|
||
|
|
"learning_rate": 2.229301916870606e-05,
|
||
|
|
"loss": 0.3579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11783711612224579,
|
||
|
|
"step": 2695,
|
||
|
|
"valid_targets_mean": 10609.1,
|
||
|
|
"valid_targets_min": 1423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.59136,
|
||
|
|
"grad_norm": 0.196287674533827,
|
||
|
|
"learning_rate": 2.2226449026919637e-05,
|
||
|
|
"loss": 0.3677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13481172919273376,
|
||
|
|
"step": 2700,
|
||
|
|
"valid_targets_mean": 12314.3,
|
||
|
|
"valid_targets_min": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5961600000000002,
|
||
|
|
"grad_norm": 0.2017354957403159,
|
||
|
|
"learning_rate": 2.2159853899426427e-05,
|
||
|
|
"loss": 0.3597,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12297806888818741,
|
||
|
|
"step": 2705,
|
||
|
|
"valid_targets_mean": 11269.6,
|
||
|
|
"valid_targets_min": 921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.60096,
|
||
|
|
"grad_norm": 0.21504040340795968,
|
||
|
|
"learning_rate": 2.209323453357178e-05,
|
||
|
|
"loss": 0.363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09462332725524902,
|
||
|
|
"step": 2710,
|
||
|
|
"valid_targets_mean": 7711.0,
|
||
|
|
"valid_targets_min": 1260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.60576,
|
||
|
|
"grad_norm": 0.18774009189829208,
|
||
|
|
"learning_rate": 2.202659167697306e-05,
|
||
|
|
"loss": 0.3665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10885201394557953,
|
||
|
|
"step": 2715,
|
||
|
|
"valid_targets_mean": 10389.9,
|
||
|
|
"valid_targets_min": 2927
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.61056,
|
||
|
|
"grad_norm": 0.20997682496666586,
|
||
|
|
"learning_rate": 2.1959926077511234e-05,
|
||
|
|
"loss": 0.3601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1147003173828125,
|
||
|
|
"step": 2720,
|
||
|
|
"valid_targets_mean": 10074.6,
|
||
|
|
"valid_targets_min": 2761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.61536,
|
||
|
|
"grad_norm": 0.19602241999739142,
|
||
|
|
"learning_rate": 2.1893238483322512e-05,
|
||
|
|
"loss": 0.3617,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13930413126945496,
|
||
|
|
"step": 2725,
|
||
|
|
"valid_targets_mean": 12336.5,
|
||
|
|
"valid_targets_min": 3485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6201600000000003,
|
||
|
|
"grad_norm": 0.20225701561452372,
|
||
|
|
"learning_rate": 2.1826529642789923e-05,
|
||
|
|
"loss": 0.3593,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11931279301643372,
|
||
|
|
"step": 2730,
|
||
|
|
"valid_targets_mean": 10042.8,
|
||
|
|
"valid_targets_min": 2202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6249599999999997,
|
||
|
|
"grad_norm": 0.22142458739273158,
|
||
|
|
"learning_rate": 2.1759800304534936e-05,
|
||
|
|
"loss": 0.3679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14175814390182495,
|
||
|
|
"step": 2735,
|
||
|
|
"valid_targets_mean": 11615.8,
|
||
|
|
"valid_targets_min": 2372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.62976,
|
||
|
|
"grad_norm": 0.23656684138473588,
|
||
|
|
"learning_rate": 2.1693051217409048e-05,
|
||
|
|
"loss": 0.3728,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12897023558616638,
|
||
|
|
"step": 2740,
|
||
|
|
"valid_targets_mean": 11909.6,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.63456,
|
||
|
|
"grad_norm": 0.20663860230606496,
|
||
|
|
"learning_rate": 2.1626283130485365e-05,
|
||
|
|
"loss": 0.3671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11588485538959503,
|
||
|
|
"step": 2745,
|
||
|
|
"valid_targets_mean": 10080.9,
|
||
|
|
"valid_targets_min": 1081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.63936,
|
||
|
|
"grad_norm": 0.20655720033130662,
|
||
|
|
"learning_rate": 2.1559496793050235e-05,
|
||
|
|
"loss": 0.3669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11520221084356308,
|
||
|
|
"step": 2750,
|
||
|
|
"valid_targets_mean": 10034.1,
|
||
|
|
"valid_targets_min": 2334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.64416,
|
||
|
|
"grad_norm": 0.2103206956535719,
|
||
|
|
"learning_rate": 2.1492692954594815e-05,
|
||
|
|
"loss": 0.3669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1502731442451477,
|
||
|
|
"step": 2755,
|
||
|
|
"valid_targets_mean": 12340.9,
|
||
|
|
"valid_targets_min": 1574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6489599999999998,
|
||
|
|
"grad_norm": 0.20142551016494148,
|
||
|
|
"learning_rate": 2.1425872364806642e-05,
|
||
|
|
"loss": 0.363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14424043893814087,
|
||
|
|
"step": 2760,
|
||
|
|
"valid_targets_mean": 12143.4,
|
||
|
|
"valid_targets_min": 1267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.65376,
|
||
|
|
"grad_norm": 0.19957066204774804,
|
||
|
|
"learning_rate": 2.1359035773561275e-05,
|
||
|
|
"loss": 0.3699,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1341516673564911,
|
||
|
|
"step": 2765,
|
||
|
|
"valid_targets_mean": 11031.7,
|
||
|
|
"valid_targets_min": 2913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.65856,
|
||
|
|
"grad_norm": 0.2011661145672297,
|
||
|
|
"learning_rate": 2.1292183930913803e-05,
|
||
|
|
"loss": 0.3716,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14259979128837585,
|
||
|
|
"step": 2770,
|
||
|
|
"valid_targets_mean": 12015.1,
|
||
|
|
"valid_targets_min": 1469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.66336,
|
||
|
|
"grad_norm": 0.21848616875295573,
|
||
|
|
"learning_rate": 2.1225317587090507e-05,
|
||
|
|
"loss": 0.3636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1095510944724083,
|
||
|
|
"step": 2775,
|
||
|
|
"valid_targets_mean": 9779.4,
|
||
|
|
"valid_targets_min": 1433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.66816,
|
||
|
|
"grad_norm": 0.24628573916420246,
|
||
|
|
"learning_rate": 2.1158437492480384e-05,
|
||
|
|
"loss": 0.3609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1268613338470459,
|
||
|
|
"step": 2780,
|
||
|
|
"valid_targets_mean": 11715.3,
|
||
|
|
"valid_targets_min": 1504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.67296,
|
||
|
|
"grad_norm": 0.18719854699996888,
|
||
|
|
"learning_rate": 2.1091544397626752e-05,
|
||
|
|
"loss": 0.3726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1171911209821701,
|
||
|
|
"step": 2785,
|
||
|
|
"valid_targets_mean": 10388.6,
|
||
|
|
"valid_targets_min": 2184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.67776,
|
||
|
|
"grad_norm": 0.2394102887498419,
|
||
|
|
"learning_rate": 2.102463905321881e-05,
|
||
|
|
"loss": 0.3686,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13961833715438843,
|
||
|
|
"step": 2790,
|
||
|
|
"valid_targets_mean": 10197.9,
|
||
|
|
"valid_targets_min": 1424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.68256,
|
||
|
|
"grad_norm": 0.18070241452128152,
|
||
|
|
"learning_rate": 2.095772221008323e-05,
|
||
|
|
"loss": 0.3621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12561923265457153,
|
||
|
|
"step": 2795,
|
||
|
|
"valid_targets_mean": 12434.9,
|
||
|
|
"valid_targets_min": 979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.68736,
|
||
|
|
"grad_norm": 0.24663833335176194,
|
||
|
|
"learning_rate": 2.0890794619175745e-05,
|
||
|
|
"loss": 0.3566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10567250102758408,
|
||
|
|
"step": 2800,
|
||
|
|
"valid_targets_mean": 8472.5,
|
||
|
|
"valid_targets_min": 1461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.69216,
|
||
|
|
"grad_norm": 0.20289405773135852,
|
||
|
|
"learning_rate": 2.0823857031572663e-05,
|
||
|
|
"loss": 0.3643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1317484974861145,
|
||
|
|
"step": 2805,
|
||
|
|
"valid_targets_mean": 11333.3,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.69696,
|
||
|
|
"grad_norm": 0.20744664772043325,
|
||
|
|
"learning_rate": 2.0756910198462515e-05,
|
||
|
|
"loss": 0.3602,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13761654496192932,
|
||
|
|
"step": 2810,
|
||
|
|
"valid_targets_mean": 10870.6,
|
||
|
|
"valid_targets_min": 1506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.70176,
|
||
|
|
"grad_norm": 0.21097955156861722,
|
||
|
|
"learning_rate": 2.0689954871137558e-05,
|
||
|
|
"loss": 0.3616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11286822706460953,
|
||
|
|
"step": 2815,
|
||
|
|
"valid_targets_mean": 9823.9,
|
||
|
|
"valid_targets_min": 1617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.70656,
|
||
|
|
"grad_norm": 0.2110640803089252,
|
||
|
|
"learning_rate": 2.0622991800985398e-05,
|
||
|
|
"loss": 0.3617,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11495064944028854,
|
||
|
|
"step": 2820,
|
||
|
|
"valid_targets_mean": 9073.9,
|
||
|
|
"valid_targets_min": 1187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.71136,
|
||
|
|
"grad_norm": 0.22613715754326771,
|
||
|
|
"learning_rate": 2.055602173948051e-05,
|
||
|
|
"loss": 0.3643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1298108845949173,
|
||
|
|
"step": 2825,
|
||
|
|
"valid_targets_mean": 10639.9,
|
||
|
|
"valid_targets_min": 2426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.71616,
|
||
|
|
"grad_norm": 0.21676181508143738,
|
||
|
|
"learning_rate": 2.0489045438175842e-05,
|
||
|
|
"loss": 0.3615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0962882786989212,
|
||
|
|
"step": 2830,
|
||
|
|
"valid_targets_mean": 9108.2,
|
||
|
|
"valid_targets_min": 2854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.72096,
|
||
|
|
"grad_norm": 0.2026053135412824,
|
||
|
|
"learning_rate": 2.042206364869436e-05,
|
||
|
|
"loss": 0.3636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11987446993589401,
|
||
|
|
"step": 2835,
|
||
|
|
"valid_targets_mean": 10001.6,
|
||
|
|
"valid_targets_min": 2254
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.72576,
|
||
|
|
"grad_norm": 0.21857205264961743,
|
||
|
|
"learning_rate": 2.0355077122720625e-05,
|
||
|
|
"loss": 0.3669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11152277886867523,
|
||
|
|
"step": 2840,
|
||
|
|
"valid_targets_mean": 9368.9,
|
||
|
|
"valid_targets_min": 232
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.73056,
|
||
|
|
"grad_norm": 0.2235096544972368,
|
||
|
|
"learning_rate": 2.0288086611992344e-05,
|
||
|
|
"loss": 0.3597,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13391023874282837,
|
||
|
|
"step": 2845,
|
||
|
|
"valid_targets_mean": 10348.2,
|
||
|
|
"valid_targets_min": 1288
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.73536,
|
||
|
|
"grad_norm": 0.20288624688622384,
|
||
|
|
"learning_rate": 2.0221092868291953e-05,
|
||
|
|
"loss": 0.3613,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11558884382247925,
|
||
|
|
"step": 2850,
|
||
|
|
"valid_targets_mean": 10537.2,
|
||
|
|
"valid_targets_min": 2252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.74016,
|
||
|
|
"grad_norm": 0.19954417000954933,
|
||
|
|
"learning_rate": 2.0154096643438153e-05,
|
||
|
|
"loss": 0.3658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10435223579406738,
|
||
|
|
"step": 2855,
|
||
|
|
"valid_targets_mean": 9052.9,
|
||
|
|
"valid_targets_min": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.74496,
|
||
|
|
"grad_norm": 0.19802408710823705,
|
||
|
|
"learning_rate": 2.008709868927751e-05,
|
||
|
|
"loss": 0.3646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11530742049217224,
|
||
|
|
"step": 2860,
|
||
|
|
"valid_targets_mean": 9975.5,
|
||
|
|
"valid_targets_min": 2684
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.74976,
|
||
|
|
"grad_norm": 0.2187266128310994,
|
||
|
|
"learning_rate": 2.0020099757675978e-05,
|
||
|
|
"loss": 0.3611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11976270377635956,
|
||
|
|
"step": 2865,
|
||
|
|
"valid_targets_mean": 9873.7,
|
||
|
|
"valid_targets_min": 1817
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.75456,
|
||
|
|
"grad_norm": 0.22996724727881987,
|
||
|
|
"learning_rate": 1.9953100600510487e-05,
|
||
|
|
"loss": 0.3608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11876045167446136,
|
||
|
|
"step": 2870,
|
||
|
|
"valid_targets_mean": 11607.8,
|
||
|
|
"valid_targets_min": 1452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.75936,
|
||
|
|
"grad_norm": 0.2306692684829691,
|
||
|
|
"learning_rate": 1.9886101969660504e-05,
|
||
|
|
"loss": 0.3778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11759520322084427,
|
||
|
|
"step": 2875,
|
||
|
|
"valid_targets_mean": 9185.2,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.76416,
|
||
|
|
"grad_norm": 0.2391988826978733,
|
||
|
|
"learning_rate": 1.9819104616999584e-05,
|
||
|
|
"loss": 0.365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12147517502307892,
|
||
|
|
"step": 2880,
|
||
|
|
"valid_targets_mean": 11028.3,
|
||
|
|
"valid_targets_min": 3506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.76896,
|
||
|
|
"grad_norm": 0.20185735285509163,
|
||
|
|
"learning_rate": 1.975210929438693e-05,
|
||
|
|
"loss": 0.3645,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10030334442853928,
|
||
|
|
"step": 2885,
|
||
|
|
"valid_targets_mean": 9568.4,
|
||
|
|
"valid_targets_min": 3429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7737600000000002,
|
||
|
|
"grad_norm": 0.19875807889149402,
|
||
|
|
"learning_rate": 1.9685116753658982e-05,
|
||
|
|
"loss": 0.3501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13128820061683655,
|
||
|
|
"step": 2890,
|
||
|
|
"valid_targets_mean": 11010.5,
|
||
|
|
"valid_targets_min": 1659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.77856,
|
||
|
|
"grad_norm": 0.20966066469853917,
|
||
|
|
"learning_rate": 1.9618127746620944e-05,
|
||
|
|
"loss": 0.3605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0992082953453064,
|
||
|
|
"step": 2895,
|
||
|
|
"valid_targets_mean": 9702.0,
|
||
|
|
"valid_targets_min": 1449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.78336,
|
||
|
|
"grad_norm": 0.20465611439947812,
|
||
|
|
"learning_rate": 1.9551143025038363e-05,
|
||
|
|
"loss": 0.3602,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12780556082725525,
|
||
|
|
"step": 2900,
|
||
|
|
"valid_targets_mean": 10775.2,
|
||
|
|
"valid_targets_min": 2243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.78816,
|
||
|
|
"grad_norm": 0.19999884250519026,
|
||
|
|
"learning_rate": 1.9484163340628724e-05,
|
||
|
|
"loss": 0.3658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.159572571516037,
|
||
|
|
"step": 2905,
|
||
|
|
"valid_targets_mean": 13880.4,
|
||
|
|
"valid_targets_min": 2055
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.79296,
|
||
|
|
"grad_norm": 0.2267797595338644,
|
||
|
|
"learning_rate": 1.941718944505294e-05,
|
||
|
|
"loss": 0.3637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1121618002653122,
|
||
|
|
"step": 2910,
|
||
|
|
"valid_targets_mean": 9450.2,
|
||
|
|
"valid_targets_min": 1890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7977600000000002,
|
||
|
|
"grad_norm": 0.20806241852536878,
|
||
|
|
"learning_rate": 1.9350222089906994e-05,
|
||
|
|
"loss": 0.3569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10966132581233978,
|
||
|
|
"step": 2915,
|
||
|
|
"valid_targets_mean": 10078.5,
|
||
|
|
"valid_targets_min": 2442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.80256,
|
||
|
|
"grad_norm": 0.1901812453004743,
|
||
|
|
"learning_rate": 1.9283262026713456e-05,
|
||
|
|
"loss": 0.3625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1494556963443756,
|
||
|
|
"step": 2920,
|
||
|
|
"valid_targets_mean": 13154.9,
|
||
|
|
"valid_targets_min": 2097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.80736,
|
||
|
|
"grad_norm": 0.19629416111197162,
|
||
|
|
"learning_rate": 1.9216310006913058e-05,
|
||
|
|
"loss": 0.3632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15318551659584045,
|
||
|
|
"step": 2925,
|
||
|
|
"valid_targets_mean": 12243.2,
|
||
|
|
"valid_targets_min": 2044
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.81216,
|
||
|
|
"grad_norm": 0.22492866436695402,
|
||
|
|
"learning_rate": 1.914936678185629e-05,
|
||
|
|
"loss": 0.3635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10800520330667496,
|
||
|
|
"step": 2930,
|
||
|
|
"valid_targets_mean": 8264.8,
|
||
|
|
"valid_targets_min": 1432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.81696,
|
||
|
|
"grad_norm": 0.2322759557369981,
|
||
|
|
"learning_rate": 1.9082433102794918e-05,
|
||
|
|
"loss": 0.3562,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10694816708564758,
|
||
|
|
"step": 2935,
|
||
|
|
"valid_targets_mean": 8236.9,
|
||
|
|
"valid_targets_min": 1158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8217600000000003,
|
||
|
|
"grad_norm": 0.19751066773571385,
|
||
|
|
"learning_rate": 1.9015509720873603e-05,
|
||
|
|
"loss": 0.3618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10615318268537521,
|
||
|
|
"step": 2940,
|
||
|
|
"valid_targets_mean": 9977.6,
|
||
|
|
"valid_targets_min": 2450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8265599999999997,
|
||
|
|
"grad_norm": 0.2580598020963001,
|
||
|
|
"learning_rate": 1.894859738712143e-05,
|
||
|
|
"loss": 0.3577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12153971195220947,
|
||
|
|
"step": 2945,
|
||
|
|
"valid_targets_mean": 11254.8,
|
||
|
|
"valid_targets_min": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.83136,
|
||
|
|
"grad_norm": 0.20053695833514487,
|
||
|
|
"learning_rate": 1.888169685244352e-05,
|
||
|
|
"loss": 0.3664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12622089684009552,
|
||
|
|
"step": 2950,
|
||
|
|
"valid_targets_mean": 11249.8,
|
||
|
|
"valid_targets_min": 3481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.83616,
|
||
|
|
"grad_norm": 0.22387411298344964,
|
||
|
|
"learning_rate": 1.8814808867612568e-05,
|
||
|
|
"loss": 0.3656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1262330412864685,
|
||
|
|
"step": 2955,
|
||
|
|
"valid_targets_mean": 10618.3,
|
||
|
|
"valid_targets_min": 1324
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.84096,
|
||
|
|
"grad_norm": 0.20084054265106147,
|
||
|
|
"learning_rate": 1.8747934183260427e-05,
|
||
|
|
"loss": 0.3579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12606924772262573,
|
||
|
|
"step": 2960,
|
||
|
|
"valid_targets_mean": 10792.8,
|
||
|
|
"valid_targets_min": 2360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.84576,
|
||
|
|
"grad_norm": 0.20134811185220372,
|
||
|
|
"learning_rate": 1.868107354986971e-05,
|
||
|
|
"loss": 0.3541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10368680953979492,
|
||
|
|
"step": 2965,
|
||
|
|
"valid_targets_mean": 9323.5,
|
||
|
|
"valid_targets_min": 2257
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8505599999999998,
|
||
|
|
"grad_norm": 0.21432043298936881,
|
||
|
|
"learning_rate": 1.8614227717765327e-05,
|
||
|
|
"loss": 0.3598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12553778290748596,
|
||
|
|
"step": 2970,
|
||
|
|
"valid_targets_mean": 10495.8,
|
||
|
|
"valid_targets_min": 2166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.85536,
|
||
|
|
"grad_norm": 0.21010257773368773,
|
||
|
|
"learning_rate": 1.8547397437106084e-05,
|
||
|
|
"loss": 0.3622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10031257569789886,
|
||
|
|
"step": 2975,
|
||
|
|
"valid_targets_mean": 8612.8,
|
||
|
|
"valid_targets_min": 1990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.86016,
|
||
|
|
"grad_norm": 0.2151100013139827,
|
||
|
|
"learning_rate": 1.848058345787629e-05,
|
||
|
|
"loss": 0.3676,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11149641126394272,
|
||
|
|
"step": 2980,
|
||
|
|
"valid_targets_mean": 9591.8,
|
||
|
|
"valid_targets_min": 1578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.86496,
|
||
|
|
"grad_norm": 0.20690191454976406,
|
||
|
|
"learning_rate": 1.8413786529877288e-05,
|
||
|
|
"loss": 0.3643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11248461902141571,
|
||
|
|
"step": 2985,
|
||
|
|
"valid_targets_mean": 9634.5,
|
||
|
|
"valid_targets_min": 1976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.86976,
|
||
|
|
"grad_norm": 0.19091194792371544,
|
||
|
|
"learning_rate": 1.8347007402719082e-05,
|
||
|
|
"loss": 0.3626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15383237600326538,
|
||
|
|
"step": 2990,
|
||
|
|
"valid_targets_mean": 13756.3,
|
||
|
|
"valid_targets_min": 3591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.87456,
|
||
|
|
"grad_norm": 0.25381121983321503,
|
||
|
|
"learning_rate": 1.828024682581191e-05,
|
||
|
|
"loss": 0.3689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12134871631860733,
|
||
|
|
"step": 2995,
|
||
|
|
"valid_targets_mean": 10468.4,
|
||
|
|
"valid_targets_min": 1861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.87936,
|
||
|
|
"grad_norm": 0.20681853099155056,
|
||
|
|
"learning_rate": 1.8213505548357822e-05,
|
||
|
|
"loss": 0.359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11575278639793396,
|
||
|
|
"step": 3000,
|
||
|
|
"valid_targets_mean": 10195.7,
|
||
|
|
"valid_targets_min": 1365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.88416,
|
||
|
|
"grad_norm": 0.2586449714167572,
|
||
|
|
"learning_rate": 1.814678431934231e-05,
|
||
|
|
"loss": 0.3637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13470815122127533,
|
||
|
|
"step": 3005,
|
||
|
|
"valid_targets_mean": 10735.1,
|
||
|
|
"valid_targets_min": 1758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.88896,
|
||
|
|
"grad_norm": 0.19973456872577822,
|
||
|
|
"learning_rate": 1.8080083887525862e-05,
|
||
|
|
"loss": 0.3582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13693934679031372,
|
||
|
|
"step": 3010,
|
||
|
|
"valid_targets_mean": 11732.4,
|
||
|
|
"valid_targets_min": 2603
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.89376,
|
||
|
|
"grad_norm": 0.27571022032343157,
|
||
|
|
"learning_rate": 1.801340500143557e-05,
|
||
|
|
"loss": 0.3672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11827176064252853,
|
||
|
|
"step": 3015,
|
||
|
|
"valid_targets_mean": 9584.5,
|
||
|
|
"valid_targets_min": 1872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.89856,
|
||
|
|
"grad_norm": 0.20586896983288694,
|
||
|
|
"learning_rate": 1.7946748409356746e-05,
|
||
|
|
"loss": 0.3631,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.132304847240448,
|
||
|
|
"step": 3020,
|
||
|
|
"valid_targets_mean": 11284.5,
|
||
|
|
"valid_targets_min": 2521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.90336,
|
||
|
|
"grad_norm": 0.2156847432711145,
|
||
|
|
"learning_rate": 1.788011485932451e-05,
|
||
|
|
"loss": 0.3627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13747841119766235,
|
||
|
|
"step": 3025,
|
||
|
|
"valid_targets_mean": 12327.1,
|
||
|
|
"valid_targets_min": 3063
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.90816,
|
||
|
|
"grad_norm": 0.19504637524682655,
|
||
|
|
"learning_rate": 1.78135050991154e-05,
|
||
|
|
"loss": 0.3601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11804535984992981,
|
||
|
|
"step": 3030,
|
||
|
|
"valid_targets_mean": 9373.9,
|
||
|
|
"valid_targets_min": 1669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.91296,
|
||
|
|
"grad_norm": 0.2085548541654179,
|
||
|
|
"learning_rate": 1.774691987623898e-05,
|
||
|
|
"loss": 0.3616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11510040611028671,
|
||
|
|
"step": 3035,
|
||
|
|
"valid_targets_mean": 10716.6,
|
||
|
|
"valid_targets_min": 996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.91776,
|
||
|
|
"grad_norm": 0.21766250562775843,
|
||
|
|
"learning_rate": 1.768035993792944e-05,
|
||
|
|
"loss": 0.3589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13932877779006958,
|
||
|
|
"step": 3040,
|
||
|
|
"valid_targets_mean": 12401.8,
|
||
|
|
"valid_targets_min": 1150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.92256,
|
||
|
|
"grad_norm": 0.22907780830855445,
|
||
|
|
"learning_rate": 1.7613826031137245e-05,
|
||
|
|
"loss": 0.3592,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12984474003314972,
|
||
|
|
"step": 3045,
|
||
|
|
"valid_targets_mean": 9680.3,
|
||
|
|
"valid_targets_min": 3371
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.92736,
|
||
|
|
"grad_norm": 0.19920710075709197,
|
||
|
|
"learning_rate": 1.7547318902520693e-05,
|
||
|
|
"loss": 0.3563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12504421174526215,
|
||
|
|
"step": 3050,
|
||
|
|
"valid_targets_mean": 10457.5,
|
||
|
|
"valid_targets_min": 1864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.93216,
|
||
|
|
"grad_norm": 0.1863477376482489,
|
||
|
|
"learning_rate": 1.7480839298437612e-05,
|
||
|
|
"loss": 0.3541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1148369163274765,
|
||
|
|
"step": 3055,
|
||
|
|
"valid_targets_mean": 10887.1,
|
||
|
|
"valid_targets_min": 2181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.93696,
|
||
|
|
"grad_norm": 0.20514036723322157,
|
||
|
|
"learning_rate": 1.7414387964936913e-05,
|
||
|
|
"loss": 0.359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11017856001853943,
|
||
|
|
"step": 3060,
|
||
|
|
"valid_targets_mean": 9396.2,
|
||
|
|
"valid_targets_min": 1286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.94176,
|
||
|
|
"grad_norm": 0.19968116055603938,
|
||
|
|
"learning_rate": 1.7347965647750264e-05,
|
||
|
|
"loss": 0.3568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11665418744087219,
|
||
|
|
"step": 3065,
|
||
|
|
"valid_targets_mean": 10102.7,
|
||
|
|
"valid_targets_min": 1289
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.94656,
|
||
|
|
"grad_norm": 0.20817684656234767,
|
||
|
|
"learning_rate": 1.7281573092283698e-05,
|
||
|
|
"loss": 0.3486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10370459407567978,
|
||
|
|
"step": 3070,
|
||
|
|
"valid_targets_mean": 9148.8,
|
||
|
|
"valid_targets_min": 2335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.95136,
|
||
|
|
"grad_norm": 0.1992253544024843,
|
||
|
|
"learning_rate": 1.721521104360925e-05,
|
||
|
|
"loss": 0.3632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12574630975723267,
|
||
|
|
"step": 3075,
|
||
|
|
"valid_targets_mean": 11535.0,
|
||
|
|
"valid_targets_min": 2265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.95616,
|
||
|
|
"grad_norm": 0.209456797096903,
|
||
|
|
"learning_rate": 1.714888024645662e-05,
|
||
|
|
"loss": 0.3598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09037169814109802,
|
||
|
|
"step": 3080,
|
||
|
|
"valid_targets_mean": 7926.5,
|
||
|
|
"valid_targets_min": 1720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.96096,
|
||
|
|
"grad_norm": 0.23173634572043963,
|
||
|
|
"learning_rate": 1.708258144520478e-05,
|
||
|
|
"loss": 0.3644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11924492567777634,
|
||
|
|
"step": 3085,
|
||
|
|
"valid_targets_mean": 10889.4,
|
||
|
|
"valid_targets_min": 1408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.96576,
|
||
|
|
"grad_norm": 0.20975622608835376,
|
||
|
|
"learning_rate": 1.7016315383873637e-05,
|
||
|
|
"loss": 0.354,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12030082941055298,
|
||
|
|
"step": 3090,
|
||
|
|
"valid_targets_mean": 10797.7,
|
||
|
|
"valid_targets_min": 3240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.97056,
|
||
|
|
"grad_norm": 0.21701184668492338,
|
||
|
|
"learning_rate": 1.6950082806115692e-05,
|
||
|
|
"loss": 0.3615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11946055293083191,
|
||
|
|
"step": 3095,
|
||
|
|
"valid_targets_mean": 10773.8,
|
||
|
|
"valid_targets_min": 3241
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9753600000000002,
|
||
|
|
"grad_norm": 0.19977669409558643,
|
||
|
|
"learning_rate": 1.6883884455207685e-05,
|
||
|
|
"loss": 0.3622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11068576574325562,
|
||
|
|
"step": 3100,
|
||
|
|
"valid_targets_mean": 9751.3,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.98016,
|
||
|
|
"grad_norm": 0.2059080526574065,
|
||
|
|
"learning_rate": 1.6817721074042254e-05,
|
||
|
|
"loss": 0.3565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13456991314888,
|
||
|
|
"step": 3105,
|
||
|
|
"valid_targets_mean": 11390.2,
|
||
|
|
"valid_targets_min": 1956
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.98496,
|
||
|
|
"grad_norm": 0.20080719781787096,
|
||
|
|
"learning_rate": 1.675159340511958e-05,
|
||
|
|
"loss": 0.3586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12177115678787231,
|
||
|
|
"step": 3110,
|
||
|
|
"valid_targets_mean": 11015.5,
|
||
|
|
"valid_targets_min": 2312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.98976,
|
||
|
|
"grad_norm": 0.22633322795913888,
|
||
|
|
"learning_rate": 1.6685502190539106e-05,
|
||
|
|
"loss": 0.362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14303162693977356,
|
||
|
|
"step": 3115,
|
||
|
|
"valid_targets_mean": 11830.6,
|
||
|
|
"valid_targets_min": 2556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.99456,
|
||
|
|
"grad_norm": 0.20303697576064644,
|
||
|
|
"learning_rate": 1.6619448171991155e-05,
|
||
|
|
"loss": 0.3622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1473173052072525,
|
||
|
|
"step": 3120,
|
||
|
|
"valid_targets_mean": 12810.6,
|
||
|
|
"valid_targets_min": 2805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9993600000000002,
|
||
|
|
"grad_norm": 0.21006741878018884,
|
||
|
|
"learning_rate": 1.6553432090748624e-05,
|
||
|
|
"loss": 0.3636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12304878979921341,
|
||
|
|
"step": 3125,
|
||
|
|
"valid_targets_mean": 10014.9,
|
||
|
|
"valid_targets_min": 352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.00384,
|
||
|
|
"grad_norm": 0.1911338881768588,
|
||
|
|
"learning_rate": 1.648745468765869e-05,
|
||
|
|
"loss": 0.363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1106758713722229,
|
||
|
|
"step": 3130,
|
||
|
|
"valid_targets_mean": 10679.0,
|
||
|
|
"valid_targets_min": 2647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.00864,
|
||
|
|
"grad_norm": 0.18503418871632474,
|
||
|
|
"learning_rate": 1.6421516703134463e-05,
|
||
|
|
"loss": 0.3507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10718891024589539,
|
||
|
|
"step": 3135,
|
||
|
|
"valid_targets_mean": 9676.6,
|
||
|
|
"valid_targets_min": 3561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.01344,
|
||
|
|
"grad_norm": 0.19632597147141265,
|
||
|
|
"learning_rate": 1.6355618877146685e-05,
|
||
|
|
"loss": 0.3577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1160854697227478,
|
||
|
|
"step": 3140,
|
||
|
|
"valid_targets_mean": 10539.7,
|
||
|
|
"valid_targets_min": 2460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.01824,
|
||
|
|
"grad_norm": 0.18389649394404944,
|
||
|
|
"learning_rate": 1.6289761949215435e-05,
|
||
|
|
"loss": 0.3556,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10422074794769287,
|
||
|
|
"step": 3145,
|
||
|
|
"valid_targets_mean": 8746.8,
|
||
|
|
"valid_targets_min": 1724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.02304,
|
||
|
|
"grad_norm": 0.19428247076021085,
|
||
|
|
"learning_rate": 1.6223946658401818e-05,
|
||
|
|
"loss": 0.3572,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11618025600910187,
|
||
|
|
"step": 3150,
|
||
|
|
"valid_targets_mean": 10644.7,
|
||
|
|
"valid_targets_min": 2639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.02784,
|
||
|
|
"grad_norm": 0.1989538133285283,
|
||
|
|
"learning_rate": 1.6158173743299692e-05,
|
||
|
|
"loss": 0.3535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13024590909481049,
|
||
|
|
"step": 3155,
|
||
|
|
"valid_targets_mean": 11188.2,
|
||
|
|
"valid_targets_min": 2165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.03264,
|
||
|
|
"grad_norm": 0.1993685753545757,
|
||
|
|
"learning_rate": 1.6092443942027356e-05,
|
||
|
|
"loss": 0.3623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.109580397605896,
|
||
|
|
"step": 3160,
|
||
|
|
"valid_targets_mean": 9663.8,
|
||
|
|
"valid_targets_min": 1420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.03744,
|
||
|
|
"grad_norm": 0.18751059894772873,
|
||
|
|
"learning_rate": 1.602675799221927e-05,
|
||
|
|
"loss": 0.362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12348620593547821,
|
||
|
|
"step": 3165,
|
||
|
|
"valid_targets_mean": 10874.9,
|
||
|
|
"valid_targets_min": 2228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.04224,
|
||
|
|
"grad_norm": 0.19821491707707475,
|
||
|
|
"learning_rate": 1.59611166310178e-05,
|
||
|
|
"loss": 0.3616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11230102926492691,
|
||
|
|
"step": 3170,
|
||
|
|
"valid_targets_mean": 9740.4,
|
||
|
|
"valid_targets_min": 1925
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.04704,
|
||
|
|
"grad_norm": 0.1944362720163763,
|
||
|
|
"learning_rate": 1.5895520595064913e-05,
|
||
|
|
"loss": 0.3529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1168983057141304,
|
||
|
|
"step": 3175,
|
||
|
|
"valid_targets_mean": 10594.9,
|
||
|
|
"valid_targets_min": 1259
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.05184,
|
||
|
|
"grad_norm": 0.1956092410315661,
|
||
|
|
"learning_rate": 1.5829970620493932e-05,
|
||
|
|
"loss": 0.3559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09523244202136993,
|
||
|
|
"step": 3180,
|
||
|
|
"valid_targets_mean": 8686.4,
|
||
|
|
"valid_targets_min": 1431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.05664,
|
||
|
|
"grad_norm": 0.18827941293584474,
|
||
|
|
"learning_rate": 1.5764467442921274e-05,
|
||
|
|
"loss": 0.3503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1236504539847374,
|
||
|
|
"step": 3185,
|
||
|
|
"valid_targets_mean": 12017.0,
|
||
|
|
"valid_targets_min": 1252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.06144,
|
||
|
|
"grad_norm": 0.18656827183872657,
|
||
|
|
"learning_rate": 1.569901179743818e-05,
|
||
|
|
"loss": 0.3605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12331358343362808,
|
||
|
|
"step": 3190,
|
||
|
|
"valid_targets_mean": 11676.1,
|
||
|
|
"valid_targets_min": 2242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.06624,
|
||
|
|
"grad_norm": 0.19798995046879575,
|
||
|
|
"learning_rate": 1.5633604418602483e-05,
|
||
|
|
"loss": 0.3529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12344086915254593,
|
||
|
|
"step": 3195,
|
||
|
|
"valid_targets_mean": 11296.1,
|
||
|
|
"valid_targets_min": 2591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.07104,
|
||
|
|
"grad_norm": 0.20696979517957703,
|
||
|
|
"learning_rate": 1.5568246040430343e-05,
|
||
|
|
"loss": 0.3545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1138351783156395,
|
||
|
|
"step": 3200,
|
||
|
|
"valid_targets_mean": 9281.8,
|
||
|
|
"valid_targets_min": 1942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.07584,
|
||
|
|
"grad_norm": 0.1915849618482703,
|
||
|
|
"learning_rate": 1.5502937396388046e-05,
|
||
|
|
"loss": 0.354,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10900023579597473,
|
||
|
|
"step": 3205,
|
||
|
|
"valid_targets_mean": 10877.6,
|
||
|
|
"valid_targets_min": 2699
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.08064,
|
||
|
|
"grad_norm": 0.1827720297353329,
|
||
|
|
"learning_rate": 1.543767921938374e-05,
|
||
|
|
"loss": 0.363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1224978119134903,
|
||
|
|
"step": 3210,
|
||
|
|
"valid_targets_mean": 11502.3,
|
||
|
|
"valid_targets_min": 1838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.08544,
|
||
|
|
"grad_norm": 0.18647047083470117,
|
||
|
|
"learning_rate": 1.537247224175922e-05,
|
||
|
|
"loss": 0.3567,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12575547397136688,
|
||
|
|
"step": 3215,
|
||
|
|
"valid_targets_mean": 10821.2,
|
||
|
|
"valid_targets_min": 2280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.09024,
|
||
|
|
"grad_norm": 0.20310601224851935,
|
||
|
|
"learning_rate": 1.53073171952817e-05,
|
||
|
|
"loss": 0.3613,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09874022006988525,
|
||
|
|
"step": 3220,
|
||
|
|
"valid_targets_mean": 8396.7,
|
||
|
|
"valid_targets_min": 1481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.09504,
|
||
|
|
"grad_norm": 0.1907050422414701,
|
||
|
|
"learning_rate": 1.5242214811135631e-05,
|
||
|
|
"loss": 0.3564,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1317986696958542,
|
||
|
|
"step": 3225,
|
||
|
|
"valid_targets_mean": 11944.2,
|
||
|
|
"valid_targets_min": 1024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.09984,
|
||
|
|
"grad_norm": 0.23024193549682415,
|
||
|
|
"learning_rate": 1.5177165819914461e-05,
|
||
|
|
"loss": 0.3649,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1109505295753479,
|
||
|
|
"step": 3230,
|
||
|
|
"valid_targets_mean": 10249.3,
|
||
|
|
"valid_targets_min": 2751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.10464,
|
||
|
|
"grad_norm": 0.21783714840986526,
|
||
|
|
"learning_rate": 1.5112170951612455e-05,
|
||
|
|
"loss": 0.3638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.128553107380867,
|
||
|
|
"step": 3235,
|
||
|
|
"valid_targets_mean": 9875.0,
|
||
|
|
"valid_targets_min": 1253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.10944,
|
||
|
|
"grad_norm": 0.1859714790317045,
|
||
|
|
"learning_rate": 1.5047230935616497e-05,
|
||
|
|
"loss": 0.3496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12540477514266968,
|
||
|
|
"step": 3240,
|
||
|
|
"valid_targets_mean": 9422.7,
|
||
|
|
"valid_targets_min": 1985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.11424,
|
||
|
|
"grad_norm": 0.17930792691047237,
|
||
|
|
"learning_rate": 1.4982346500697916e-05,
|
||
|
|
"loss": 0.3648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1338803768157959,
|
||
|
|
"step": 3245,
|
||
|
|
"valid_targets_mean": 11366.2,
|
||
|
|
"valid_targets_min": 1835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.11904,
|
||
|
|
"grad_norm": 0.19775983710451614,
|
||
|
|
"learning_rate": 1.4917518375004281e-05,
|
||
|
|
"loss": 0.3565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11442698538303375,
|
||
|
|
"step": 3250,
|
||
|
|
"valid_targets_mean": 9435.9,
|
||
|
|
"valid_targets_min": 2168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.12384,
|
||
|
|
"grad_norm": 0.18680122426497975,
|
||
|
|
"learning_rate": 1.4852747286051254e-05,
|
||
|
|
"loss": 0.3606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12294398248195648,
|
||
|
|
"step": 3255,
|
||
|
|
"valid_targets_mean": 10353.2,
|
||
|
|
"valid_targets_min": 1413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.12864,
|
||
|
|
"grad_norm": 0.1925913840839781,
|
||
|
|
"learning_rate": 1.478803396071443e-05,
|
||
|
|
"loss": 0.3591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10112228244543076,
|
||
|
|
"step": 3260,
|
||
|
|
"valid_targets_mean": 9614.4,
|
||
|
|
"valid_targets_min": 2012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1334400000000002,
|
||
|
|
"grad_norm": 0.1892566760221108,
|
||
|
|
"learning_rate": 1.472337912522115e-05,
|
||
|
|
"loss": 0.3647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12162721157073975,
|
||
|
|
"step": 3265,
|
||
|
|
"valid_targets_mean": 10449.9,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.13824,
|
||
|
|
"grad_norm": 0.1986424282020148,
|
||
|
|
"learning_rate": 1.4658783505142368e-05,
|
||
|
|
"loss": 0.3534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1560564637184143,
|
||
|
|
"step": 3270,
|
||
|
|
"valid_targets_mean": 12148.1,
|
||
|
|
"valid_targets_min": 979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.14304,
|
||
|
|
"grad_norm": 0.19036134077024724,
|
||
|
|
"learning_rate": 1.4594247825384529e-05,
|
||
|
|
"loss": 0.3533,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10178598761558533,
|
||
|
|
"step": 3275,
|
||
|
|
"valid_targets_mean": 9834.5,
|
||
|
|
"valid_targets_min": 1698
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.14784,
|
||
|
|
"grad_norm": 0.19790264725596374,
|
||
|
|
"learning_rate": 1.4529772810181398e-05,
|
||
|
|
"loss": 0.3541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1019100546836853,
|
||
|
|
"step": 3280,
|
||
|
|
"valid_targets_mean": 9963.8,
|
||
|
|
"valid_targets_min": 1741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.15264,
|
||
|
|
"grad_norm": 0.18612486364274605,
|
||
|
|
"learning_rate": 1.4465359183085958e-05,
|
||
|
|
"loss": 0.3506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12211617082357407,
|
||
|
|
"step": 3285,
|
||
|
|
"valid_targets_mean": 10298.8,
|
||
|
|
"valid_targets_min": 2362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.15744,
|
||
|
|
"grad_norm": 0.19467475396240183,
|
||
|
|
"learning_rate": 1.4401007666962276e-05,
|
||
|
|
"loss": 0.3586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.120920330286026,
|
||
|
|
"step": 3290,
|
||
|
|
"valid_targets_mean": 10495.0,
|
||
|
|
"valid_targets_min": 2093
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.16224,
|
||
|
|
"grad_norm": 0.1929595161256868,
|
||
|
|
"learning_rate": 1.4336718983977389e-05,
|
||
|
|
"loss": 0.3647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1146123930811882,
|
||
|
|
"step": 3295,
|
||
|
|
"valid_targets_mean": 10586.0,
|
||
|
|
"valid_targets_min": 1173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.16704,
|
||
|
|
"grad_norm": 0.18264708976721075,
|
||
|
|
"learning_rate": 1.4272493855593222e-05,
|
||
|
|
"loss": 0.3576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13314726948738098,
|
||
|
|
"step": 3300,
|
||
|
|
"valid_targets_mean": 11679.8,
|
||
|
|
"valid_targets_min": 1112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.17184,
|
||
|
|
"grad_norm": 0.1913530013358868,
|
||
|
|
"learning_rate": 1.4208333002558462e-05,
|
||
|
|
"loss": 0.3583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10753712058067322,
|
||
|
|
"step": 3305,
|
||
|
|
"valid_targets_mean": 9290.6,
|
||
|
|
"valid_targets_min": 1387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.17664,
|
||
|
|
"grad_norm": 0.20193716674075288,
|
||
|
|
"learning_rate": 1.4144237144900497e-05,
|
||
|
|
"loss": 0.3654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11723098158836365,
|
||
|
|
"step": 3310,
|
||
|
|
"valid_targets_mean": 10084.2,
|
||
|
|
"valid_targets_min": 1711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.18144,
|
||
|
|
"grad_norm": 0.18684656406178907,
|
||
|
|
"learning_rate": 1.4080207001917302e-05,
|
||
|
|
"loss": 0.3524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11985105276107788,
|
||
|
|
"step": 3315,
|
||
|
|
"valid_targets_mean": 10388.2,
|
||
|
|
"valid_targets_min": 1748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.18624,
|
||
|
|
"grad_norm": 0.18906821405151913,
|
||
|
|
"learning_rate": 1.4016243292169413e-05,
|
||
|
|
"loss": 0.358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10248436778783798,
|
||
|
|
"step": 3320,
|
||
|
|
"valid_targets_mean": 9951.2,
|
||
|
|
"valid_targets_min": 2571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.19104,
|
||
|
|
"grad_norm": 0.21691396130072285,
|
||
|
|
"learning_rate": 1.3952346733471822e-05,
|
||
|
|
"loss": 0.3637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13228540122509003,
|
||
|
|
"step": 3325,
|
||
|
|
"valid_targets_mean": 11530.5,
|
||
|
|
"valid_targets_min": 1585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.19584,
|
||
|
|
"grad_norm": 0.20831678580666343,
|
||
|
|
"learning_rate": 1.3888518042885934e-05,
|
||
|
|
"loss": 0.3606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11678709089756012,
|
||
|
|
"step": 3330,
|
||
|
|
"valid_targets_mean": 9578.8,
|
||
|
|
"valid_targets_min": 2241
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.20064,
|
||
|
|
"grad_norm": 0.22016906998882896,
|
||
|
|
"learning_rate": 1.3824757936711537e-05,
|
||
|
|
"loss": 0.3564,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11219100654125214,
|
||
|
|
"step": 3335,
|
||
|
|
"valid_targets_mean": 9951.4,
|
||
|
|
"valid_targets_min": 1808
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.20544,
|
||
|
|
"grad_norm": 0.1737671280514502,
|
||
|
|
"learning_rate": 1.3761067130478738e-05,
|
||
|
|
"loss": 0.3516,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12665574252605438,
|
||
|
|
"step": 3340,
|
||
|
|
"valid_targets_mean": 12212.2,
|
||
|
|
"valid_targets_min": 3127
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.21024,
|
||
|
|
"grad_norm": 0.18679582060053115,
|
||
|
|
"learning_rate": 1.3697446338939942e-05,
|
||
|
|
"loss": 0.3587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12244650721549988,
|
||
|
|
"step": 3345,
|
||
|
|
"valid_targets_mean": 10303.5,
|
||
|
|
"valid_targets_min": 3638
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.21504,
|
||
|
|
"grad_norm": 0.18953197574821232,
|
||
|
|
"learning_rate": 1.3633896276061847e-05,
|
||
|
|
"loss": 0.3479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11921748518943787,
|
||
|
|
"step": 3350,
|
||
|
|
"valid_targets_mean": 10747.9,
|
||
|
|
"valid_targets_min": 2550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.21984,
|
||
|
|
"grad_norm": 0.19332856848026225,
|
||
|
|
"learning_rate": 1.3570417655017405e-05,
|
||
|
|
"loss": 0.366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12070733308792114,
|
||
|
|
"step": 3355,
|
||
|
|
"valid_targets_mean": 10950.4,
|
||
|
|
"valid_targets_min": 1986
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.22464,
|
||
|
|
"grad_norm": 0.17752470811518664,
|
||
|
|
"learning_rate": 1.3507011188177846e-05,
|
||
|
|
"loss": 0.3559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11526253074407578,
|
||
|
|
"step": 3360,
|
||
|
|
"valid_targets_mean": 11321.6,
|
||
|
|
"valid_targets_min": 2605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.22944,
|
||
|
|
"grad_norm": 0.19017706432063974,
|
||
|
|
"learning_rate": 1.3443677587104655e-05,
|
||
|
|
"loss": 0.3591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08614403009414673,
|
||
|
|
"step": 3365,
|
||
|
|
"valid_targets_mean": 8164.4,
|
||
|
|
"valid_targets_min": 1641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.23424,
|
||
|
|
"grad_norm": 0.1923946538335053,
|
||
|
|
"learning_rate": 1.3380417562541604e-05,
|
||
|
|
"loss": 0.3678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1379067748785019,
|
||
|
|
"step": 3370,
|
||
|
|
"valid_targets_mean": 11554.5,
|
||
|
|
"valid_targets_min": 274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.23904,
|
||
|
|
"grad_norm": 0.2183518088024744,
|
||
|
|
"learning_rate": 1.3317231824406783e-05,
|
||
|
|
"loss": 0.3591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12228928506374359,
|
||
|
|
"step": 3375,
|
||
|
|
"valid_targets_mean": 10087.4,
|
||
|
|
"valid_targets_min": 761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.24384,
|
||
|
|
"grad_norm": 0.1802788340350456,
|
||
|
|
"learning_rate": 1.325412108178461e-05,
|
||
|
|
"loss": 0.3554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11406947672367096,
|
||
|
|
"step": 3380,
|
||
|
|
"valid_targets_mean": 10006.9,
|
||
|
|
"valid_targets_min": 1848
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.24864,
|
||
|
|
"grad_norm": 0.20894077379192982,
|
||
|
|
"learning_rate": 1.3191086042917895e-05,
|
||
|
|
"loss": 0.3582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11614423245191574,
|
||
|
|
"step": 3385,
|
||
|
|
"valid_targets_mean": 9981.8,
|
||
|
|
"valid_targets_min": 1502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.25344,
|
||
|
|
"grad_norm": 0.19977532613637036,
|
||
|
|
"learning_rate": 1.3128127415199883e-05,
|
||
|
|
"loss": 0.3568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1377061903476715,
|
||
|
|
"step": 3390,
|
||
|
|
"valid_targets_mean": 10633.7,
|
||
|
|
"valid_targets_min": 1012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.25824,
|
||
|
|
"grad_norm": 0.1901378820426991,
|
||
|
|
"learning_rate": 1.3065245905166316e-05,
|
||
|
|
"loss": 0.3492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12295252829790115,
|
||
|
|
"step": 3395,
|
||
|
|
"valid_targets_mean": 11947.4,
|
||
|
|
"valid_targets_min": 2325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.26304,
|
||
|
|
"grad_norm": 0.18578663753606683,
|
||
|
|
"learning_rate": 1.30024422184875e-05,
|
||
|
|
"loss": 0.3569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11808900535106659,
|
||
|
|
"step": 3400,
|
||
|
|
"valid_targets_mean": 11162.8,
|
||
|
|
"valid_targets_min": 1875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.26784,
|
||
|
|
"grad_norm": 0.186713719935275,
|
||
|
|
"learning_rate": 1.2939717059960384e-05,
|
||
|
|
"loss": 0.3525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11969725042581558,
|
||
|
|
"step": 3405,
|
||
|
|
"valid_targets_mean": 10443.7,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.27264,
|
||
|
|
"grad_norm": 0.17502503273066872,
|
||
|
|
"learning_rate": 1.287707113350068e-05,
|
||
|
|
"loss": 0.3585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11786532402038574,
|
||
|
|
"step": 3410,
|
||
|
|
"valid_targets_mean": 11139.2,
|
||
|
|
"valid_targets_min": 1627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.27744,
|
||
|
|
"grad_norm": 0.1998399471402172,
|
||
|
|
"learning_rate": 1.2814505142134921e-05,
|
||
|
|
"loss": 0.361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10916991531848907,
|
||
|
|
"step": 3415,
|
||
|
|
"valid_targets_mean": 10066.5,
|
||
|
|
"valid_targets_min": 2648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.28224,
|
||
|
|
"grad_norm": 0.18258313932232073,
|
||
|
|
"learning_rate": 1.2752019787992587e-05,
|
||
|
|
"loss": 0.3621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12325777858495712,
|
||
|
|
"step": 3420,
|
||
|
|
"valid_targets_mean": 11230.4,
|
||
|
|
"valid_targets_min": 2024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.28704,
|
||
|
|
"grad_norm": 0.19416187067441554,
|
||
|
|
"learning_rate": 1.268961577229824e-05,
|
||
|
|
"loss": 0.3598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14313529431819916,
|
||
|
|
"step": 3425,
|
||
|
|
"valid_targets_mean": 12805.9,
|
||
|
|
"valid_targets_min": 2969
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.29184,
|
||
|
|
"grad_norm": 0.18140295108808063,
|
||
|
|
"learning_rate": 1.262729379536365e-05,
|
||
|
|
"loss": 0.3566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11013881862163544,
|
||
|
|
"step": 3430,
|
||
|
|
"valid_targets_mean": 9937.2,
|
||
|
|
"valid_targets_min": 1936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.29664,
|
||
|
|
"grad_norm": 0.199023784044211,
|
||
|
|
"learning_rate": 1.2565054556579917e-05,
|
||
|
|
"loss": 0.3584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13415634632110596,
|
||
|
|
"step": 3435,
|
||
|
|
"valid_targets_mean": 10926.6,
|
||
|
|
"valid_targets_min": 3449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.30144,
|
||
|
|
"grad_norm": 0.19049442802798405,
|
||
|
|
"learning_rate": 1.2502898754409637e-05,
|
||
|
|
"loss": 0.3575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14396649599075317,
|
||
|
|
"step": 3440,
|
||
|
|
"valid_targets_mean": 12972.6,
|
||
|
|
"valid_targets_min": 811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.30624,
|
||
|
|
"grad_norm": 0.1885705753589618,
|
||
|
|
"learning_rate": 1.2440827086379055e-05,
|
||
|
|
"loss": 0.3543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12890590727329254,
|
||
|
|
"step": 3445,
|
||
|
|
"valid_targets_mean": 10910.4,
|
||
|
|
"valid_targets_min": 2691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.31104,
|
||
|
|
"grad_norm": 0.19529808846683833,
|
||
|
|
"learning_rate": 1.2378840249070265e-05,
|
||
|
|
"loss": 0.3583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09891938418149948,
|
||
|
|
"step": 3450,
|
||
|
|
"valid_targets_mean": 7235.0,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.31584,
|
||
|
|
"grad_norm": 0.19195203994171842,
|
||
|
|
"learning_rate": 1.2316938938113356e-05,
|
||
|
|
"loss": 0.3494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13893388211727142,
|
||
|
|
"step": 3455,
|
||
|
|
"valid_targets_mean": 10672.0,
|
||
|
|
"valid_targets_min": 1852
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.32064,
|
||
|
|
"grad_norm": 0.18938418133544757,
|
||
|
|
"learning_rate": 1.2255123848178619e-05,
|
||
|
|
"loss": 0.3637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14761734008789062,
|
||
|
|
"step": 3460,
|
||
|
|
"valid_targets_mean": 12264.5,
|
||
|
|
"valid_targets_min": 1059
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.32544,
|
||
|
|
"grad_norm": 0.2245840467957242,
|
||
|
|
"learning_rate": 1.2193395672968765e-05,
|
||
|
|
"loss": 0.356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1288203001022339,
|
||
|
|
"step": 3465,
|
||
|
|
"valid_targets_mean": 11682.1,
|
||
|
|
"valid_targets_min": 3245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.33024,
|
||
|
|
"grad_norm": 0.24566943058202834,
|
||
|
|
"learning_rate": 1.2131755105211118e-05,
|
||
|
|
"loss": 0.3503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12490352988243103,
|
||
|
|
"step": 3470,
|
||
|
|
"valid_targets_mean": 10748.3,
|
||
|
|
"valid_targets_min": 2691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3350400000000002,
|
||
|
|
"grad_norm": 0.19570442808701657,
|
||
|
|
"learning_rate": 1.2070202836649855e-05,
|
||
|
|
"loss": 0.3556,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11817985028028488,
|
||
|
|
"step": 3475,
|
||
|
|
"valid_targets_mean": 10496.6,
|
||
|
|
"valid_targets_min": 1164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.33984,
|
||
|
|
"grad_norm": 0.1938457143624076,
|
||
|
|
"learning_rate": 1.2008739558038247e-05,
|
||
|
|
"loss": 0.3576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12001006305217743,
|
||
|
|
"step": 3480,
|
||
|
|
"valid_targets_mean": 10190.2,
|
||
|
|
"valid_targets_min": 1310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.34464,
|
||
|
|
"grad_norm": 0.19830141758413053,
|
||
|
|
"learning_rate": 1.1947365959130895e-05,
|
||
|
|
"loss": 0.3484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11415542662143707,
|
||
|
|
"step": 3485,
|
||
|
|
"valid_targets_mean": 9767.1,
|
||
|
|
"valid_targets_min": 1479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.34944,
|
||
|
|
"grad_norm": 0.18805418601396895,
|
||
|
|
"learning_rate": 1.1886082728675984e-05,
|
||
|
|
"loss": 0.3598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10864933580160141,
|
||
|
|
"step": 3490,
|
||
|
|
"valid_targets_mean": 9316.0,
|
||
|
|
"valid_targets_min": 2287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.35424,
|
||
|
|
"grad_norm": 0.19446719017345912,
|
||
|
|
"learning_rate": 1.1824890554407574e-05,
|
||
|
|
"loss": 0.3672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10024982690811157,
|
||
|
|
"step": 3495,
|
||
|
|
"valid_targets_mean": 9009.9,
|
||
|
|
"valid_targets_min": 263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3590400000000002,
|
||
|
|
"grad_norm": 0.18135088395469234,
|
||
|
|
"learning_rate": 1.1763790123037873e-05,
|
||
|
|
"loss": 0.362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14061683416366577,
|
||
|
|
"step": 3500,
|
||
|
|
"valid_targets_mean": 12592.8,
|
||
|
|
"valid_targets_min": 3445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.36384,
|
||
|
|
"grad_norm": 0.18546771956201968,
|
||
|
|
"learning_rate": 1.1702782120249539e-05,
|
||
|
|
"loss": 0.3538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1133251041173935,
|
||
|
|
"step": 3505,
|
||
|
|
"valid_targets_mean": 10291.1,
|
||
|
|
"valid_targets_min": 1576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.36864,
|
||
|
|
"grad_norm": 0.19807807518664403,
|
||
|
|
"learning_rate": 1.164186723068795e-05,
|
||
|
|
"loss": 0.361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11753286421298981,
|
||
|
|
"step": 3510,
|
||
|
|
"valid_targets_mean": 10020.5,
|
||
|
|
"valid_targets_min": 1826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.37344,
|
||
|
|
"grad_norm": 0.2278874445711356,
|
||
|
|
"learning_rate": 1.1581046137953575e-05,
|
||
|
|
"loss": 0.3502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10156413912773132,
|
||
|
|
"step": 3515,
|
||
|
|
"valid_targets_mean": 9347.9,
|
||
|
|
"valid_targets_min": 2769
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.37824,
|
||
|
|
"grad_norm": 0.18710166735457182,
|
||
|
|
"learning_rate": 1.1520319524594256e-05,
|
||
|
|
"loss": 0.3563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12491115182638168,
|
||
|
|
"step": 3520,
|
||
|
|
"valid_targets_mean": 10771.9,
|
||
|
|
"valid_targets_min": 2445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.38304,
|
||
|
|
"grad_norm": 0.2024893783584319,
|
||
|
|
"learning_rate": 1.1459688072097568e-05,
|
||
|
|
"loss": 0.3569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13399648666381836,
|
||
|
|
"step": 3525,
|
||
|
|
"valid_targets_mean": 9379.2,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.38784,
|
||
|
|
"grad_norm": 0.1861201118297581,
|
||
|
|
"learning_rate": 1.1399152460883176e-05,
|
||
|
|
"loss": 0.3568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11231739073991776,
|
||
|
|
"step": 3530,
|
||
|
|
"valid_targets_mean": 9798.0,
|
||
|
|
"valid_targets_min": 2038
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.39264,
|
||
|
|
"grad_norm": 0.1966601165631479,
|
||
|
|
"learning_rate": 1.1338713370295189e-05,
|
||
|
|
"loss": 0.3622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.117458775639534,
|
||
|
|
"step": 3535,
|
||
|
|
"valid_targets_mean": 10878.8,
|
||
|
|
"valid_targets_min": 2845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.39744,
|
||
|
|
"grad_norm": 0.2085490830405768,
|
||
|
|
"learning_rate": 1.1278371478594538e-05,
|
||
|
|
"loss": 0.3551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10531540215015411,
|
||
|
|
"step": 3540,
|
||
|
|
"valid_targets_mean": 9678.8,
|
||
|
|
"valid_targets_min": 2667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.40224,
|
||
|
|
"grad_norm": 0.18946887709543617,
|
||
|
|
"learning_rate": 1.1218127462951367e-05,
|
||
|
|
"loss": 0.3546,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1102088987827301,
|
||
|
|
"step": 3545,
|
||
|
|
"valid_targets_mean": 10510.7,
|
||
|
|
"valid_targets_min": 1094
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.40704,
|
||
|
|
"grad_norm": 0.17892277003492862,
|
||
|
|
"learning_rate": 1.1157981999437444e-05,
|
||
|
|
"loss": 0.3499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13594399392604828,
|
||
|
|
"step": 3550,
|
||
|
|
"valid_targets_mean": 12380.1,
|
||
|
|
"valid_targets_min": 2776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4118399999999998,
|
||
|
|
"grad_norm": 0.217465251290386,
|
||
|
|
"learning_rate": 1.109793576301855e-05,
|
||
|
|
"loss": 0.3609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13660013675689697,
|
||
|
|
"step": 3555,
|
||
|
|
"valid_targets_mean": 11982.4,
|
||
|
|
"valid_targets_min": 3267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.41664,
|
||
|
|
"grad_norm": 0.20923061680624425,
|
||
|
|
"learning_rate": 1.1037989427546924e-05,
|
||
|
|
"loss": 0.3538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10261556506156921,
|
||
|
|
"step": 3560,
|
||
|
|
"valid_targets_mean": 9321.8,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.42144,
|
||
|
|
"grad_norm": 0.18390467258543086,
|
||
|
|
"learning_rate": 1.0978143665753692e-05,
|
||
|
|
"loss": 0.3572,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10945111513137817,
|
||
|
|
"step": 3565,
|
||
|
|
"valid_targets_mean": 9768.1,
|
||
|
|
"valid_targets_min": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.42624,
|
||
|
|
"grad_norm": 0.1854201454238558,
|
||
|
|
"learning_rate": 1.0918399149241314e-05,
|
||
|
|
"loss": 0.3556,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11244992911815643,
|
||
|
|
"step": 3570,
|
||
|
|
"valid_targets_mean": 9936.3,
|
||
|
|
"valid_targets_min": 1494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.43104,
|
||
|
|
"grad_norm": 0.19331857879904846,
|
||
|
|
"learning_rate": 1.0858756548476058e-05,
|
||
|
|
"loss": 0.3532,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11202079802751541,
|
||
|
|
"step": 3575,
|
||
|
|
"valid_targets_mean": 9803.0,
|
||
|
|
"valid_targets_min": 1987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.43584,
|
||
|
|
"grad_norm": 0.19396064861205353,
|
||
|
|
"learning_rate": 1.0799216532780478e-05,
|
||
|
|
"loss": 0.3606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12126053869724274,
|
||
|
|
"step": 3580,
|
||
|
|
"valid_targets_mean": 11055.3,
|
||
|
|
"valid_targets_min": 2296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.44064,
|
||
|
|
"grad_norm": 0.17874507316377455,
|
||
|
|
"learning_rate": 1.0739779770325885e-05,
|
||
|
|
"loss": 0.3495,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12120386958122253,
|
||
|
|
"step": 3585,
|
||
|
|
"valid_targets_mean": 10554.2,
|
||
|
|
"valid_targets_min": 2168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.44544,
|
||
|
|
"grad_norm": 0.19495375081499186,
|
||
|
|
"learning_rate": 1.0680446928124872e-05,
|
||
|
|
"loss": 0.362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12293912470340729,
|
||
|
|
"step": 3590,
|
||
|
|
"valid_targets_mean": 10255.5,
|
||
|
|
"valid_targets_min": 1716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.45024,
|
||
|
|
"grad_norm": 0.1910897590945172,
|
||
|
|
"learning_rate": 1.06212186720238e-05,
|
||
|
|
"loss": 0.357,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1188676506280899,
|
||
|
|
"step": 3595,
|
||
|
|
"valid_targets_mean": 9686.2,
|
||
|
|
"valid_targets_min": 3623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.45504,
|
||
|
|
"grad_norm": 0.1811992412391965,
|
||
|
|
"learning_rate": 1.0562095666695352e-05,
|
||
|
|
"loss": 0.3511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10783851146697998,
|
||
|
|
"step": 3600,
|
||
|
|
"valid_targets_mean": 9709.2,
|
||
|
|
"valid_targets_min": 1990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.45984,
|
||
|
|
"grad_norm": 0.19304297830203423,
|
||
|
|
"learning_rate": 1.0503078575631052e-05,
|
||
|
|
"loss": 0.3423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13060978055000305,
|
||
|
|
"step": 3605,
|
||
|
|
"valid_targets_mean": 12093.8,
|
||
|
|
"valid_targets_min": 2563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.46464,
|
||
|
|
"grad_norm": 0.19848303325348635,
|
||
|
|
"learning_rate": 1.0444168061133846e-05,
|
||
|
|
"loss": 0.3243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11622381210327148,
|
||
|
|
"step": 3610,
|
||
|
|
"valid_targets_mean": 11091.0,
|
||
|
|
"valid_targets_min": 1856
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.46944,
|
||
|
|
"grad_norm": 0.1962983514499632,
|
||
|
|
"learning_rate": 1.0385364784310636e-05,
|
||
|
|
"loss": 0.3369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10630473494529724,
|
||
|
|
"step": 3615,
|
||
|
|
"valid_targets_mean": 9552.4,
|
||
|
|
"valid_targets_min": 1488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.47424,
|
||
|
|
"grad_norm": 0.18394754371487584,
|
||
|
|
"learning_rate": 1.0326669405064904e-05,
|
||
|
|
"loss": 0.3406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12049125134944916,
|
||
|
|
"step": 3620,
|
||
|
|
"valid_targets_mean": 11530.6,
|
||
|
|
"valid_targets_min": 1867
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.47904,
|
||
|
|
"grad_norm": 0.20134945522749817,
|
||
|
|
"learning_rate": 1.0268082582089263e-05,
|
||
|
|
"loss": 0.3391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09410315752029419,
|
||
|
|
"step": 3625,
|
||
|
|
"valid_targets_mean": 9730.5,
|
||
|
|
"valid_targets_min": 1819
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.48384,
|
||
|
|
"grad_norm": 0.20430868467866287,
|
||
|
|
"learning_rate": 1.0209604972858081e-05,
|
||
|
|
"loss": 0.3446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1092514842748642,
|
||
|
|
"step": 3630,
|
||
|
|
"valid_targets_mean": 10808.2,
|
||
|
|
"valid_targets_min": 932
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.48864,
|
||
|
|
"grad_norm": 0.1923622457140111,
|
||
|
|
"learning_rate": 1.0151237233620115e-05,
|
||
|
|
"loss": 0.3344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10817442834377289,
|
||
|
|
"step": 3635,
|
||
|
|
"valid_targets_mean": 10063.6,
|
||
|
|
"valid_targets_min": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.49344,
|
||
|
|
"grad_norm": 0.1919431133222278,
|
||
|
|
"learning_rate": 1.0092980019391132e-05,
|
||
|
|
"loss": 0.3453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11262176185846329,
|
||
|
|
"step": 3640,
|
||
|
|
"valid_targets_mean": 11249.9,
|
||
|
|
"valid_targets_min": 2827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.49824,
|
||
|
|
"grad_norm": 0.22049311818233236,
|
||
|
|
"learning_rate": 1.0034833983946561e-05,
|
||
|
|
"loss": 0.3279,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09713824093341827,
|
||
|
|
"step": 3645,
|
||
|
|
"valid_targets_mean": 9480.8,
|
||
|
|
"valid_targets_min": 2041
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.50304,
|
||
|
|
"grad_norm": 0.18906192877132377,
|
||
|
|
"learning_rate": 9.976799779814157e-06,
|
||
|
|
"loss": 0.3362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10813942551612854,
|
||
|
|
"step": 3650,
|
||
|
|
"valid_targets_mean": 9799.3,
|
||
|
|
"valid_targets_min": 2045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.50784,
|
||
|
|
"grad_norm": 0.20056904457870012,
|
||
|
|
"learning_rate": 9.918878058266687e-06,
|
||
|
|
"loss": 0.3413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12527769804000854,
|
||
|
|
"step": 3655,
|
||
|
|
"valid_targets_mean": 11885.8,
|
||
|
|
"valid_targets_min": 2417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.51264,
|
||
|
|
"grad_norm": 0.20011413770145697,
|
||
|
|
"learning_rate": 9.86106946931462e-06,
|
||
|
|
"loss": 0.342,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11193445324897766,
|
||
|
|
"step": 3660,
|
||
|
|
"valid_targets_mean": 9013.2,
|
||
|
|
"valid_targets_min": 1957
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.51744,
|
||
|
|
"grad_norm": 0.25272674369217807,
|
||
|
|
"learning_rate": 9.803374661698802e-06,
|
||
|
|
"loss": 0.3371,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1340043842792511,
|
||
|
|
"step": 3665,
|
||
|
|
"valid_targets_mean": 11369.7,
|
||
|
|
"valid_targets_min": 2172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.52224,
|
||
|
|
"grad_norm": 0.2061921625077728,
|
||
|
|
"learning_rate": 9.745794282883215e-06,
|
||
|
|
"loss": 0.3417,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1308523714542389,
|
||
|
|
"step": 3670,
|
||
|
|
"valid_targets_mean": 11718.5,
|
||
|
|
"valid_targets_min": 2566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.52704,
|
||
|
|
"grad_norm": 0.2140394863007355,
|
||
|
|
"learning_rate": 9.688328979047689e-06,
|
||
|
|
"loss": 0.3339,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1149681806564331,
|
||
|
|
"step": 3675,
|
||
|
|
"valid_targets_mean": 10297.6,
|
||
|
|
"valid_targets_min": 2433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.53184,
|
||
|
|
"grad_norm": 0.20548285119646126,
|
||
|
|
"learning_rate": 9.630979395080667e-06,
|
||
|
|
"loss": 0.3388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10696137696504593,
|
||
|
|
"step": 3680,
|
||
|
|
"valid_targets_mean": 10548.2,
|
||
|
|
"valid_targets_min": 2192
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5366400000000002,
|
||
|
|
"grad_norm": 0.20553549970341442,
|
||
|
|
"learning_rate": 9.573746174571947e-06,
|
||
|
|
"loss": 0.3372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09759115427732468,
|
||
|
|
"step": 3685,
|
||
|
|
"valid_targets_mean": 9031.1,
|
||
|
|
"valid_targets_min": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.54144,
|
||
|
|
"grad_norm": 0.2451074762204898,
|
||
|
|
"learning_rate": 9.516629959805468e-06,
|
||
|
|
"loss": 0.3333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10014495253562927,
|
||
|
|
"step": 3690,
|
||
|
|
"valid_targets_mean": 9580.7,
|
||
|
|
"valid_targets_min": 1801
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.54624,
|
||
|
|
"grad_norm": 0.21626968302427205,
|
||
|
|
"learning_rate": 9.459631391752126e-06,
|
||
|
|
"loss": 0.337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11678461730480194,
|
||
|
|
"step": 3695,
|
||
|
|
"valid_targets_mean": 10609.9,
|
||
|
|
"valid_targets_min": 1721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.55104,
|
||
|
|
"grad_norm": 0.19392085602556414,
|
||
|
|
"learning_rate": 9.40275111006254e-06,
|
||
|
|
"loss": 0.3387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08821696043014526,
|
||
|
|
"step": 3700,
|
||
|
|
"valid_targets_mean": 9932.8,
|
||
|
|
"valid_targets_min": 3422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.55584,
|
||
|
|
"grad_norm": 0.201996804562783,
|
||
|
|
"learning_rate": 9.345989753059895e-06,
|
||
|
|
"loss": 0.3356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1151978075504303,
|
||
|
|
"step": 3705,
|
||
|
|
"valid_targets_mean": 11600.8,
|
||
|
|
"valid_targets_min": 3168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5606400000000002,
|
||
|
|
"grad_norm": 0.22336753266344042,
|
||
|
|
"learning_rate": 9.289347957732779e-06,
|
||
|
|
"loss": 0.3374,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12304936349391937,
|
||
|
|
"step": 3710,
|
||
|
|
"valid_targets_mean": 10241.5,
|
||
|
|
"valid_targets_min": 2089
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.56544,
|
||
|
|
"grad_norm": 0.19062223595038535,
|
||
|
|
"learning_rate": 9.232826359728034e-06,
|
||
|
|
"loss": 0.3378,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11184520274400711,
|
||
|
|
"step": 3715,
|
||
|
|
"valid_targets_mean": 10874.9,
|
||
|
|
"valid_targets_min": 1798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.57024,
|
||
|
|
"grad_norm": 0.19077344761804907,
|
||
|
|
"learning_rate": 9.17642559334362e-06,
|
||
|
|
"loss": 0.336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12285032868385315,
|
||
|
|
"step": 3720,
|
||
|
|
"valid_targets_mean": 10946.0,
|
||
|
|
"valid_targets_min": 1827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.57504,
|
||
|
|
"grad_norm": 0.2083508079173029,
|
||
|
|
"learning_rate": 9.120146291521488e-06,
|
||
|
|
"loss": 0.3274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10798792541027069,
|
||
|
|
"step": 3725,
|
||
|
|
"valid_targets_mean": 9431.4,
|
||
|
|
"valid_targets_min": 1744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.57984,
|
||
|
|
"grad_norm": 0.19931430642605075,
|
||
|
|
"learning_rate": 9.063989085840506e-06,
|
||
|
|
"loss": 0.3345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10994656383991241,
|
||
|
|
"step": 3730,
|
||
|
|
"valid_targets_mean": 10691.3,
|
||
|
|
"valid_targets_min": 3764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5846400000000003,
|
||
|
|
"grad_norm": 0.20157589308788418,
|
||
|
|
"learning_rate": 9.007954606509346e-06,
|
||
|
|
"loss": 0.3318,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12041280418634415,
|
||
|
|
"step": 3735,
|
||
|
|
"valid_targets_mean": 11329.4,
|
||
|
|
"valid_targets_min": 317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5894399999999997,
|
||
|
|
"grad_norm": 0.19739606946409297,
|
||
|
|
"learning_rate": 8.952043482359408e-06,
|
||
|
|
"loss": 0.3333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11637447774410248,
|
||
|
|
"step": 3740,
|
||
|
|
"valid_targets_mean": 10785.2,
|
||
|
|
"valid_targets_min": 2678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.59424,
|
||
|
|
"grad_norm": 0.19236658753285724,
|
||
|
|
"learning_rate": 8.896256340837779e-06,
|
||
|
|
"loss": 0.3329,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10516636818647385,
|
||
|
|
"step": 3745,
|
||
|
|
"valid_targets_mean": 10048.1,
|
||
|
|
"valid_targets_min": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.59904,
|
||
|
|
"grad_norm": 0.22012325224639936,
|
||
|
|
"learning_rate": 8.840593808000182e-06,
|
||
|
|
"loss": 0.3366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11621899902820587,
|
||
|
|
"step": 3750,
|
||
|
|
"valid_targets_mean": 10399.3,
|
||
|
|
"valid_targets_min": 2540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.60384,
|
||
|
|
"grad_norm": 0.18658943719781973,
|
||
|
|
"learning_rate": 8.785056508503956e-06,
|
||
|
|
"loss": 0.3404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10126075148582458,
|
||
|
|
"step": 3755,
|
||
|
|
"valid_targets_mean": 8977.8,
|
||
|
|
"valid_targets_min": 2025
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.60864,
|
||
|
|
"grad_norm": 0.20321229778516367,
|
||
|
|
"learning_rate": 8.729645065601045e-06,
|
||
|
|
"loss": 0.3321,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10659228265285492,
|
||
|
|
"step": 3760,
|
||
|
|
"valid_targets_mean": 9902.1,
|
||
|
|
"valid_targets_min": 1186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6134399999999998,
|
||
|
|
"grad_norm": 0.20835216151670474,
|
||
|
|
"learning_rate": 8.674360101130994e-06,
|
||
|
|
"loss": 0.3303,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12322533875703812,
|
||
|
|
"step": 3765,
|
||
|
|
"valid_targets_mean": 10761.2,
|
||
|
|
"valid_targets_min": 2155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.61824,
|
||
|
|
"grad_norm": 0.24853022651912218,
|
||
|
|
"learning_rate": 8.619202235514e-06,
|
||
|
|
"loss": 0.3324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10587123781442642,
|
||
|
|
"step": 3770,
|
||
|
|
"valid_targets_mean": 10205.1,
|
||
|
|
"valid_targets_min": 1463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.62304,
|
||
|
|
"grad_norm": 0.18590936223050844,
|
||
|
|
"learning_rate": 8.564172087743903e-06,
|
||
|
|
"loss": 0.3368,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09855873882770538,
|
||
|
|
"step": 3775,
|
||
|
|
"valid_targets_mean": 10165.1,
|
||
|
|
"valid_targets_min": 1423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.62784,
|
||
|
|
"grad_norm": 0.19324732375974865,
|
||
|
|
"learning_rate": 8.50927027538128e-06,
|
||
|
|
"loss": 0.3385,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11301326006650925,
|
||
|
|
"step": 3780,
|
||
|
|
"valid_targets_mean": 11568.4,
|
||
|
|
"valid_targets_min": 3325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.63264,
|
||
|
|
"grad_norm": 0.19183324907556853,
|
||
|
|
"learning_rate": 8.454497414546497e-06,
|
||
|
|
"loss": 0.3446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10337097197771072,
|
||
|
|
"step": 3785,
|
||
|
|
"valid_targets_mean": 9855.3,
|
||
|
|
"valid_targets_min": 1252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.63744,
|
||
|
|
"grad_norm": 0.21893269097934628,
|
||
|
|
"learning_rate": 8.39985411991279e-06,
|
||
|
|
"loss": 0.3404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11065911501646042,
|
||
|
|
"step": 3790,
|
||
|
|
"valid_targets_mean": 8598.5,
|
||
|
|
"valid_targets_min": 2729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.64224,
|
||
|
|
"grad_norm": 0.2037498536595881,
|
||
|
|
"learning_rate": 8.345341004699386e-06,
|
||
|
|
"loss": 0.3433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10529591143131256,
|
||
|
|
"step": 3795,
|
||
|
|
"valid_targets_mean": 10991.3,
|
||
|
|
"valid_targets_min": 2372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.64704,
|
||
|
|
"grad_norm": 0.20553055083765032,
|
||
|
|
"learning_rate": 8.290958680664591e-06,
|
||
|
|
"loss": 0.333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11532579362392426,
|
||
|
|
"step": 3800,
|
||
|
|
"valid_targets_mean": 10666.8,
|
||
|
|
"valid_targets_min": 2320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.65184,
|
||
|
|
"grad_norm": 0.19243076020293992,
|
||
|
|
"learning_rate": 8.236707758098965e-06,
|
||
|
|
"loss": 0.3421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10770958662033081,
|
||
|
|
"step": 3805,
|
||
|
|
"valid_targets_mean": 10406.8,
|
||
|
|
"valid_targets_min": 2341
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.65664,
|
||
|
|
"grad_norm": 0.19327308471343702,
|
||
|
|
"learning_rate": 8.182588845818452e-06,
|
||
|
|
"loss": 0.3407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11743837594985962,
|
||
|
|
"step": 3810,
|
||
|
|
"valid_targets_mean": 11392.2,
|
||
|
|
"valid_targets_min": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.66144,
|
||
|
|
"grad_norm": 0.5322781547709468,
|
||
|
|
"learning_rate": 8.128602551157523e-06,
|
||
|
|
"loss": 0.3377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1046619713306427,
|
||
|
|
"step": 3815,
|
||
|
|
"valid_targets_mean": 9404.5,
|
||
|
|
"valid_targets_min": 1286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.66624,
|
||
|
|
"grad_norm": 0.18200763770766343,
|
||
|
|
"learning_rate": 8.074749479962407e-06,
|
||
|
|
"loss": 0.3407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10478302091360092,
|
||
|
|
"step": 3820,
|
||
|
|
"valid_targets_mean": 10424.2,
|
||
|
|
"valid_targets_min": 2328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.67104,
|
||
|
|
"grad_norm": 0.20154542379432241,
|
||
|
|
"learning_rate": 8.021030236584254e-06,
|
||
|
|
"loss": 0.3361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10680904239416122,
|
||
|
|
"step": 3825,
|
||
|
|
"valid_targets_mean": 9686.7,
|
||
|
|
"valid_targets_min": 2481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.67584,
|
||
|
|
"grad_norm": 0.20471388463535073,
|
||
|
|
"learning_rate": 7.967445423872384e-06,
|
||
|
|
"loss": 0.3486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12352913618087769,
|
||
|
|
"step": 3830,
|
||
|
|
"valid_targets_mean": 10895.8,
|
||
|
|
"valid_targets_min": 2669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.68064,
|
||
|
|
"grad_norm": 0.19496961604395602,
|
||
|
|
"learning_rate": 7.913995643167494e-06,
|
||
|
|
"loss": 0.3335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10220825672149658,
|
||
|
|
"step": 3835,
|
||
|
|
"valid_targets_mean": 11739.0,
|
||
|
|
"valid_targets_min": 1646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.68544,
|
||
|
|
"grad_norm": 0.18616220712690562,
|
||
|
|
"learning_rate": 7.860681494294917e-06,
|
||
|
|
"loss": 0.3331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11336371302604675,
|
||
|
|
"step": 3840,
|
||
|
|
"valid_targets_mean": 12121.3,
|
||
|
|
"valid_targets_min": 2219
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.69024,
|
||
|
|
"grad_norm": 0.19672724322192428,
|
||
|
|
"learning_rate": 7.80750357555792e-06,
|
||
|
|
"loss": 0.3332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10641404986381531,
|
||
|
|
"step": 3845,
|
||
|
|
"valid_targets_mean": 9148.8,
|
||
|
|
"valid_targets_min": 1182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.69504,
|
||
|
|
"grad_norm": 0.18760061959670887,
|
||
|
|
"learning_rate": 7.75446248373094e-06,
|
||
|
|
"loss": 0.3345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09877805411815643,
|
||
|
|
"step": 3850,
|
||
|
|
"valid_targets_mean": 10289.6,
|
||
|
|
"valid_targets_min": 1531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.69984,
|
||
|
|
"grad_norm": 0.1989894615739904,
|
||
|
|
"learning_rate": 7.701558814052928e-06,
|
||
|
|
"loss": 0.3353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11524476110935211,
|
||
|
|
"step": 3855,
|
||
|
|
"valid_targets_mean": 10849.2,
|
||
|
|
"valid_targets_min": 2253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.70464,
|
||
|
|
"grad_norm": 0.21263096110802848,
|
||
|
|
"learning_rate": 7.648793160220637e-06,
|
||
|
|
"loss": 0.333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0896211490035057,
|
||
|
|
"step": 3860,
|
||
|
|
"valid_targets_mean": 8943.8,
|
||
|
|
"valid_targets_min": 2080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.70944,
|
||
|
|
"grad_norm": 0.20660700096766874,
|
||
|
|
"learning_rate": 7.596166114381991e-06,
|
||
|
|
"loss": 0.3375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12305016815662384,
|
||
|
|
"step": 3865,
|
||
|
|
"valid_targets_mean": 11404.3,
|
||
|
|
"valid_targets_min": 2439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.71424,
|
||
|
|
"grad_norm": 0.1887513060702262,
|
||
|
|
"learning_rate": 7.543678267129408e-06,
|
||
|
|
"loss": 0.3351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10012684017419815,
|
||
|
|
"step": 3870,
|
||
|
|
"valid_targets_mean": 9752.9,
|
||
|
|
"valid_targets_min": 2479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.71904,
|
||
|
|
"grad_norm": 0.19859233526993486,
|
||
|
|
"learning_rate": 7.491330207493215e-06,
|
||
|
|
"loss": 0.3325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10596565902233124,
|
||
|
|
"step": 3875,
|
||
|
|
"valid_targets_mean": 9964.5,
|
||
|
|
"valid_targets_min": 2429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.72384,
|
||
|
|
"grad_norm": 0.1926164171143439,
|
||
|
|
"learning_rate": 7.4391225229349785e-06,
|
||
|
|
"loss": 0.3369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11391852796077728,
|
||
|
|
"step": 3880,
|
||
|
|
"valid_targets_mean": 12301.7,
|
||
|
|
"valid_targets_min": 2739
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.72864,
|
||
|
|
"grad_norm": 0.2044202427462678,
|
||
|
|
"learning_rate": 7.387055799340977e-06,
|
||
|
|
"loss": 0.3385,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09524258971214294,
|
||
|
|
"step": 3885,
|
||
|
|
"valid_targets_mean": 9396.2,
|
||
|
|
"valid_targets_min": 2039
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.73344,
|
||
|
|
"grad_norm": 0.20832735693973597,
|
||
|
|
"learning_rate": 7.3351306210155645e-06,
|
||
|
|
"loss": 0.3352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12448085099458694,
|
||
|
|
"step": 3890,
|
||
|
|
"valid_targets_mean": 11666.6,
|
||
|
|
"valid_targets_min": 2410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7382400000000002,
|
||
|
|
"grad_norm": 0.2012811682766443,
|
||
|
|
"learning_rate": 7.283347570674664e-06,
|
||
|
|
"loss": 0.3376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09556588530540466,
|
||
|
|
"step": 3895,
|
||
|
|
"valid_targets_mean": 8674.2,
|
||
|
|
"valid_targets_min": 2091
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.74304,
|
||
|
|
"grad_norm": 0.1954028099618188,
|
||
|
|
"learning_rate": 7.231707229439191e-06,
|
||
|
|
"loss": 0.3365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11363653838634491,
|
||
|
|
"step": 3900,
|
||
|
|
"valid_targets_mean": 10786.8,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.74784,
|
||
|
|
"grad_norm": 0.20984038963396634,
|
||
|
|
"learning_rate": 7.180210176828557e-06,
|
||
|
|
"loss": 0.3324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11528477072715759,
|
||
|
|
"step": 3905,
|
||
|
|
"valid_targets_mean": 10695.7,
|
||
|
|
"valid_targets_min": 1713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.75264,
|
||
|
|
"grad_norm": 0.19426620615010454,
|
||
|
|
"learning_rate": 7.1288569907541495e-06,
|
||
|
|
"loss": 0.3298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09247671067714691,
|
||
|
|
"step": 3910,
|
||
|
|
"valid_targets_mean": 9820.4,
|
||
|
|
"valid_targets_min": 2085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.75744,
|
||
|
|
"grad_norm": 0.20099406626199876,
|
||
|
|
"learning_rate": 7.0776482475128674e-06,
|
||
|
|
"loss": 0.3455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12034967541694641,
|
||
|
|
"step": 3915,
|
||
|
|
"valid_targets_mean": 11045.9,
|
||
|
|
"valid_targets_min": 2748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7622400000000003,
|
||
|
|
"grad_norm": 0.2012651470799037,
|
||
|
|
"learning_rate": 7.026584521780628e-06,
|
||
|
|
"loss": 0.3479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11899619549512863,
|
||
|
|
"step": 3920,
|
||
|
|
"valid_targets_mean": 10818.9,
|
||
|
|
"valid_targets_min": 2197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.76704,
|
||
|
|
"grad_norm": 0.2170502759681478,
|
||
|
|
"learning_rate": 6.9756663866059324e-06,
|
||
|
|
"loss": 0.3375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10469336807727814,
|
||
|
|
"step": 3925,
|
||
|
|
"valid_targets_mean": 9436.8,
|
||
|
|
"valid_targets_min": 2091
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.77184,
|
||
|
|
"grad_norm": 0.19380358298158798,
|
||
|
|
"learning_rate": 6.924894413403434e-06,
|
||
|
|
"loss": 0.3261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11549240350723267,
|
||
|
|
"step": 3930,
|
||
|
|
"valid_targets_mean": 10453.5,
|
||
|
|
"valid_targets_min": 993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.77664,
|
||
|
|
"grad_norm": 0.19883634363318978,
|
||
|
|
"learning_rate": 6.874269171947516e-06,
|
||
|
|
"loss": 0.332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09396840631961823,
|
||
|
|
"step": 3935,
|
||
|
|
"valid_targets_mean": 9193.5,
|
||
|
|
"valid_targets_min": 1774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.78144,
|
||
|
|
"grad_norm": 0.19563421512476295,
|
||
|
|
"learning_rate": 6.8237912303659195e-06,
|
||
|
|
"loss": 0.3306,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1019580215215683,
|
||
|
|
"step": 3940,
|
||
|
|
"valid_targets_mean": 8852.4,
|
||
|
|
"valid_targets_min": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7862400000000003,
|
||
|
|
"grad_norm": 0.1972328228862978,
|
||
|
|
"learning_rate": 6.773461155133334e-06,
|
||
|
|
"loss": 0.3387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11583631485700607,
|
||
|
|
"step": 3945,
|
||
|
|
"valid_targets_mean": 10057.0,
|
||
|
|
"valid_targets_min": 1606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7910399999999997,
|
||
|
|
"grad_norm": 0.1779667766611541,
|
||
|
|
"learning_rate": 6.723279511065088e-06,
|
||
|
|
"loss": 0.3358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13784457743167877,
|
||
|
|
"step": 3950,
|
||
|
|
"valid_targets_mean": 12148.1,
|
||
|
|
"valid_targets_min": 1637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.79584,
|
||
|
|
"grad_norm": 0.24094579223060217,
|
||
|
|
"learning_rate": 6.673246861310751e-06,
|
||
|
|
"loss": 0.3365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09021230041980743,
|
||
|
|
"step": 3955,
|
||
|
|
"valid_targets_mean": 9060.4,
|
||
|
|
"valid_targets_min": 1710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.80064,
|
||
|
|
"grad_norm": 0.20096836711477922,
|
||
|
|
"learning_rate": 6.623363767347874e-06,
|
||
|
|
"loss": 0.334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11364022642374039,
|
||
|
|
"step": 3960,
|
||
|
|
"valid_targets_mean": 10833.1,
|
||
|
|
"valid_targets_min": 1812
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.80544,
|
||
|
|
"grad_norm": 0.19641419404223437,
|
||
|
|
"learning_rate": 6.5736307889756425e-06,
|
||
|
|
"loss": 0.3352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12811903655529022,
|
||
|
|
"step": 3965,
|
||
|
|
"valid_targets_mean": 11935.5,
|
||
|
|
"valid_targets_min": 3243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.81024,
|
||
|
|
"grad_norm": 0.2088422125497686,
|
||
|
|
"learning_rate": 6.5240484843086095e-06,
|
||
|
|
"loss": 0.3381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11009708046913147,
|
||
|
|
"step": 3970,
|
||
|
|
"valid_targets_mean": 9540.8,
|
||
|
|
"valid_targets_min": 2491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8150399999999998,
|
||
|
|
"grad_norm": 0.17629238607105105,
|
||
|
|
"learning_rate": 6.474617409770441e-06,
|
||
|
|
"loss": 0.3335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08507069945335388,
|
||
|
|
"step": 3975,
|
||
|
|
"valid_targets_mean": 8515.1,
|
||
|
|
"valid_targets_min": 1979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.81984,
|
||
|
|
"grad_norm": 0.20252130389575573,
|
||
|
|
"learning_rate": 6.425338120087665e-06,
|
||
|
|
"loss": 0.3345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1134904995560646,
|
||
|
|
"step": 3980,
|
||
|
|
"valid_targets_mean": 10508.7,
|
||
|
|
"valid_targets_min": 893
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.82464,
|
||
|
|
"grad_norm": 0.1879269492686956,
|
||
|
|
"learning_rate": 6.3762111682834374e-06,
|
||
|
|
"loss": 0.3318,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09007836878299713,
|
||
|
|
"step": 3985,
|
||
|
|
"valid_targets_mean": 8626.4,
|
||
|
|
"valid_targets_min": 1175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.82944,
|
||
|
|
"grad_norm": 0.176619695453459,
|
||
|
|
"learning_rate": 6.327237105671362e-06,
|
||
|
|
"loss": 0.341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12505346536636353,
|
||
|
|
"step": 3990,
|
||
|
|
"valid_targets_mean": 12297.1,
|
||
|
|
"valid_targets_min": 2315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.83424,
|
||
|
|
"grad_norm": 0.18064417692761175,
|
||
|
|
"learning_rate": 6.278416481849274e-06,
|
||
|
|
"loss": 0.3388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10311710834503174,
|
||
|
|
"step": 3995,
|
||
|
|
"valid_targets_mean": 10158.5,
|
||
|
|
"valid_targets_min": 2461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.83904,
|
||
|
|
"grad_norm": 0.19352979258741507,
|
||
|
|
"learning_rate": 6.22974984469308e-06,
|
||
|
|
"loss": 0.3307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10242927074432373,
|
||
|
|
"step": 4000,
|
||
|
|
"valid_targets_mean": 10619.7,
|
||
|
|
"valid_targets_min": 2218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.84384,
|
||
|
|
"grad_norm": 0.20312962511126573,
|
||
|
|
"learning_rate": 6.181237740350625e-06,
|
||
|
|
"loss": 0.3347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12367504835128784,
|
||
|
|
"step": 4005,
|
||
|
|
"valid_targets_mean": 10997.3,
|
||
|
|
"valid_targets_min": 2039
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.84864,
|
||
|
|
"grad_norm": 0.20281314060838945,
|
||
|
|
"learning_rate": 6.132880713235543e-06,
|
||
|
|
"loss": 0.3308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10259454697370529,
|
||
|
|
"step": 4010,
|
||
|
|
"valid_targets_mean": 10200.3,
|
||
|
|
"valid_targets_min": 1817
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.85344,
|
||
|
|
"grad_norm": 0.19236705300579124,
|
||
|
|
"learning_rate": 6.084679306021162e-06,
|
||
|
|
"loss": 0.3341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11262617260217667,
|
||
|
|
"step": 4015,
|
||
|
|
"valid_targets_mean": 10491.1,
|
||
|
|
"valid_targets_min": 2912
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.85824,
|
||
|
|
"grad_norm": 0.21216763050139026,
|
||
|
|
"learning_rate": 6.036634059634403e-06,
|
||
|
|
"loss": 0.3407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12088140100240707,
|
||
|
|
"step": 4020,
|
||
|
|
"valid_targets_mean": 10524.1,
|
||
|
|
"valid_targets_min": 2892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.86304,
|
||
|
|
"grad_norm": 0.18810111079172934,
|
||
|
|
"learning_rate": 5.988745513249723e-06,
|
||
|
|
"loss": 0.3415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11481692641973495,
|
||
|
|
"step": 4025,
|
||
|
|
"valid_targets_mean": 11395.9,
|
||
|
|
"valid_targets_min": 2159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.86784,
|
||
|
|
"grad_norm": 0.18646315134853542,
|
||
|
|
"learning_rate": 5.941014204283065e-06,
|
||
|
|
"loss": 0.3328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12088949233293533,
|
||
|
|
"step": 4030,
|
||
|
|
"valid_targets_mean": 10613.7,
|
||
|
|
"valid_targets_min": 2751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.87264,
|
||
|
|
"grad_norm": 0.24880825961937425,
|
||
|
|
"learning_rate": 5.893440668385797e-06,
|
||
|
|
"loss": 0.3421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10480111837387085,
|
||
|
|
"step": 4035,
|
||
|
|
"valid_targets_mean": 9637.3,
|
||
|
|
"valid_targets_min": 1479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.87744,
|
||
|
|
"grad_norm": 0.21853378472081741,
|
||
|
|
"learning_rate": 5.8460254394387335e-06,
|
||
|
|
"loss": 0.3425,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10361306369304657,
|
||
|
|
"step": 4040,
|
||
|
|
"valid_targets_mean": 9368.8,
|
||
|
|
"valid_targets_min": 1418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.88224,
|
||
|
|
"grad_norm": 0.1811642976364691,
|
||
|
|
"learning_rate": 5.798769049546136e-06,
|
||
|
|
"loss": 0.3341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11255176365375519,
|
||
|
|
"step": 4045,
|
||
|
|
"valid_targets_mean": 12307.6,
|
||
|
|
"valid_targets_min": 3160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.88704,
|
||
|
|
"grad_norm": 0.17835674257256226,
|
||
|
|
"learning_rate": 5.751672029029734e-06,
|
||
|
|
"loss": 0.3347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11680912226438522,
|
||
|
|
"step": 4050,
|
||
|
|
"valid_targets_mean": 11949.1,
|
||
|
|
"valid_targets_min": 2171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.89184,
|
||
|
|
"grad_norm": 0.2660592016540433,
|
||
|
|
"learning_rate": 5.704734906422775e-06,
|
||
|
|
"loss": 0.3386,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09463915973901749,
|
||
|
|
"step": 4055,
|
||
|
|
"valid_targets_mean": 9402.0,
|
||
|
|
"valid_targets_min": 2696
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.89664,
|
||
|
|
"grad_norm": 0.19566691507927225,
|
||
|
|
"learning_rate": 5.657958208464103e-06,
|
||
|
|
"loss": 0.3399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12318737804889679,
|
||
|
|
"step": 4060,
|
||
|
|
"valid_targets_mean": 11424.6,
|
||
|
|
"valid_targets_min": 2863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.90144,
|
||
|
|
"grad_norm": 0.1844478364760468,
|
||
|
|
"learning_rate": 5.611342460092244e-06,
|
||
|
|
"loss": 0.3359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08694644272327423,
|
||
|
|
"step": 4065,
|
||
|
|
"valid_targets_mean": 9213.2,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.90624,
|
||
|
|
"grad_norm": 0.1873501610977791,
|
||
|
|
"learning_rate": 5.564888184439505e-06,
|
||
|
|
"loss": 0.3394,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1016213595867157,
|
||
|
|
"step": 4070,
|
||
|
|
"valid_targets_mean": 8913.0,
|
||
|
|
"valid_targets_min": 2279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.91104,
|
||
|
|
"grad_norm": 0.1966574544973596,
|
||
|
|
"learning_rate": 5.5185959028261135e-06,
|
||
|
|
"loss": 0.3359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10623191297054291,
|
||
|
|
"step": 4075,
|
||
|
|
"valid_targets_mean": 10693.3,
|
||
|
|
"valid_targets_min": 2865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.91584,
|
||
|
|
"grad_norm": 0.1879431423815426,
|
||
|
|
"learning_rate": 5.47246613475436e-06,
|
||
|
|
"loss": 0.3356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12403889000415802,
|
||
|
|
"step": 4080,
|
||
|
|
"valid_targets_mean": 11291.2,
|
||
|
|
"valid_targets_min": 1544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.92064,
|
||
|
|
"grad_norm": 0.18364320485487998,
|
||
|
|
"learning_rate": 5.4264993979027735e-06,
|
||
|
|
"loss": 0.3292,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12664386630058289,
|
||
|
|
"step": 4085,
|
||
|
|
"valid_targets_mean": 12097.9,
|
||
|
|
"valid_targets_min": 2029
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.92544,
|
||
|
|
"grad_norm": 0.19447101711983544,
|
||
|
|
"learning_rate": 5.380696208120315e-06,
|
||
|
|
"loss": 0.332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10284821689128876,
|
||
|
|
"step": 4090,
|
||
|
|
"valid_targets_mean": 10145.8,
|
||
|
|
"valid_targets_min": 1838
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.93024,
|
||
|
|
"grad_norm": 0.18444617156461748,
|
||
|
|
"learning_rate": 5.335057079420571e-06,
|
||
|
|
"loss": 0.3296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10592512786388397,
|
||
|
|
"step": 4095,
|
||
|
|
"valid_targets_mean": 9922.3,
|
||
|
|
"valid_targets_min": 2912
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.93504,
|
||
|
|
"grad_norm": 0.19573933070889935,
|
||
|
|
"learning_rate": 5.289582523976015e-06,
|
||
|
|
"loss": 0.3353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09200359880924225,
|
||
|
|
"step": 4100,
|
||
|
|
"valid_targets_mean": 8765.0,
|
||
|
|
"valid_targets_min": 3189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9398400000000002,
|
||
|
|
"grad_norm": 0.21184991033974448,
|
||
|
|
"learning_rate": 5.244273052112241e-06,
|
||
|
|
"loss": 0.3304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11215705424547195,
|
||
|
|
"step": 4105,
|
||
|
|
"valid_targets_mean": 9907.4,
|
||
|
|
"valid_targets_min": 1249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.94464,
|
||
|
|
"grad_norm": 0.18565904600835953,
|
||
|
|
"learning_rate": 5.199129172302224e-06,
|
||
|
|
"loss": 0.3239,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10161211341619492,
|
||
|
|
"step": 4110,
|
||
|
|
"valid_targets_mean": 10105.3,
|
||
|
|
"valid_targets_min": 2284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.94944,
|
||
|
|
"grad_norm": 0.1772815671503692,
|
||
|
|
"learning_rate": 5.154151391160638e-06,
|
||
|
|
"loss": 0.333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08798927068710327,
|
||
|
|
"step": 4115,
|
||
|
|
"valid_targets_mean": 9611.5,
|
||
|
|
"valid_targets_min": 2646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.95424,
|
||
|
|
"grad_norm": 0.19590311785568074,
|
||
|
|
"learning_rate": 5.109340213438156e-06,
|
||
|
|
"loss": 0.3329,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11088310182094574,
|
||
|
|
"step": 4120,
|
||
|
|
"valid_targets_mean": 10398.2,
|
||
|
|
"valid_targets_min": 2528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.95904,
|
||
|
|
"grad_norm": 0.18151207861605087,
|
||
|
|
"learning_rate": 5.0646961420157995e-06,
|
||
|
|
"loss": 0.3419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11617287993431091,
|
||
|
|
"step": 4125,
|
||
|
|
"valid_targets_mean": 11599.7,
|
||
|
|
"valid_targets_min": 2772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9638400000000003,
|
||
|
|
"grad_norm": 0.19116201774572908,
|
||
|
|
"learning_rate": 5.020219677899276e-06,
|
||
|
|
"loss": 0.3328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10209350287914276,
|
||
|
|
"step": 4130,
|
||
|
|
"valid_targets_mean": 10222.2,
|
||
|
|
"valid_targets_min": 2429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9686399999999997,
|
||
|
|
"grad_norm": 0.2024256331856205,
|
||
|
|
"learning_rate": 4.975911320213365e-06,
|
||
|
|
"loss": 0.3299,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08789606392383575,
|
||
|
|
"step": 4135,
|
||
|
|
"valid_targets_mean": 8624.2,
|
||
|
|
"valid_targets_min": 1467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.97344,
|
||
|
|
"grad_norm": 0.19195030700994084,
|
||
|
|
"learning_rate": 4.931771566196332e-06,
|
||
|
|
"loss": 0.3366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10818715393543243,
|
||
|
|
"step": 4140,
|
||
|
|
"valid_targets_mean": 10638.1,
|
||
|
|
"valid_targets_min": 2718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.97824,
|
||
|
|
"grad_norm": 0.18238669600596105,
|
||
|
|
"learning_rate": 4.887800911194327e-06,
|
||
|
|
"loss": 0.3373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1121128499507904,
|
||
|
|
"step": 4145,
|
||
|
|
"valid_targets_mean": 10199.2,
|
||
|
|
"valid_targets_min": 1693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.98304,
|
||
|
|
"grad_norm": 0.19665733186907425,
|
||
|
|
"learning_rate": 4.8439998486558246e-06,
|
||
|
|
"loss": 0.3322,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11327169835567474,
|
||
|
|
"step": 4150,
|
||
|
|
"valid_targets_mean": 10270.0,
|
||
|
|
"valid_targets_min": 2069
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9878400000000003,
|
||
|
|
"grad_norm": 0.20032239446085373,
|
||
|
|
"learning_rate": 4.800368870126111e-06,
|
||
|
|
"loss": 0.3392,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12412586808204651,
|
||
|
|
"step": 4155,
|
||
|
|
"valid_targets_mean": 9892.8,
|
||
|
|
"valid_targets_min": 1814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9926399999999997,
|
||
|
|
"grad_norm": 0.20869919617855076,
|
||
|
|
"learning_rate": 4.756908465241736e-06,
|
||
|
|
"loss": 0.336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12254258990287781,
|
||
|
|
"step": 4160,
|
||
|
|
"valid_targets_mean": 11407.7,
|
||
|
|
"valid_targets_min": 2934
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.99744,
|
||
|
|
"grad_norm": 0.21961616056297487,
|
||
|
|
"learning_rate": 4.713619121725039e-06,
|
||
|
|
"loss": 0.3418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11416822671890259,
|
||
|
|
"step": 4165,
|
||
|
|
"valid_targets_mean": 9896.4,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.00192,
|
||
|
|
"grad_norm": 0.20465337196618452,
|
||
|
|
"learning_rate": 4.670501325378682e-06,
|
||
|
|
"loss": 0.3423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09974993765354156,
|
||
|
|
"step": 4170,
|
||
|
|
"valid_targets_mean": 9093.5,
|
||
|
|
"valid_targets_min": 2111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.00672,
|
||
|
|
"grad_norm": 0.18822425307587917,
|
||
|
|
"learning_rate": 4.627555560080173e-06,
|
||
|
|
"loss": 0.3463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11911550164222717,
|
||
|
|
"step": 4175,
|
||
|
|
"valid_targets_mean": 10731.2,
|
||
|
|
"valid_targets_min": 2580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.01152,
|
||
|
|
"grad_norm": 0.17412564908685052,
|
||
|
|
"learning_rate": 4.5847823077764565e-06,
|
||
|
|
"loss": 0.3483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08448906242847443,
|
||
|
|
"step": 4180,
|
||
|
|
"valid_targets_mean": 8920.5,
|
||
|
|
"valid_targets_min": 1525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.01632,
|
||
|
|
"grad_norm": 0.20289618242630778,
|
||
|
|
"learning_rate": 4.5421820484784936e-06,
|
||
|
|
"loss": 0.341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0968443751335144,
|
||
|
|
"step": 4185,
|
||
|
|
"valid_targets_mean": 9025.8,
|
||
|
|
"valid_targets_min": 1431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.02112,
|
||
|
|
"grad_norm": 0.18576117005555698,
|
||
|
|
"learning_rate": 4.499755260255881e-06,
|
||
|
|
"loss": 0.3408,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12763839960098267,
|
||
|
|
"step": 4190,
|
||
|
|
"valid_targets_mean": 10916.9,
|
||
|
|
"valid_targets_min": 1377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.02592,
|
||
|
|
"grad_norm": 0.19396303860305614,
|
||
|
|
"learning_rate": 4.457502419231483e-06,
|
||
|
|
"loss": 0.3499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11627334356307983,
|
||
|
|
"step": 4195,
|
||
|
|
"valid_targets_mean": 10039.9,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.03072,
|
||
|
|
"grad_norm": 0.17938812248691008,
|
||
|
|
"learning_rate": 4.415423999576091e-06,
|
||
|
|
"loss": 0.3549,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11132301390171051,
|
||
|
|
"step": 4200,
|
||
|
|
"valid_targets_mean": 9779.9,
|
||
|
|
"valid_targets_min": 1798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.03552,
|
||
|
|
"grad_norm": 0.19198874415999007,
|
||
|
|
"learning_rate": 4.373520473503097e-06,
|
||
|
|
"loss": 0.3452,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12028753757476807,
|
||
|
|
"step": 4205,
|
||
|
|
"valid_targets_mean": 10074.8,
|
||
|
|
"valid_targets_min": 2026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.04032,
|
||
|
|
"grad_norm": 0.19108626766525713,
|
||
|
|
"learning_rate": 4.33179231126321e-06,
|
||
|
|
"loss": 0.3394,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12190006673336029,
|
||
|
|
"step": 4210,
|
||
|
|
"valid_targets_mean": 10831.8,
|
||
|
|
"valid_targets_min": 1403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.04512,
|
||
|
|
"grad_norm": 0.17387555307348446,
|
||
|
|
"learning_rate": 4.2902399811391575e-06,
|
||
|
|
"loss": 0.3443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11498603224754333,
|
||
|
|
"step": 4215,
|
||
|
|
"valid_targets_mean": 11074.9,
|
||
|
|
"valid_targets_min": 3269
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.04992,
|
||
|
|
"grad_norm": 0.19072981281428716,
|
||
|
|
"learning_rate": 4.248863949440436e-06,
|
||
|
|
"loss": 0.3406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1275830864906311,
|
||
|
|
"step": 4220,
|
||
|
|
"valid_targets_mean": 11321.0,
|
||
|
|
"valid_targets_min": 2614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.05472,
|
||
|
|
"grad_norm": 0.17614221224241058,
|
||
|
|
"learning_rate": 4.207664680498094e-06,
|
||
|
|
"loss": 0.3486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10699561983346939,
|
||
|
|
"step": 4225,
|
||
|
|
"valid_targets_mean": 9597.5,
|
||
|
|
"valid_targets_min": 1610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.05952,
|
||
|
|
"grad_norm": 0.17640127607763398,
|
||
|
|
"learning_rate": 4.166642636659495e-06,
|
||
|
|
"loss": 0.3437,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12896181643009186,
|
||
|
|
"step": 4230,
|
||
|
|
"valid_targets_mean": 10785.8,
|
||
|
|
"valid_targets_min": 2068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.06432,
|
||
|
|
"grad_norm": 0.2190402246225578,
|
||
|
|
"learning_rate": 4.125798278283155e-06,
|
||
|
|
"loss": 0.3467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12219704687595367,
|
||
|
|
"step": 4235,
|
||
|
|
"valid_targets_mean": 11353.1,
|
||
|
|
"valid_targets_min": 1816
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.06912,
|
||
|
|
"grad_norm": 0.18092997420777618,
|
||
|
|
"learning_rate": 4.085132063733554e-06,
|
||
|
|
"loss": 0.3496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11919304728507996,
|
||
|
|
"step": 4240,
|
||
|
|
"valid_targets_mean": 10082.1,
|
||
|
|
"valid_targets_min": 2704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.07392,
|
||
|
|
"grad_norm": 0.1823612174354958,
|
||
|
|
"learning_rate": 4.0446444493760165e-06,
|
||
|
|
"loss": 0.3487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12384763360023499,
|
||
|
|
"step": 4245,
|
||
|
|
"valid_targets_mean": 10839.3,
|
||
|
|
"valid_targets_min": 1936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.07872,
|
||
|
|
"grad_norm": 0.20653639568032384,
|
||
|
|
"learning_rate": 4.004335889571556e-06,
|
||
|
|
"loss": 0.3514,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11276553571224213,
|
||
|
|
"step": 4250,
|
||
|
|
"valid_targets_mean": 10286.0,
|
||
|
|
"valid_targets_min": 2725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.08352,
|
||
|
|
"grad_norm": 0.24765300903341972,
|
||
|
|
"learning_rate": 3.96420683667182e-06,
|
||
|
|
"loss": 0.3497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13264083862304688,
|
||
|
|
"step": 4255,
|
||
|
|
"valid_targets_mean": 10544.3,
|
||
|
|
"valid_targets_min": 2485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.08832,
|
||
|
|
"grad_norm": 0.1854786999415691,
|
||
|
|
"learning_rate": 3.924257741013968e-06,
|
||
|
|
"loss": 0.3472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1216278001666069,
|
||
|
|
"step": 4260,
|
||
|
|
"valid_targets_mean": 11002.3,
|
||
|
|
"valid_targets_min": 2382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.09312,
|
||
|
|
"grad_norm": 0.1746636739434648,
|
||
|
|
"learning_rate": 3.884489050915652e-06,
|
||
|
|
"loss": 0.3457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10394848883152008,
|
||
|
|
"step": 4265,
|
||
|
|
"valid_targets_mean": 9912.5,
|
||
|
|
"valid_targets_min": 1695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.09792,
|
||
|
|
"grad_norm": 0.19193550197987494,
|
||
|
|
"learning_rate": 3.844901212669962e-06,
|
||
|
|
"loss": 0.354,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10642480850219727,
|
||
|
|
"step": 4270,
|
||
|
|
"valid_targets_mean": 11185.8,
|
||
|
|
"valid_targets_min": 1752
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.10272,
|
||
|
|
"grad_norm": 0.17195296130981225,
|
||
|
|
"learning_rate": 3.8054946705404415e-06,
|
||
|
|
"loss": 0.3454,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15103162825107574,
|
||
|
|
"step": 4275,
|
||
|
|
"valid_targets_mean": 13565.3,
|
||
|
|
"valid_targets_min": 2685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.10752,
|
||
|
|
"grad_norm": 0.18746276379642257,
|
||
|
|
"learning_rate": 3.7662698667560714e-06,
|
||
|
|
"loss": 0.3444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1165471002459526,
|
||
|
|
"step": 4280,
|
||
|
|
"valid_targets_mean": 9233.0,
|
||
|
|
"valid_targets_min": 3457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.11232,
|
||
|
|
"grad_norm": 0.19394818833899224,
|
||
|
|
"learning_rate": 3.7272272415063484e-06,
|
||
|
|
"loss": 0.3406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10333932936191559,
|
||
|
|
"step": 4285,
|
||
|
|
"valid_targets_mean": 10364.3,
|
||
|
|
"valid_targets_min": 2115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.11712,
|
||
|
|
"grad_norm": 0.19359491194847078,
|
||
|
|
"learning_rate": 3.6883672329363007e-06,
|
||
|
|
"loss": 0.343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10657678544521332,
|
||
|
|
"step": 4290,
|
||
|
|
"valid_targets_mean": 8299.9,
|
||
|
|
"valid_targets_min": 1375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.12192,
|
||
|
|
"grad_norm": 0.1932798960704116,
|
||
|
|
"learning_rate": 3.649690277141598e-06,
|
||
|
|
"loss": 0.3469,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11276364326477051,
|
||
|
|
"step": 4295,
|
||
|
|
"valid_targets_mean": 8782.7,
|
||
|
|
"valid_targets_min": 2273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.12672,
|
||
|
|
"grad_norm": 0.19351234269362713,
|
||
|
|
"learning_rate": 3.6111968081636507e-06,
|
||
|
|
"loss": 0.3516,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10978444665670395,
|
||
|
|
"step": 4300,
|
||
|
|
"valid_targets_mean": 9584.4,
|
||
|
|
"valid_targets_min": 1770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.13152,
|
||
|
|
"grad_norm": 0.18945958380808156,
|
||
|
|
"learning_rate": 3.572887257984743e-06,
|
||
|
|
"loss": 0.3409,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10315452516078949,
|
||
|
|
"step": 4305,
|
||
|
|
"valid_targets_mean": 9444.8,
|
||
|
|
"valid_targets_min": 2531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1363199999999996,
|
||
|
|
"grad_norm": 0.179621377461173,
|
||
|
|
"learning_rate": 3.5347620565231733e-06,
|
||
|
|
"loss": 0.3402,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10427289456129074,
|
||
|
|
"step": 4310,
|
||
|
|
"valid_targets_mean": 9924.7,
|
||
|
|
"valid_targets_min": 2863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.14112,
|
||
|
|
"grad_norm": 0.19555836606663374,
|
||
|
|
"learning_rate": 3.496821631628442e-06,
|
||
|
|
"loss": 0.337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10842613875865936,
|
||
|
|
"step": 4315,
|
||
|
|
"valid_targets_mean": 10095.8,
|
||
|
|
"valid_targets_min": 1958
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.14592,
|
||
|
|
"grad_norm": 0.19157171700505904,
|
||
|
|
"learning_rate": 3.459066409076448e-06,
|
||
|
|
"loss": 0.3552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12181080877780914,
|
||
|
|
"step": 4320,
|
||
|
|
"valid_targets_mean": 10471.4,
|
||
|
|
"valid_targets_min": 3460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.15072,
|
||
|
|
"grad_norm": 0.18309997648220724,
|
||
|
|
"learning_rate": 3.421496812564713e-06,
|
||
|
|
"loss": 0.3536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11011368781328201,
|
||
|
|
"step": 4325,
|
||
|
|
"valid_targets_mean": 10075.7,
|
||
|
|
"valid_targets_min": 1911
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.15552,
|
||
|
|
"grad_norm": 0.1998393681086798,
|
||
|
|
"learning_rate": 3.384113263707609e-06,
|
||
|
|
"loss": 0.3387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10688625276088715,
|
||
|
|
"step": 4330,
|
||
|
|
"valid_targets_mean": 10504.6,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.16032,
|
||
|
|
"grad_norm": 0.18024162398203716,
|
||
|
|
"learning_rate": 3.34691618203165e-06,
|
||
|
|
"loss": 0.345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11504953354597092,
|
||
|
|
"step": 4335,
|
||
|
|
"valid_targets_mean": 11245.9,
|
||
|
|
"valid_targets_min": 1556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.16512,
|
||
|
|
"grad_norm": 0.1937504451913351,
|
||
|
|
"learning_rate": 3.309905984970765e-06,
|
||
|
|
"loss": 0.3436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12348227202892303,
|
||
|
|
"step": 4340,
|
||
|
|
"valid_targets_mean": 11246.7,
|
||
|
|
"valid_targets_min": 1248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.16992,
|
||
|
|
"grad_norm": 0.17943992287185476,
|
||
|
|
"learning_rate": 3.2730830878616305e-06,
|
||
|
|
"loss": 0.3484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1096753403544426,
|
||
|
|
"step": 4345,
|
||
|
|
"valid_targets_mean": 9908.5,
|
||
|
|
"valid_targets_min": 2863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.17472,
|
||
|
|
"grad_norm": 0.20407149963617882,
|
||
|
|
"learning_rate": 3.2364479039389973e-06,
|
||
|
|
"loss": 0.3456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1211603581905365,
|
||
|
|
"step": 4350,
|
||
|
|
"valid_targets_mean": 10384.3,
|
||
|
|
"valid_targets_min": 2893
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.17952,
|
||
|
|
"grad_norm": 0.1758432680334536,
|
||
|
|
"learning_rate": 3.2000008443310505e-06,
|
||
|
|
"loss": 0.3444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12273865193128586,
|
||
|
|
"step": 4355,
|
||
|
|
"valid_targets_mean": 11879.3,
|
||
|
|
"valid_targets_min": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.18432,
|
||
|
|
"grad_norm": 0.18831892864707575,
|
||
|
|
"learning_rate": 3.1637423180548232e-06,
|
||
|
|
"loss": 0.3482,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.07836240530014038,
|
||
|
|
"step": 4360,
|
||
|
|
"valid_targets_mean": 6800.3,
|
||
|
|
"valid_targets_min": 1637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.18912,
|
||
|
|
"grad_norm": 0.1783636331291251,
|
||
|
|
"learning_rate": 3.127672732011564e-06,
|
||
|
|
"loss": 0.3531,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11790993809700012,
|
||
|
|
"step": 4365,
|
||
|
|
"valid_targets_mean": 11705.6,
|
||
|
|
"valid_targets_min": 3960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.19392,
|
||
|
|
"grad_norm": 0.1805010427405642,
|
||
|
|
"learning_rate": 3.0917924909821993e-06,
|
||
|
|
"loss": 0.3473,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13108694553375244,
|
||
|
|
"step": 4370,
|
||
|
|
"valid_targets_mean": 11730.7,
|
||
|
|
"valid_targets_min": 1672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.19872,
|
||
|
|
"grad_norm": 0.16950549661719758,
|
||
|
|
"learning_rate": 3.0561019976227867e-06,
|
||
|
|
"loss": 0.3353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10931354761123657,
|
||
|
|
"step": 4375,
|
||
|
|
"valid_targets_mean": 11013.4,
|
||
|
|
"valid_targets_min": 2257
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.20352,
|
||
|
|
"grad_norm": 0.171592888138618,
|
||
|
|
"learning_rate": 3.020601652459989e-06,
|
||
|
|
"loss": 0.3489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.095522940158844,
|
||
|
|
"step": 4380,
|
||
|
|
"valid_targets_mean": 9442.3,
|
||
|
|
"valid_targets_min": 2247
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.20832,
|
||
|
|
"grad_norm": 0.2140466499221073,
|
||
|
|
"learning_rate": 2.9852918538865847e-06,
|
||
|
|
"loss": 0.3401,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09676612168550491,
|
||
|
|
"step": 4385,
|
||
|
|
"valid_targets_mean": 8869.3,
|
||
|
|
"valid_targets_min": 2524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.21312,
|
||
|
|
"grad_norm": 0.18684710732010837,
|
||
|
|
"learning_rate": 2.950172998156995e-06,
|
||
|
|
"loss": 0.3471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11988385021686554,
|
||
|
|
"step": 4390,
|
||
|
|
"valid_targets_mean": 11438.3,
|
||
|
|
"valid_targets_min": 2188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.21792,
|
||
|
|
"grad_norm": 0.18184742236751114,
|
||
|
|
"learning_rate": 2.91524547938284e-06,
|
||
|
|
"loss": 0.339,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09970283508300781,
|
||
|
|
"step": 4395,
|
||
|
|
"valid_targets_mean": 9403.1,
|
||
|
|
"valid_targets_min": 2697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.22272,
|
||
|
|
"grad_norm": 0.2114210981320018,
|
||
|
|
"learning_rate": 2.880509689528519e-06,
|
||
|
|
"loss": 0.3379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12056919932365417,
|
||
|
|
"step": 4400,
|
||
|
|
"valid_targets_mean": 10573.0,
|
||
|
|
"valid_targets_min": 1906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.22752,
|
||
|
|
"grad_norm": 0.18883655838403018,
|
||
|
|
"learning_rate": 2.845966018406796e-06,
|
||
|
|
"loss": 0.3422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10442760586738586,
|
||
|
|
"step": 4405,
|
||
|
|
"valid_targets_mean": 9225.2,
|
||
|
|
"valid_targets_min": 2768
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.23232,
|
||
|
|
"grad_norm": 0.17426172584380423,
|
||
|
|
"learning_rate": 2.8116148536744448e-06,
|
||
|
|
"loss": 0.346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12738531827926636,
|
||
|
|
"step": 4410,
|
||
|
|
"valid_targets_mean": 11602.2,
|
||
|
|
"valid_targets_min": 2304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.23712,
|
||
|
|
"grad_norm": 0.18410375145788294,
|
||
|
|
"learning_rate": 2.777456580827882e-06,
|
||
|
|
"loss": 0.3439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12198962271213531,
|
||
|
|
"step": 4415,
|
||
|
|
"valid_targets_mean": 11050.6,
|
||
|
|
"valid_targets_min": 2639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.24192,
|
||
|
|
"grad_norm": 0.18769558990904503,
|
||
|
|
"learning_rate": 2.7434915831988517e-06,
|
||
|
|
"loss": 0.3444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11794613301753998,
|
||
|
|
"step": 4420,
|
||
|
|
"valid_targets_mean": 10889.8,
|
||
|
|
"valid_targets_min": 1751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.24672,
|
||
|
|
"grad_norm": 0.2338405926765633,
|
||
|
|
"learning_rate": 2.7097202419501246e-06,
|
||
|
|
"loss": 0.3488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11257736384868622,
|
||
|
|
"step": 4425,
|
||
|
|
"valid_targets_mean": 9287.1,
|
||
|
|
"valid_targets_min": 1177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.25152,
|
||
|
|
"grad_norm": 0.17360265309040923,
|
||
|
|
"learning_rate": 2.6761429360712045e-06,
|
||
|
|
"loss": 0.343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11555704474449158,
|
||
|
|
"step": 4430,
|
||
|
|
"valid_targets_mean": 10002.6,
|
||
|
|
"valid_targets_min": 2074
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.25632,
|
||
|
|
"grad_norm": 0.17568574678772966,
|
||
|
|
"learning_rate": 2.642760042374106e-06,
|
||
|
|
"loss": 0.353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1140856221318245,
|
||
|
|
"step": 4435,
|
||
|
|
"valid_targets_mean": 10473.9,
|
||
|
|
"valid_targets_min": 993
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.26112,
|
||
|
|
"grad_norm": 0.17776634189725837,
|
||
|
|
"learning_rate": 2.6095719354890903e-06,
|
||
|
|
"loss": 0.3406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11473214626312256,
|
||
|
|
"step": 4440,
|
||
|
|
"valid_targets_mean": 10775.9,
|
||
|
|
"valid_targets_min": 3243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.26592,
|
||
|
|
"grad_norm": 0.1786523752405562,
|
||
|
|
"learning_rate": 2.5765789878604852e-06,
|
||
|
|
"loss": 0.3424,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11675288528203964,
|
||
|
|
"step": 4445,
|
||
|
|
"valid_targets_mean": 11546.2,
|
||
|
|
"valid_targets_min": 2326
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.27072,
|
||
|
|
"grad_norm": 0.18468001890763955,
|
||
|
|
"learning_rate": 2.543781569742496e-06,
|
||
|
|
"loss": 0.3322,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11585703492164612,
|
||
|
|
"step": 4450,
|
||
|
|
"valid_targets_mean": 10161.7,
|
||
|
|
"valid_targets_min": 2874
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.27552,
|
||
|
|
"grad_norm": 0.16761680409745108,
|
||
|
|
"learning_rate": 2.5111800491950523e-06,
|
||
|
|
"loss": 0.3453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09747034311294556,
|
||
|
|
"step": 4455,
|
||
|
|
"valid_targets_mean": 10170.5,
|
||
|
|
"valid_targets_min": 2789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.28032,
|
||
|
|
"grad_norm": 0.17728096040510224,
|
||
|
|
"learning_rate": 2.4787747920796723e-06,
|
||
|
|
"loss": 0.3427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1128934770822525,
|
||
|
|
"step": 4460,
|
||
|
|
"valid_targets_mean": 9173.6,
|
||
|
|
"valid_targets_min": 2190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.28512,
|
||
|
|
"grad_norm": 0.20554125763942052,
|
||
|
|
"learning_rate": 2.446566162055377e-06,
|
||
|
|
"loss": 0.3399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10944950580596924,
|
||
|
|
"step": 4465,
|
||
|
|
"valid_targets_mean": 10507.8,
|
||
|
|
"valid_targets_min": 1729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.28992,
|
||
|
|
"grad_norm": 0.1758382654598812,
|
||
|
|
"learning_rate": 2.414554520574579e-06,
|
||
|
|
"loss": 0.3433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10320094227790833,
|
||
|
|
"step": 4470,
|
||
|
|
"valid_targets_mean": 10201.0,
|
||
|
|
"valid_targets_min": 3485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.29472,
|
||
|
|
"grad_norm": 0.17816869342253958,
|
||
|
|
"learning_rate": 2.382740226879052e-06,
|
||
|
|
"loss": 0.3502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10201025754213333,
|
||
|
|
"step": 4475,
|
||
|
|
"valid_targets_mean": 9838.3,
|
||
|
|
"valid_targets_min": 1592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.29952,
|
||
|
|
"grad_norm": 0.18182023122642407,
|
||
|
|
"learning_rate": 2.3511236379958824e-06,
|
||
|
|
"loss": 0.356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13841117918491364,
|
||
|
|
"step": 4480,
|
||
|
|
"valid_targets_mean": 11641.9,
|
||
|
|
"valid_targets_min": 1969
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.30432,
|
||
|
|
"grad_norm": 0.18550688293565978,
|
||
|
|
"learning_rate": 2.31970510873347e-06,
|
||
|
|
"loss": 0.3447,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1117786392569542,
|
||
|
|
"step": 4485,
|
||
|
|
"valid_targets_mean": 9739.4,
|
||
|
|
"valid_targets_min": 3570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.30912,
|
||
|
|
"grad_norm": 0.1928516912071047,
|
||
|
|
"learning_rate": 2.2884849916775485e-06,
|
||
|
|
"loss": 0.3405,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10244566947221756,
|
||
|
|
"step": 4490,
|
||
|
|
"valid_targets_mean": 10692.5,
|
||
|
|
"valid_targets_min": 3162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3139199999999995,
|
||
|
|
"grad_norm": 0.16862425883484716,
|
||
|
|
"learning_rate": 2.257463637187225e-06,
|
||
|
|
"loss": 0.3353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11895417422056198,
|
||
|
|
"step": 4495,
|
||
|
|
"valid_targets_mean": 12142.8,
|
||
|
|
"valid_targets_min": 1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.31872,
|
||
|
|
"grad_norm": 0.18427884441538028,
|
||
|
|
"learning_rate": 2.2266413933910426e-06,
|
||
|
|
"loss": 0.3379,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09559936076402664,
|
||
|
|
"step": 4500,
|
||
|
|
"valid_targets_mean": 8994.3,
|
||
|
|
"valid_targets_min": 1878
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.32352,
|
||
|
|
"grad_norm": 0.18394017197686685,
|
||
|
|
"learning_rate": 2.196018606183088e-06,
|
||
|
|
"loss": 0.3418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11445660144090652,
|
||
|
|
"step": 4505,
|
||
|
|
"valid_targets_mean": 10953.6,
|
||
|
|
"valid_targets_min": 4550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.32832,
|
||
|
|
"grad_norm": 0.18355428048580366,
|
||
|
|
"learning_rate": 2.1655956192191007e-06,
|
||
|
|
"loss": 0.3462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12067331373691559,
|
||
|
|
"step": 4510,
|
||
|
|
"valid_targets_mean": 10323.3,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.33312,
|
||
|
|
"grad_norm": 0.1761313915534513,
|
||
|
|
"learning_rate": 2.135372773912614e-06,
|
||
|
|
"loss": 0.341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11238518357276917,
|
||
|
|
"step": 4515,
|
||
|
|
"valid_targets_mean": 11524.8,
|
||
|
|
"valid_targets_min": 1482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.33792,
|
||
|
|
"grad_norm": 0.18470830979727362,
|
||
|
|
"learning_rate": 2.1053504094311285e-06,
|
||
|
|
"loss": 0.3376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10500926524400711,
|
||
|
|
"step": 4520,
|
||
|
|
"valid_targets_mean": 9474.5,
|
||
|
|
"valid_targets_min": 1961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.34272,
|
||
|
|
"grad_norm": 0.18970881351612737,
|
||
|
|
"learning_rate": 2.0755288626923022e-06,
|
||
|
|
"loss": 0.3437,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12795764207839966,
|
||
|
|
"step": 4525,
|
||
|
|
"valid_targets_mean": 11386.8,
|
||
|
|
"valid_targets_min": 2975
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.34752,
|
||
|
|
"grad_norm": 0.1770264238485995,
|
||
|
|
"learning_rate": 2.0459084683601736e-06,
|
||
|
|
"loss": 0.335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11818675696849823,
|
||
|
|
"step": 4530,
|
||
|
|
"valid_targets_mean": 10696.7,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.35232,
|
||
|
|
"grad_norm": 0.1739206041365824,
|
||
|
|
"learning_rate": 2.0164895588414037e-06,
|
||
|
|
"loss": 0.3453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10573488473892212,
|
||
|
|
"step": 4535,
|
||
|
|
"valid_targets_mean": 10361.7,
|
||
|
|
"valid_targets_min": 1279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.35712,
|
||
|
|
"grad_norm": 0.18709941006713426,
|
||
|
|
"learning_rate": 1.987272464281551e-06,
|
||
|
|
"loss": 0.3469,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11464584618806839,
|
||
|
|
"step": 4540,
|
||
|
|
"valid_targets_mean": 10028.0,
|
||
|
|
"valid_targets_min": 1617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.36192,
|
||
|
|
"grad_norm": 0.1875055355309111,
|
||
|
|
"learning_rate": 1.95825751256135e-06,
|
||
|
|
"loss": 0.348,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1142655611038208,
|
||
|
|
"step": 4545,
|
||
|
|
"valid_targets_mean": 11262.9,
|
||
|
|
"valid_targets_min": 663
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.36672,
|
||
|
|
"grad_norm": 0.17711939064351098,
|
||
|
|
"learning_rate": 1.9294450292930576e-06,
|
||
|
|
"loss": 0.3373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1202390268445015,
|
||
|
|
"step": 4550,
|
||
|
|
"valid_targets_mean": 10644.8,
|
||
|
|
"valid_targets_min": 1748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.37152,
|
||
|
|
"grad_norm": 0.16979213105299187,
|
||
|
|
"learning_rate": 1.9008353378167755e-06,
|
||
|
|
"loss": 0.3391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09257425367832184,
|
||
|
|
"step": 4555,
|
||
|
|
"valid_targets_mean": 9115.6,
|
||
|
|
"valid_targets_min": 1620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.37632,
|
||
|
|
"grad_norm": 0.1782408703909472,
|
||
|
|
"learning_rate": 1.8724287591968294e-06,
|
||
|
|
"loss": 0.3465,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12132684886455536,
|
||
|
|
"step": 4560,
|
||
|
|
"valid_targets_mean": 9446.9,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.38112,
|
||
|
|
"grad_norm": 0.17449121941176965,
|
||
|
|
"learning_rate": 1.8442256122181735e-06,
|
||
|
|
"loss": 0.3399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10900422930717468,
|
||
|
|
"step": 4565,
|
||
|
|
"valid_targets_mean": 10929.7,
|
||
|
|
"valid_targets_min": 2945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.38592,
|
||
|
|
"grad_norm": 0.1737945902259658,
|
||
|
|
"learning_rate": 1.8162262133828013e-06,
|
||
|
|
"loss": 0.3427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08825558423995972,
|
||
|
|
"step": 4570,
|
||
|
|
"valid_targets_mean": 8506.4,
|
||
|
|
"valid_targets_min": 1854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.39072,
|
||
|
|
"grad_norm": 0.16676669408587796,
|
||
|
|
"learning_rate": 1.7884308769061974e-06,
|
||
|
|
"loss": 0.3375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09462806582450867,
|
||
|
|
"step": 4575,
|
||
|
|
"valid_targets_mean": 9547.1,
|
||
|
|
"valid_targets_min": 2747
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.39552,
|
||
|
|
"grad_norm": 0.16189552847802557,
|
||
|
|
"learning_rate": 1.7608399147138278e-06,
|
||
|
|
"loss": 0.3396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1256551742553711,
|
||
|
|
"step": 4580,
|
||
|
|
"valid_targets_mean": 11958.6,
|
||
|
|
"valid_targets_min": 2896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.40032,
|
||
|
|
"grad_norm": 0.17469772786856827,
|
||
|
|
"learning_rate": 1.7334536364376075e-06,
|
||
|
|
"loss": 0.352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10779605805873871,
|
||
|
|
"step": 4585,
|
||
|
|
"valid_targets_mean": 10866.5,
|
||
|
|
"valid_targets_min": 2471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.40512,
|
||
|
|
"grad_norm": 0.1811986214517229,
|
||
|
|
"learning_rate": 1.7062723494124545e-06,
|
||
|
|
"loss": 0.3413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10402344167232513,
|
||
|
|
"step": 4590,
|
||
|
|
"valid_targets_mean": 9006.3,
|
||
|
|
"valid_targets_min": 1845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.40992,
|
||
|
|
"grad_norm": 0.16980388534020766,
|
||
|
|
"learning_rate": 1.6792963586728195e-06,
|
||
|
|
"loss": 0.3355,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0950300395488739,
|
||
|
|
"step": 4595,
|
||
|
|
"valid_targets_mean": 9814.0,
|
||
|
|
"valid_targets_min": 1630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.41472,
|
||
|
|
"grad_norm": 0.16301035387109108,
|
||
|
|
"learning_rate": 1.6525259669492832e-06,
|
||
|
|
"loss": 0.342,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11410839855670929,
|
||
|
|
"step": 4600,
|
||
|
|
"valid_targets_mean": 10504.8,
|
||
|
|
"valid_targets_min": 2394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.41952,
|
||
|
|
"grad_norm": 0.16745675250061537,
|
||
|
|
"learning_rate": 1.6259614746651364e-06,
|
||
|
|
"loss": 0.344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09161694347858429,
|
||
|
|
"step": 4605,
|
||
|
|
"valid_targets_mean": 9358.8,
|
||
|
|
"valid_targets_min": 1206
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.42432,
|
||
|
|
"grad_norm": 0.17079049191662332,
|
||
|
|
"learning_rate": 1.5996031799330315e-06,
|
||
|
|
"loss": 0.3465,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11210022866725922,
|
||
|
|
"step": 4610,
|
||
|
|
"valid_targets_mean": 10135.9,
|
||
|
|
"valid_targets_min": 1119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.42912,
|
||
|
|
"grad_norm": 0.18208336880669124,
|
||
|
|
"learning_rate": 1.5734513785516227e-06,
|
||
|
|
"loss": 0.3421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13680927455425262,
|
||
|
|
"step": 4615,
|
||
|
|
"valid_targets_mean": 10683.7,
|
||
|
|
"valid_targets_min": 661
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.43392,
|
||
|
|
"grad_norm": 0.17456794356981462,
|
||
|
|
"learning_rate": 1.5475063640022425e-06,
|
||
|
|
"loss": 0.3461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1233920305967331,
|
||
|
|
"step": 4620,
|
||
|
|
"valid_targets_mean": 11128.3,
|
||
|
|
"valid_targets_min": 2410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.43872,
|
||
|
|
"grad_norm": 0.16623507798468853,
|
||
|
|
"learning_rate": 1.5217684274456314e-06,
|
||
|
|
"loss": 0.3442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1353224217891693,
|
||
|
|
"step": 4625,
|
||
|
|
"valid_targets_mean": 11848.9,
|
||
|
|
"valid_targets_min": 4052
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.44352,
|
||
|
|
"grad_norm": 0.16759933941207006,
|
||
|
|
"learning_rate": 1.496237857718641e-06,
|
||
|
|
"loss": 0.3396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1048666313290596,
|
||
|
|
"step": 4630,
|
||
|
|
"valid_targets_mean": 9752.1,
|
||
|
|
"valid_targets_min": 1334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.44832,
|
||
|
|
"grad_norm": 0.1731976789468967,
|
||
|
|
"learning_rate": 1.4709149413310076e-06,
|
||
|
|
"loss": 0.3418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10677183419466019,
|
||
|
|
"step": 4635,
|
||
|
|
"valid_targets_mean": 10148.2,
|
||
|
|
"valid_targets_min": 1431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.45312,
|
||
|
|
"grad_norm": 0.17632086604566713,
|
||
|
|
"learning_rate": 1.445799962462142e-06,
|
||
|
|
"loss": 0.3422,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1264108419418335,
|
||
|
|
"step": 4640,
|
||
|
|
"valid_targets_mean": 11090.9,
|
||
|
|
"valid_targets_min": 2144
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.45792,
|
||
|
|
"grad_norm": 0.2015434537773057,
|
||
|
|
"learning_rate": 1.420893202957927e-06,
|
||
|
|
"loss": 0.3393,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12211460620164871,
|
||
|
|
"step": 4645,
|
||
|
|
"valid_targets_mean": 10521.4,
|
||
|
|
"valid_targets_min": 1339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.46272,
|
||
|
|
"grad_norm": 0.19766730718798986,
|
||
|
|
"learning_rate": 1.3961949423275622e-06,
|
||
|
|
"loss": 0.3438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10595884174108505,
|
||
|
|
"step": 4650,
|
||
|
|
"valid_targets_mean": 10818.3,
|
||
|
|
"valid_targets_min": 1780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.46752,
|
||
|
|
"grad_norm": 0.16173720804735958,
|
||
|
|
"learning_rate": 1.3717054577404331e-06,
|
||
|
|
"loss": 0.3405,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11681263148784637,
|
||
|
|
"step": 4655,
|
||
|
|
"valid_targets_mean": 10793.5,
|
||
|
|
"valid_targets_min": 1843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.47232,
|
||
|
|
"grad_norm": 0.16770635422754943,
|
||
|
|
"learning_rate": 1.347425024022988e-06,
|
||
|
|
"loss": 0.3401,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11594116687774658,
|
||
|
|
"step": 4660,
|
||
|
|
"valid_targets_mean": 10751.7,
|
||
|
|
"valid_targets_min": 1547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.47712,
|
||
|
|
"grad_norm": 0.1653276152734862,
|
||
|
|
"learning_rate": 1.3233539136556606e-06,
|
||
|
|
"loss": 0.3512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11187846958637238,
|
||
|
|
"step": 4665,
|
||
|
|
"valid_targets_mean": 10446.6,
|
||
|
|
"valid_targets_min": 273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.48192,
|
||
|
|
"grad_norm": 0.16370885544823352,
|
||
|
|
"learning_rate": 1.2994923967698125e-06,
|
||
|
|
"loss": 0.3421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11684323102235794,
|
||
|
|
"step": 4670,
|
||
|
|
"valid_targets_mean": 12331.6,
|
||
|
|
"valid_targets_min": 1289
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.48672,
|
||
|
|
"grad_norm": 0.16981609122208036,
|
||
|
|
"learning_rate": 1.2758407411446982e-06,
|
||
|
|
"loss": 0.3383,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11591038107872009,
|
||
|
|
"step": 4675,
|
||
|
|
"valid_targets_mean": 11103.5,
|
||
|
|
"valid_targets_min": 2529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.49152,
|
||
|
|
"grad_norm": 0.17284755314757133,
|
||
|
|
"learning_rate": 1.252399212204467e-06,
|
||
|
|
"loss": 0.347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13455729186534882,
|
||
|
|
"step": 4680,
|
||
|
|
"valid_targets_mean": 11908.0,
|
||
|
|
"valid_targets_min": 2585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.49632,
|
||
|
|
"grad_norm": 0.18136262437177075,
|
||
|
|
"learning_rate": 1.22916807301517e-06,
|
||
|
|
"loss": 0.3445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12223325669765472,
|
||
|
|
"step": 4685,
|
||
|
|
"valid_targets_mean": 12023.1,
|
||
|
|
"valid_targets_min": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.50112,
|
||
|
|
"grad_norm": 0.18252354714474517,
|
||
|
|
"learning_rate": 1.2061475842818337e-06,
|
||
|
|
"loss": 0.3485,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10622070729732513,
|
||
|
|
"step": 4690,
|
||
|
|
"valid_targets_mean": 8431.8,
|
||
|
|
"valid_targets_min": 1989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.50592,
|
||
|
|
"grad_norm": 0.18226766259332344,
|
||
|
|
"learning_rate": 1.1833380043455e-06,
|
||
|
|
"loss": 0.3398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11837948113679886,
|
||
|
|
"step": 4695,
|
||
|
|
"valid_targets_mean": 11590.0,
|
||
|
|
"valid_targets_min": 2921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.51072,
|
||
|
|
"grad_norm": 0.17228749691379655,
|
||
|
|
"learning_rate": 1.1607395891803641e-06,
|
||
|
|
"loss": 0.3399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1034156084060669,
|
||
|
|
"step": 4700,
|
||
|
|
"valid_targets_mean": 8960.0,
|
||
|
|
"valid_targets_min": 798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.51552,
|
||
|
|
"grad_norm": 0.1674133892638723,
|
||
|
|
"learning_rate": 1.1383525923908678e-06,
|
||
|
|
"loss": 0.3418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1247507631778717,
|
||
|
|
"step": 4705,
|
||
|
|
"valid_targets_mean": 12181.7,
|
||
|
|
"valid_targets_min": 2280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.52032,
|
||
|
|
"grad_norm": 0.17529666189411555,
|
||
|
|
"learning_rate": 1.1161772652088775e-06,
|
||
|
|
"loss": 0.3546,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13458766043186188,
|
||
|
|
"step": 4710,
|
||
|
|
"valid_targets_mean": 12041.3,
|
||
|
|
"valid_targets_min": 1864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.52512,
|
||
|
|
"grad_norm": 0.17111281681111487,
|
||
|
|
"learning_rate": 1.094213856490849e-06,
|
||
|
|
"loss": 0.3408,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10330748558044434,
|
||
|
|
"step": 4715,
|
||
|
|
"valid_targets_mean": 10015.7,
|
||
|
|
"valid_targets_min": 1452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.52992,
|
||
|
|
"grad_norm": 0.2005388972740762,
|
||
|
|
"learning_rate": 1.0724626127150462e-06,
|
||
|
|
"loss": 0.3433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08862671256065369,
|
||
|
|
"step": 4720,
|
||
|
|
"valid_targets_mean": 8354.8,
|
||
|
|
"valid_targets_min": 3056
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.53472,
|
||
|
|
"grad_norm": 0.17715957408643013,
|
||
|
|
"learning_rate": 1.0509237779787717e-06,
|
||
|
|
"loss": 0.3384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11850816756486893,
|
||
|
|
"step": 4725,
|
||
|
|
"valid_targets_mean": 10371.2,
|
||
|
|
"valid_targets_min": 2187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5395199999999996,
|
||
|
|
"grad_norm": 0.17960490638825066,
|
||
|
|
"learning_rate": 1.029597593995626e-06,
|
||
|
|
"loss": 0.3427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08903937041759491,
|
||
|
|
"step": 4730,
|
||
|
|
"valid_targets_mean": 8203.3,
|
||
|
|
"valid_targets_min": 2693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.54432,
|
||
|
|
"grad_norm": 0.17401873267818355,
|
||
|
|
"learning_rate": 1.008484300092798e-06,
|
||
|
|
"loss": 0.3456,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10742789506912231,
|
||
|
|
"step": 4735,
|
||
|
|
"valid_targets_mean": 10027.1,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.54912,
|
||
|
|
"grad_norm": 0.17464251590964316,
|
||
|
|
"learning_rate": 9.87584133208368e-07,
|
||
|
|
"loss": 0.344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11865642666816711,
|
||
|
|
"step": 4740,
|
||
|
|
"valid_targets_mean": 10533.8,
|
||
|
|
"valid_targets_min": 1782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.55392,
|
||
|
|
"grad_norm": 0.17735860025401656,
|
||
|
|
"learning_rate": 9.66897327888665e-07,
|
||
|
|
"loss": 0.3431,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08903376758098602,
|
||
|
|
"step": 4745,
|
||
|
|
"valid_targets_mean": 7992.7,
|
||
|
|
"valid_targets_min": 1978
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.55872,
|
||
|
|
"grad_norm": 0.1746267689098251,
|
||
|
|
"learning_rate": 9.464241162856269e-07,
|
||
|
|
"loss": 0.3391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11897937208414078,
|
||
|
|
"step": 4750,
|
||
|
|
"valid_targets_mean": 9702.0,
|
||
|
|
"valid_targets_min": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5635200000000005,
|
||
|
|
"grad_norm": 0.198861765601018,
|
||
|
|
"learning_rate": 9.261647281541908e-07,
|
||
|
|
"loss": 0.3466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09576556831598282,
|
||
|
|
"step": 4755,
|
||
|
|
"valid_targets_mean": 9174.2,
|
||
|
|
"valid_targets_min": 1621
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.56832,
|
||
|
|
"grad_norm": 0.18337917468789947,
|
||
|
|
"learning_rate": 9.061193908497201e-07,
|
||
|
|
"loss": 0.341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10396026074886322,
|
||
|
|
"step": 4760,
|
||
|
|
"valid_targets_mean": 9406.8,
|
||
|
|
"valid_targets_min": 1230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.57312,
|
||
|
|
"grad_norm": 0.1808441935327845,
|
||
|
|
"learning_rate": 8.862883293254598e-07,
|
||
|
|
"loss": 0.347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12486404925584793,
|
||
|
|
"step": 4765,
|
||
|
|
"valid_targets_mean": 10789.8,
|
||
|
|
"valid_targets_min": 1360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.57792,
|
||
|
|
"grad_norm": 0.16657100940314107,
|
||
|
|
"learning_rate": 8.666717661299917e-07,
|
||
|
|
"loss": 0.3419,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10040232539176941,
|
||
|
|
"step": 4770,
|
||
|
|
"valid_targets_mean": 9042.8,
|
||
|
|
"valid_targets_min": 1681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.58272,
|
||
|
|
"grad_norm": 0.1689035714259787,
|
||
|
|
"learning_rate": 8.472699214047652e-07,
|
||
|
|
"loss": 0.3372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10922395437955856,
|
||
|
|
"step": 4775,
|
||
|
|
"valid_targets_mean": 10596.6,
|
||
|
|
"valid_targets_min": 3097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.58752,
|
||
|
|
"grad_norm": 0.1826428218690589,
|
||
|
|
"learning_rate": 8.280830128815953e-07,
|
||
|
|
"loss": 0.3526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1335485279560089,
|
||
|
|
"step": 4780,
|
||
|
|
"valid_targets_mean": 10975.2,
|
||
|
|
"valid_targets_min": 2341
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.59232,
|
||
|
|
"grad_norm": 0.17116832164377946,
|
||
|
|
"learning_rate": 8.09111255880246e-07,
|
||
|
|
"loss": 0.3437,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11295709758996964,
|
||
|
|
"step": 4785,
|
||
|
|
"valid_targets_mean": 10168.0,
|
||
|
|
"valid_targets_min": 2184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.59712,
|
||
|
|
"grad_norm": 0.17427051060924245,
|
||
|
|
"learning_rate": 7.903548633059955e-07,
|
||
|
|
"loss": 0.3511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12648193538188934,
|
||
|
|
"step": 4790,
|
||
|
|
"valid_targets_mean": 12529.5,
|
||
|
|
"valid_targets_min": 2135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.60192,
|
||
|
|
"grad_norm": 0.17846209627157872,
|
||
|
|
"learning_rate": 7.718140456472612e-07,
|
||
|
|
"loss": 0.3404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12587639689445496,
|
||
|
|
"step": 4795,
|
||
|
|
"valid_targets_mean": 12501.2,
|
||
|
|
"valid_targets_min": 4045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.60672,
|
||
|
|
"grad_norm": 0.20330776478305493,
|
||
|
|
"learning_rate": 7.534890109732229e-07,
|
||
|
|
"loss": 0.3438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12395921349525452,
|
||
|
|
"step": 4800,
|
||
|
|
"valid_targets_mean": 10552.5,
|
||
|
|
"valid_targets_min": 2740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.61152,
|
||
|
|
"grad_norm": 0.2195640643749424,
|
||
|
|
"learning_rate": 7.353799649315085e-07,
|
||
|
|
"loss": 0.3173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10619883239269257,
|
||
|
|
"step": 4805,
|
||
|
|
"valid_targets_mean": 9509.2,
|
||
|
|
"valid_targets_min": 2083
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.61632,
|
||
|
|
"grad_norm": 0.1969968854393267,
|
||
|
|
"learning_rate": 7.174871107458603e-07,
|
||
|
|
"loss": 0.3176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.06790103018283844,
|
||
|
|
"step": 4810,
|
||
|
|
"valid_targets_mean": 7099.1,
|
||
|
|
"valid_targets_min": 2387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.62112,
|
||
|
|
"grad_norm": 0.17970023876518615,
|
||
|
|
"learning_rate": 6.998106492138724e-07,
|
||
|
|
"loss": 0.3184,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08052967488765717,
|
||
|
|
"step": 4815,
|
||
|
|
"valid_targets_mean": 8661.2,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.62592,
|
||
|
|
"grad_norm": 0.2643732118194215,
|
||
|
|
"learning_rate": 6.823507787047346e-07,
|
||
|
|
"loss": 0.3278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11436612904071808,
|
||
|
|
"step": 4820,
|
||
|
|
"valid_targets_mean": 10493.6,
|
||
|
|
"valid_targets_min": 2318
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.63072,
|
||
|
|
"grad_norm": 0.18775742452444713,
|
||
|
|
"learning_rate": 6.65107695156999e-07,
|
||
|
|
"loss": 0.3327,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0929485410451889,
|
||
|
|
"step": 4825,
|
||
|
|
"valid_targets_mean": 9043.9,
|
||
|
|
"valid_targets_min": 198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.63552,
|
||
|
|
"grad_norm": 0.17230531701786073,
|
||
|
|
"learning_rate": 6.480815920763905e-07,
|
||
|
|
"loss": 0.329,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11901643872261047,
|
||
|
|
"step": 4830,
|
||
|
|
"valid_targets_mean": 11895.4,
|
||
|
|
"valid_targets_min": 2087
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.64032,
|
||
|
|
"grad_norm": 0.1867088568173494,
|
||
|
|
"learning_rate": 6.312726605336306e-07,
|
||
|
|
"loss": 0.3268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10454428941011429,
|
||
|
|
"step": 4835,
|
||
|
|
"valid_targets_mean": 10254.8,
|
||
|
|
"valid_targets_min": 787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.64512,
|
||
|
|
"grad_norm": 0.16712749995514461,
|
||
|
|
"learning_rate": 6.146810891622923e-07,
|
||
|
|
"loss": 0.3253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11394192278385162,
|
||
|
|
"step": 4840,
|
||
|
|
"valid_targets_mean": 12640.7,
|
||
|
|
"valid_targets_min": 2962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.64992,
|
||
|
|
"grad_norm": 0.19618712934379526,
|
||
|
|
"learning_rate": 5.983070641566802e-07,
|
||
|
|
"loss": 0.3262,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11127905547618866,
|
||
|
|
"step": 4845,
|
||
|
|
"valid_targets_mean": 10063.4,
|
||
|
|
"valid_targets_min": 2380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.65472,
|
||
|
|
"grad_norm": 0.16924869165042286,
|
||
|
|
"learning_rate": 5.821507692697537e-07,
|
||
|
|
"loss": 0.3258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11467337608337402,
|
||
|
|
"step": 4850,
|
||
|
|
"valid_targets_mean": 11078.0,
|
||
|
|
"valid_targets_min": 2362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.65952,
|
||
|
|
"grad_norm": 0.1876290335332049,
|
||
|
|
"learning_rate": 5.662123858110513e-07,
|
||
|
|
"loss": 0.3264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10478425770998001,
|
||
|
|
"step": 4855,
|
||
|
|
"valid_targets_mean": 9512.6,
|
||
|
|
"valid_targets_min": 1837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.66432,
|
||
|
|
"grad_norm": 0.1766138454915962,
|
||
|
|
"learning_rate": 5.504920926446611e-07,
|
||
|
|
"loss": 0.3296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10728782415390015,
|
||
|
|
"step": 4860,
|
||
|
|
"valid_targets_mean": 9908.5,
|
||
|
|
"valid_targets_min": 1208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.66912,
|
||
|
|
"grad_norm": 0.2026546604505345,
|
||
|
|
"learning_rate": 5.349900661872132e-07,
|
||
|
|
"loss": 0.3193,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10532692074775696,
|
||
|
|
"step": 4865,
|
||
|
|
"valid_targets_mean": 9793.9,
|
||
|
|
"valid_targets_min": 2861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.67392,
|
||
|
|
"grad_norm": 0.17334646571349133,
|
||
|
|
"learning_rate": 5.197064804058971e-07,
|
||
|
|
"loss": 0.3354,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13173431158065796,
|
||
|
|
"step": 4870,
|
||
|
|
"valid_targets_mean": 12489.1,
|
||
|
|
"valid_targets_min": 2679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.67872,
|
||
|
|
"grad_norm": 0.18757067240596395,
|
||
|
|
"learning_rate": 5.046415068165234e-07,
|
||
|
|
"loss": 0.3272,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10075395554304123,
|
||
|
|
"step": 4875,
|
||
|
|
"valid_targets_mean": 9802.2,
|
||
|
|
"valid_targets_min": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.68352,
|
||
|
|
"grad_norm": 0.177281674085286,
|
||
|
|
"learning_rate": 4.897953144815759e-07,
|
||
|
|
"loss": 0.3213,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10982857644557953,
|
||
|
|
"step": 4880,
|
||
|
|
"valid_targets_mean": 11444.7,
|
||
|
|
"valid_targets_min": 1799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.68832,
|
||
|
|
"grad_norm": 0.1934502627098448,
|
||
|
|
"learning_rate": 4.7516807000833165e-07,
|
||
|
|
"loss": 0.3151,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12236753851175308,
|
||
|
|
"step": 4885,
|
||
|
|
"valid_targets_mean": 10206.5,
|
||
|
|
"valid_targets_min": 1389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.69312,
|
||
|
|
"grad_norm": 0.18487124008481706,
|
||
|
|
"learning_rate": 4.6075993754698623e-07,
|
||
|
|
"loss": 0.324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11507151275873184,
|
||
|
|
"step": 4890,
|
||
|
|
"valid_targets_mean": 10522.4,
|
||
|
|
"valid_targets_min": 582
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.69792,
|
||
|
|
"grad_norm": 0.17836285144449607,
|
||
|
|
"learning_rate": 4.4657107878881113e-07,
|
||
|
|
"loss": 0.3182,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10496693849563599,
|
||
|
|
"step": 4895,
|
||
|
|
"valid_targets_mean": 11460.4,
|
||
|
|
"valid_targets_min": 2740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.70272,
|
||
|
|
"grad_norm": 0.17167486779210717,
|
||
|
|
"learning_rate": 4.326016529643351e-07,
|
||
|
|
"loss": 0.3231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11415878683328629,
|
||
|
|
"step": 4900,
|
||
|
|
"valid_targets_mean": 10817.6,
|
||
|
|
"valid_targets_min": 2485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.70752,
|
||
|
|
"grad_norm": 0.1964776797540304,
|
||
|
|
"learning_rate": 4.1885181684157005e-07,
|
||
|
|
"loss": 0.3199,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10461924225091934,
|
||
|
|
"step": 4905,
|
||
|
|
"valid_targets_mean": 8784.8,
|
||
|
|
"valid_targets_min": 282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.71232,
|
||
|
|
"grad_norm": 0.17362601714443157,
|
||
|
|
"learning_rate": 4.053217247242369e-07,
|
||
|
|
"loss": 0.3238,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10980254411697388,
|
||
|
|
"step": 4910,
|
||
|
|
"valid_targets_mean": 11650.2,
|
||
|
|
"valid_targets_min": 2463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7171199999999995,
|
||
|
|
"grad_norm": 0.17887761398779373,
|
||
|
|
"learning_rate": 3.9201152845004476e-07,
|
||
|
|
"loss": 0.3233,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12469217926263809,
|
||
|
|
"step": 4915,
|
||
|
|
"valid_targets_mean": 10989.0,
|
||
|
|
"valid_targets_min": 2740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.72192,
|
||
|
|
"grad_norm": 0.16738018734424778,
|
||
|
|
"learning_rate": 3.789213773889744e-07,
|
||
|
|
"loss": 0.3247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13059523701667786,
|
||
|
|
"step": 4920,
|
||
|
|
"valid_targets_mean": 13034.5,
|
||
|
|
"valid_targets_min": 2818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.72672,
|
||
|
|
"grad_norm": 0.20767029106159066,
|
||
|
|
"learning_rate": 3.6605141844162417e-07,
|
||
|
|
"loss": 0.3255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08879049867391586,
|
||
|
|
"step": 4925,
|
||
|
|
"valid_targets_mean": 8753.4,
|
||
|
|
"valid_targets_min": 1851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.73152,
|
||
|
|
"grad_norm": 0.1712477054459363,
|
||
|
|
"learning_rate": 3.5340179603753354e-07,
|
||
|
|
"loss": 0.3192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10303573310375214,
|
||
|
|
"step": 4930,
|
||
|
|
"valid_targets_mean": 10877.0,
|
||
|
|
"valid_targets_min": 2178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.73632,
|
||
|
|
"grad_norm": 0.16736585429671713,
|
||
|
|
"learning_rate": 3.4097265213358435e-07,
|
||
|
|
"loss": 0.3238,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09548261761665344,
|
||
|
|
"step": 4935,
|
||
|
|
"valid_targets_mean": 9767.3,
|
||
|
|
"valid_targets_min": 1734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7411200000000004,
|
||
|
|
"grad_norm": 0.17681548589034818,
|
||
|
|
"learning_rate": 3.287641262123975e-07,
|
||
|
|
"loss": 0.3283,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11017492413520813,
|
||
|
|
"step": 4940,
|
||
|
|
"valid_targets_mean": 11359.5,
|
||
|
|
"valid_targets_min": 2218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.74592,
|
||
|
|
"grad_norm": 0.17919361359580346,
|
||
|
|
"learning_rate": 3.1677635528076566e-07,
|
||
|
|
"loss": 0.3215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09803116321563721,
|
||
|
|
"step": 4945,
|
||
|
|
"valid_targets_mean": 10205.0,
|
||
|
|
"valid_targets_min": 1498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.75072,
|
||
|
|
"grad_norm": 0.19010332335370647,
|
||
|
|
"learning_rate": 3.0500947386812973e-07,
|
||
|
|
"loss": 0.3186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10954975336790085,
|
||
|
|
"step": 4950,
|
||
|
|
"valid_targets_mean": 10306.6,
|
||
|
|
"valid_targets_min": 3022
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.75552,
|
||
|
|
"grad_norm": 0.1828282852986503,
|
||
|
|
"learning_rate": 2.934636140250513e-07,
|
||
|
|
"loss": 0.3273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12371116876602173,
|
||
|
|
"step": 4955,
|
||
|
|
"valid_targets_mean": 11596.5,
|
||
|
|
"valid_targets_min": 2277
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.76032,
|
||
|
|
"grad_norm": 0.17838738493258707,
|
||
|
|
"learning_rate": 2.821389053217383e-07,
|
||
|
|
"loss": 0.3355,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11714506149291992,
|
||
|
|
"step": 4960,
|
||
|
|
"valid_targets_mean": 10788.5,
|
||
|
|
"valid_targets_min": 2539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.76512,
|
||
|
|
"grad_norm": 0.1870925307726946,
|
||
|
|
"learning_rate": 2.710354748465949e-07,
|
||
|
|
"loss": 0.3217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0950358510017395,
|
||
|
|
"step": 4965,
|
||
|
|
"valid_targets_mean": 10334.6,
|
||
|
|
"valid_targets_min": 2003
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.76992,
|
||
|
|
"grad_norm": 0.1693238168125335,
|
||
|
|
"learning_rate": 2.6015344720479395e-07,
|
||
|
|
"loss": 0.3245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11703760176897049,
|
||
|
|
"step": 4970,
|
||
|
|
"valid_targets_mean": 11903.3,
|
||
|
|
"valid_targets_min": 1790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.77472,
|
||
|
|
"grad_norm": 0.20693809617550638,
|
||
|
|
"learning_rate": 2.49492944516867e-07,
|
||
|
|
"loss": 0.3144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14453014731407166,
|
||
|
|
"step": 4975,
|
||
|
|
"valid_targets_mean": 13146.5,
|
||
|
|
"valid_targets_min": 1755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.77952,
|
||
|
|
"grad_norm": 0.17184835822806188,
|
||
|
|
"learning_rate": 2.3905408641735183e-07,
|
||
|
|
"loss": 0.3195,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10461593419313431,
|
||
|
|
"step": 4980,
|
||
|
|
"valid_targets_mean": 10058.9,
|
||
|
|
"valid_targets_min": 1491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.78432,
|
||
|
|
"grad_norm": 0.16319405021168346,
|
||
|
|
"learning_rate": 2.288369900534404e-07,
|
||
|
|
"loss": 0.326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.0974310040473938,
|
||
|
|
"step": 4985,
|
||
|
|
"valid_targets_mean": 10337.2,
|
||
|
|
"valid_targets_min": 2286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7891200000000005,
|
||
|
|
"grad_norm": 0.18659021837351797,
|
||
|
|
"learning_rate": 2.1884177008366203e-07,
|
||
|
|
"loss": 0.325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08041709661483765,
|
||
|
|
"step": 4990,
|
||
|
|
"valid_targets_mean": 7788.0,
|
||
|
|
"valid_targets_min": 2433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.79392,
|
||
|
|
"grad_norm": 0.1798733045442116,
|
||
|
|
"learning_rate": 2.0906853867660004e-07,
|
||
|
|
"loss": 0.3208,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10868051648139954,
|
||
|
|
"step": 4995,
|
||
|
|
"valid_targets_mean": 10577.7,
|
||
|
|
"valid_targets_min": 2165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.79872,
|
||
|
|
"grad_norm": 0.17603612006223165,
|
||
|
|
"learning_rate": 1.9951740550963493e-07,
|
||
|
|
"loss": 0.3249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09991029649972916,
|
||
|
|
"step": 5000,
|
||
|
|
"valid_targets_mean": 9431.1,
|
||
|
|
"valid_targets_min": 1700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.80352,
|
||
|
|
"grad_norm": 0.18066066507787595,
|
||
|
|
"learning_rate": 1.9018847776770987e-07,
|
||
|
|
"loss": 0.3236,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11398864537477493,
|
||
|
|
"step": 5005,
|
||
|
|
"valid_targets_mean": 9737.5,
|
||
|
|
"valid_targets_min": 2127
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.80832,
|
||
|
|
"grad_norm": 0.16844954184287267,
|
||
|
|
"learning_rate": 1.8108186014212935e-07,
|
||
|
|
"loss": 0.3264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11392287909984589,
|
||
|
|
"step": 5010,
|
||
|
|
"valid_targets_mean": 12332.1,
|
||
|
|
"valid_targets_min": 2224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.81312,
|
||
|
|
"grad_norm": 0.17563215197193438,
|
||
|
|
"learning_rate": 1.7219765482938465e-07,
|
||
|
|
"loss": 0.3265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10459068417549133,
|
||
|
|
"step": 5015,
|
||
|
|
"valid_targets_mean": 8703.7,
|
||
|
|
"valid_targets_min": 1410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.81792,
|
||
|
|
"grad_norm": 0.1776646013731571,
|
||
|
|
"learning_rate": 1.635359615300036e-07,
|
||
|
|
"loss": 0.3164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09462516754865646,
|
||
|
|
"step": 5020,
|
||
|
|
"valid_targets_mean": 8818.8,
|
||
|
|
"valid_targets_min": 1617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.82272,
|
||
|
|
"grad_norm": 0.1791496937556602,
|
||
|
|
"learning_rate": 1.5509687744744262e-07,
|
||
|
|
"loss": 0.3228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10216499119997025,
|
||
|
|
"step": 5025,
|
||
|
|
"valid_targets_mean": 9467.8,
|
||
|
|
"valid_targets_min": 2429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.82752,
|
||
|
|
"grad_norm": 0.16653700292134213,
|
||
|
|
"learning_rate": 1.468804972869786e-07,
|
||
|
|
"loss": 0.3236,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12149834632873535,
|
||
|
|
"step": 5030,
|
||
|
|
"valid_targets_mean": 11981.9,
|
||
|
|
"valid_targets_min": 3300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.83232,
|
||
|
|
"grad_norm": 0.17172327709459417,
|
||
|
|
"learning_rate": 1.3888691325465886e-07,
|
||
|
|
"loss": 0.3264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08818644285202026,
|
||
|
|
"step": 5035,
|
||
|
|
"valid_targets_mean": 8631.6,
|
||
|
|
"valid_targets_min": 1206
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.83712,
|
||
|
|
"grad_norm": 0.18997648114127738,
|
||
|
|
"learning_rate": 1.3111621505626616e-07,
|
||
|
|
"loss": 0.3266,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09478943049907684,
|
||
|
|
"step": 5040,
|
||
|
|
"valid_targets_mean": 8689.6,
|
||
|
|
"valid_targets_min": 2156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.84192,
|
||
|
|
"grad_norm": 0.18259426394205755,
|
||
|
|
"learning_rate": 1.235684898963041e-07,
|
||
|
|
"loss": 0.3211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08341314643621445,
|
||
|
|
"step": 5045,
|
||
|
|
"valid_targets_mean": 8050.8,
|
||
|
|
"valid_targets_min": 2530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.84672,
|
||
|
|
"grad_norm": 0.17177250528846147,
|
||
|
|
"learning_rate": 1.1624382247702681e-07,
|
||
|
|
"loss": 0.318,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11258942633867264,
|
||
|
|
"step": 5050,
|
||
|
|
"valid_targets_mean": 11464.4,
|
||
|
|
"valid_targets_min": 2338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.85152,
|
||
|
|
"grad_norm": 0.1808670185893642,
|
||
|
|
"learning_rate": 1.0914229499748186e-07,
|
||
|
|
"loss": 0.324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13074561953544617,
|
||
|
|
"step": 5055,
|
||
|
|
"valid_targets_mean": 12076.3,
|
||
|
|
"valid_targets_min": 3777
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.85632,
|
||
|
|
"grad_norm": 0.1819844593461489,
|
||
|
|
"learning_rate": 1.0226398715259322e-07,
|
||
|
|
"loss": 0.3235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08755750954151154,
|
||
|
|
"step": 5060,
|
||
|
|
"valid_targets_mean": 8355.9,
|
||
|
|
"valid_targets_min": 2486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.86112,
|
||
|
|
"grad_norm": 0.177684711321361,
|
||
|
|
"learning_rate": 9.560897613226205e-08,
|
||
|
|
"loss": 0.3347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11981816589832306,
|
||
|
|
"step": 5065,
|
||
|
|
"valid_targets_mean": 10488.2,
|
||
|
|
"valid_targets_min": 301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.86592,
|
||
|
|
"grad_norm": 0.18615913929358685,
|
||
|
|
"learning_rate": 8.917733662050287e-08,
|
||
|
|
"loss": 0.3258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1071147471666336,
|
||
|
|
"step": 5070,
|
||
|
|
"valid_targets_mean": 10314.9,
|
||
|
|
"valid_targets_min": 2260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.87072,
|
||
|
|
"grad_norm": 0.18230202290942948,
|
||
|
|
"learning_rate": 8.296914079461094e-08,
|
||
|
|
"loss": 0.3278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10978755354881287,
|
||
|
|
"step": 5075,
|
||
|
|
"valid_targets_mean": 10930.6,
|
||
|
|
"valid_targets_min": 2365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.87552,
|
||
|
|
"grad_norm": 0.17407475640883,
|
||
|
|
"learning_rate": 7.698445832433843e-08,
|
||
|
|
"loss": 0.3307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11135123670101166,
|
||
|
|
"step": 5080,
|
||
|
|
"valid_targets_mean": 10441.7,
|
||
|
|
"valid_targets_min": 2090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.88032,
|
||
|
|
"grad_norm": 0.17657507510506681,
|
||
|
|
"learning_rate": 7.122335637112621e-08,
|
||
|
|
"loss": 0.324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10902173817157745,
|
||
|
|
"step": 5085,
|
||
|
|
"valid_targets_mean": 10547.8,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.88512,
|
||
|
|
"grad_norm": 0.17631444132818108,
|
||
|
|
"learning_rate": 6.568589958734218e-08,
|
||
|
|
"loss": 0.3285,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10108692944049835,
|
||
|
|
"step": 5090,
|
||
|
|
"valid_targets_mean": 9943.4,
|
||
|
|
"valid_targets_min": 824
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.88992,
|
||
|
|
"grad_norm": 0.18617727239056273,
|
||
|
|
"learning_rate": 6.037215011556185e-08,
|
||
|
|
"loss": 0.3235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14152035117149353,
|
||
|
|
"step": 5095,
|
||
|
|
"valid_targets_mean": 12125.5,
|
||
|
|
"valid_targets_min": 1758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8947199999999995,
|
||
|
|
"grad_norm": 0.17410976874888923,
|
||
|
|
"learning_rate": 5.5282167587862314e-08,
|
||
|
|
"loss": 0.3298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10145286470651627,
|
||
|
|
"step": 5100,
|
||
|
|
"valid_targets_mean": 9976.4,
|
||
|
|
"valid_targets_min": 2352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.89952,
|
||
|
|
"grad_norm": 0.2115545191279235,
|
||
|
|
"learning_rate": 5.041600912516264e-08,
|
||
|
|
"loss": 0.328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12674742937088013,
|
||
|
|
"step": 5105,
|
||
|
|
"valid_targets_mean": 11855.3,
|
||
|
|
"valid_targets_min": 934
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.90432,
|
||
|
|
"grad_norm": 0.1786956533463368,
|
||
|
|
"learning_rate": 4.5773729336577865e-08,
|
||
|
|
"loss": 0.3279,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11230462044477463,
|
||
|
|
"step": 5110,
|
||
|
|
"valid_targets_mean": 10612.1,
|
||
|
|
"valid_targets_min": 1741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.90912,
|
||
|
|
"grad_norm": 0.16647888826427865,
|
||
|
|
"learning_rate": 4.1355380318803816e-08,
|
||
|
|
"loss": 0.3247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10658138990402222,
|
||
|
|
"step": 5115,
|
||
|
|
"valid_targets_mean": 12273.5,
|
||
|
|
"valid_targets_min": 2656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.91392,
|
||
|
|
"grad_norm": 0.17235372520662695,
|
||
|
|
"learning_rate": 3.7161011655535425e-08,
|
||
|
|
"loss": 0.3217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1133071631193161,
|
||
|
|
"step": 5120,
|
||
|
|
"valid_targets_mean": 11972.8,
|
||
|
|
"valid_targets_min": 1868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.91872,
|
||
|
|
"grad_norm": 0.17876628581017168,
|
||
|
|
"learning_rate": 3.319067041691604e-08,
|
||
|
|
"loss": 0.3208,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10460739582777023,
|
||
|
|
"step": 5125,
|
||
|
|
"valid_targets_mean": 10743.4,
|
||
|
|
"valid_targets_min": 2740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.92352,
|
||
|
|
"grad_norm": 0.18160671877566145,
|
||
|
|
"learning_rate": 2.9444401158995606e-08,
|
||
|
|
"loss": 0.3265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1146806925535202,
|
||
|
|
"step": 5130,
|
||
|
|
"valid_targets_mean": 11135.2,
|
||
|
|
"valid_targets_min": 2450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.92832,
|
||
|
|
"grad_norm": 0.18394946348768662,
|
||
|
|
"learning_rate": 2.5922245923244436e-08,
|
||
|
|
"loss": 0.3152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11141496896743774,
|
||
|
|
"step": 5135,
|
||
|
|
"valid_targets_mean": 10979.2,
|
||
|
|
"valid_targets_min": 3486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.93312,
|
||
|
|
"grad_norm": 0.17972422728254375,
|
||
|
|
"learning_rate": 2.2624244236071348e-08,
|
||
|
|
"loss": 0.3182,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09954261779785156,
|
||
|
|
"step": 5140,
|
||
|
|
"valid_targets_mean": 10688.3,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.93792,
|
||
|
|
"grad_norm": 0.16849691178434315,
|
||
|
|
"learning_rate": 1.9550433108384005e-08,
|
||
|
|
"loss": 0.3263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10053584724664688,
|
||
|
|
"step": 5145,
|
||
|
|
"valid_targets_mean": 10856.2,
|
||
|
|
"valid_targets_min": 1150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.94272,
|
||
|
|
"grad_norm": 0.1698003457201187,
|
||
|
|
"learning_rate": 1.6700847035180378e-08,
|
||
|
|
"loss": 0.3197,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09409875422716141,
|
||
|
|
"step": 5150,
|
||
|
|
"valid_targets_mean": 9585.8,
|
||
|
|
"valid_targets_min": 1851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.94752,
|
||
|
|
"grad_norm": 0.1863998752880038,
|
||
|
|
"learning_rate": 1.4075517995146837e-08,
|
||
|
|
"loss": 0.3161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13018685579299927,
|
||
|
|
"step": 5155,
|
||
|
|
"valid_targets_mean": 11728.6,
|
||
|
|
"valid_targets_min": 1242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.95232,
|
||
|
|
"grad_norm": 0.23040330350119576,
|
||
|
|
"learning_rate": 1.167447545031175e-08,
|
||
|
|
"loss": 0.3255,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12115727365016937,
|
||
|
|
"step": 5160,
|
||
|
|
"valid_targets_mean": 13134.6,
|
||
|
|
"valid_targets_min": 2568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.95712,
|
||
|
|
"grad_norm": 0.17389964545892517,
|
||
|
|
"learning_rate": 9.497746345705772e-09,
|
||
|
|
"loss": 0.3246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09132646024227142,
|
||
|
|
"step": 5165,
|
||
|
|
"valid_targets_mean": 8934.7,
|
||
|
|
"valid_targets_min": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.96192,
|
||
|
|
"grad_norm": 0.17641121909731672,
|
||
|
|
"learning_rate": 7.545355109073172e-09,
|
||
|
|
"loss": 0.3308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.08680982142686844,
|
||
|
|
"step": 5170,
|
||
|
|
"valid_targets_mean": 9088.2,
|
||
|
|
"valid_targets_min": 2406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.9667200000000005,
|
||
|
|
"grad_norm": 0.18141331963644278,
|
||
|
|
"learning_rate": 5.817323650578743e-09,
|
||
|
|
"loss": 0.3175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10711181163787842,
|
||
|
|
"step": 5175,
|
||
|
|
"valid_targets_mean": 10195.1,
|
||
|
|
"valid_targets_min": 3175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.97152,
|
||
|
|
"grad_norm": 0.1711797674805141,
|
||
|
|
"learning_rate": 4.313671362576877e-09,
|
||
|
|
"loss": 0.3258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11465975642204285,
|
||
|
|
"step": 5180,
|
||
|
|
"valid_targets_mean": 10932.3,
|
||
|
|
"valid_targets_min": 1807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.97632,
|
||
|
|
"grad_norm": 0.1710617977047733,
|
||
|
|
"learning_rate": 3.0344151193917316e-09,
|
||
|
|
"loss": 0.3295,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09889707714319229,
|
||
|
|
"step": 5185,
|
||
|
|
"valid_targets_mean": 9771.4,
|
||
|
|
"valid_targets_min": 1784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.98112,
|
||
|
|
"grad_norm": 0.19876991237981612,
|
||
|
|
"learning_rate": 1.979569277117399e-09,
|
||
|
|
"loss": 0.321,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09426333010196686,
|
||
|
|
"step": 5190,
|
||
|
|
"valid_targets_mean": 9083.2,
|
||
|
|
"valid_targets_min": 1813
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.98592,
|
||
|
|
"grad_norm": 0.17924834504421394,
|
||
|
|
"learning_rate": 1.1491456734713524e-09,
|
||
|
|
"loss": 0.3258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10984489321708679,
|
||
|
|
"step": 5195,
|
||
|
|
"valid_targets_mean": 10042.7,
|
||
|
|
"valid_targets_min": 3989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.99072,
|
||
|
|
"grad_norm": 0.1840599715302738,
|
||
|
|
"learning_rate": 5.431536276523375e-10,
|
||
|
|
"loss": 0.3309,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.10118210315704346,
|
||
|
|
"step": 5200,
|
||
|
|
"valid_targets_mean": 9091.2,
|
||
|
|
"valid_targets_min": 1931
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.99552,
|
||
|
|
"grad_norm": 0.17650718737736182,
|
||
|
|
"learning_rate": 1.6159994023601245e-10,
|
||
|
|
"loss": 0.326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.09146692603826523,
|
||
|
|
"step": 5205,
|
||
|
|
"valid_targets_mean": 9079.4,
|
||
|
|
"valid_targets_min": 1583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.21227037203133356,
|
||
|
|
"learning_rate": 4.488893106113779e-12,
|
||
|
|
"loss": 0.332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15339435636997223,
|
||
|
|
"step": 5210,
|
||
|
|
"valid_targets_mean": 9953.1,
|
||
|
|
"valid_targets_min": 2228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"step": 5210,
|
||
|
|
"total_flos": 2.038899186550912e+19,
|
||
|
|
"train_loss": 0.0,
|
||
|
|
"train_runtime": 1.0148,
|
||
|
|
"train_samples_per_second": 492687.515,
|
||
|
|
"train_steps_per_second": 5133.804
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 5210,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 5,
|
||
|
|
"save_steps": 300,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 2.038899186550912e+19,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|