9728 lines
270 KiB
JSON
9728 lines
270 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4403,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00794912559618442,
|
|
"grad_norm": 17.427739906913587,
|
|
"learning_rate": 3.6281179138322e-07,
|
|
"loss": 0.92,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37367379665374756,
|
|
"step": 5,
|
|
"valid_targets_mean": 3056.5,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 0.01589825119236884,
|
|
"grad_norm": 18.278478288107333,
|
|
"learning_rate": 8.163265306122449e-07,
|
|
"loss": 0.9256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3999854326248169,
|
|
"step": 10,
|
|
"valid_targets_mean": 5545.2,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 0.02384737678855326,
|
|
"grad_norm": 15.382889742539266,
|
|
"learning_rate": 1.26984126984127e-06,
|
|
"loss": 0.8679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32684624195098877,
|
|
"step": 15,
|
|
"valid_targets_mean": 2270.6,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 0.03179650238473768,
|
|
"grad_norm": 11.002800865176676,
|
|
"learning_rate": 1.723356009070295e-06,
|
|
"loss": 0.848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5363240838050842,
|
|
"step": 20,
|
|
"valid_targets_mean": 5614.4,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 0.0397456279809221,
|
|
"grad_norm": 9.443738789660047,
|
|
"learning_rate": 2.17687074829932e-06,
|
|
"loss": 0.8205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45277732610702515,
|
|
"step": 25,
|
|
"valid_targets_mean": 2538.1,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 0.04769475357710652,
|
|
"grad_norm": 4.289756865367558,
|
|
"learning_rate": 2.6303854875283447e-06,
|
|
"loss": 0.7657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4672686755657196,
|
|
"step": 30,
|
|
"valid_targets_mean": 6368.0,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 0.05564387917329094,
|
|
"grad_norm": 3.1073230893094417,
|
|
"learning_rate": 3.08390022675737e-06,
|
|
"loss": 0.7192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5197245478630066,
|
|
"step": 35,
|
|
"valid_targets_mean": 5126.1,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 0.06359300476947535,
|
|
"grad_norm": 2.3567610247243103,
|
|
"learning_rate": 3.537414965986395e-06,
|
|
"loss": 0.6803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3848802447319031,
|
|
"step": 40,
|
|
"valid_targets_mean": 3168.1,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 0.07154213036565978,
|
|
"grad_norm": 1.4335102096883736,
|
|
"learning_rate": 3.99092970521542e-06,
|
|
"loss": 0.6657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2820870280265808,
|
|
"step": 45,
|
|
"valid_targets_mean": 3834.2,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 0.0794912559618442,
|
|
"grad_norm": 1.2219503844068271,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.6422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29011207818984985,
|
|
"step": 50,
|
|
"valid_targets_mean": 4752.1,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 0.08744038155802862,
|
|
"grad_norm": 0.8313369139976908,
|
|
"learning_rate": 4.897959183673469e-06,
|
|
"loss": 0.613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27872806787490845,
|
|
"step": 55,
|
|
"valid_targets_mean": 3695.2,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 0.09538950715421304,
|
|
"grad_norm": 0.8997835265853216,
|
|
"learning_rate": 5.3514739229024945e-06,
|
|
"loss": 0.6403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040982484817505,
|
|
"step": 60,
|
|
"valid_targets_mean": 3308.4,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 0.10333863275039745,
|
|
"grad_norm": 0.7903289891970907,
|
|
"learning_rate": 5.80498866213152e-06,
|
|
"loss": 0.6469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3204754590988159,
|
|
"step": 65,
|
|
"valid_targets_mean": 5275.6,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 0.11128775834658187,
|
|
"grad_norm": 0.8482800425032815,
|
|
"learning_rate": 6.258503401360545e-06,
|
|
"loss": 0.6271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842549979686737,
|
|
"step": 70,
|
|
"valid_targets_mean": 3735.2,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 0.1192368839427663,
|
|
"grad_norm": 0.870657781864876,
|
|
"learning_rate": 6.71201814058957e-06,
|
|
"loss": 0.5819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.297611802816391,
|
|
"step": 75,
|
|
"valid_targets_mean": 3769.1,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 0.1271860095389507,
|
|
"grad_norm": 0.7893436360247763,
|
|
"learning_rate": 7.165532879818595e-06,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4144401550292969,
|
|
"step": 80,
|
|
"valid_targets_mean": 5886.8,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 0.13513513513513514,
|
|
"grad_norm": 0.8170713905865753,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 0.5831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24641110002994537,
|
|
"step": 85,
|
|
"valid_targets_mean": 2802.9,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 0.14308426073131955,
|
|
"grad_norm": 0.9337404252527061,
|
|
"learning_rate": 8.072562358276645e-06,
|
|
"loss": 0.5871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3808702230453491,
|
|
"step": 90,
|
|
"valid_targets_mean": 3019.4,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 0.151033386327504,
|
|
"grad_norm": 0.6548465287980122,
|
|
"learning_rate": 8.52607709750567e-06,
|
|
"loss": 0.5605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28424954414367676,
|
|
"step": 95,
|
|
"valid_targets_mean": 4934.0,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 0.1589825119236884,
|
|
"grad_norm": 0.7180406367231116,
|
|
"learning_rate": 8.979591836734695e-06,
|
|
"loss": 0.5829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24564078450202942,
|
|
"step": 100,
|
|
"valid_targets_mean": 4774.9,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 0.1669316375198728,
|
|
"grad_norm": 0.7002524067733675,
|
|
"learning_rate": 9.43310657596372e-06,
|
|
"loss": 0.5722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29770559072494507,
|
|
"step": 105,
|
|
"valid_targets_mean": 4093.6,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 0.17488076311605724,
|
|
"grad_norm": 0.6993899235679648,
|
|
"learning_rate": 9.886621315192746e-06,
|
|
"loss": 0.5634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24801306426525116,
|
|
"step": 110,
|
|
"valid_targets_mean": 3211.2,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 0.18282988871224165,
|
|
"grad_norm": 0.6993476977177217,
|
|
"learning_rate": 1.034013605442177e-05,
|
|
"loss": 0.5393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603301703929901,
|
|
"step": 115,
|
|
"valid_targets_mean": 3729.4,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 0.1907790143084261,
|
|
"grad_norm": 0.7181047605468096,
|
|
"learning_rate": 1.0793650793650794e-05,
|
|
"loss": 0.5236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22710895538330078,
|
|
"step": 120,
|
|
"valid_targets_mean": 3094.6,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 0.1987281399046105,
|
|
"grad_norm": 1.3505224511423912,
|
|
"learning_rate": 1.124716553287982e-05,
|
|
"loss": 0.5482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4447000026702881,
|
|
"step": 125,
|
|
"valid_targets_mean": 4055.8,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 0.2066772655007949,
|
|
"grad_norm": 0.5568540913034706,
|
|
"learning_rate": 1.1700680272108845e-05,
|
|
"loss": 0.5553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20323292911052704,
|
|
"step": 130,
|
|
"valid_targets_mean": 4619.6,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 0.21462639109697934,
|
|
"grad_norm": 0.5459293378177086,
|
|
"learning_rate": 1.215419501133787e-05,
|
|
"loss": 0.5129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30654698610305786,
|
|
"step": 135,
|
|
"valid_targets_mean": 6362.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.22257551669316375,
|
|
"grad_norm": 1.9081985491506623,
|
|
"learning_rate": 1.2607709750566895e-05,
|
|
"loss": 0.5027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114013075828552,
|
|
"step": 140,
|
|
"valid_targets_mean": 3423.6,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 0.23052464228934816,
|
|
"grad_norm": 0.8005956080725973,
|
|
"learning_rate": 1.3061224489795918e-05,
|
|
"loss": 0.5279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2415599822998047,
|
|
"step": 145,
|
|
"valid_targets_mean": 2406.9,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 0.2384737678855326,
|
|
"grad_norm": 0.6305836856345688,
|
|
"learning_rate": 1.3514739229024945e-05,
|
|
"loss": 0.4994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30976468324661255,
|
|
"step": 150,
|
|
"valid_targets_mean": 4874.6,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 0.246422893481717,
|
|
"grad_norm": 0.8110079486765456,
|
|
"learning_rate": 1.3968253968253968e-05,
|
|
"loss": 0.5084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25239890813827515,
|
|
"step": 155,
|
|
"valid_targets_mean": 3271.2,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 0.2543720190779014,
|
|
"grad_norm": 0.7410171326700915,
|
|
"learning_rate": 1.4421768707482994e-05,
|
|
"loss": 0.5128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28796687722206116,
|
|
"step": 160,
|
|
"valid_targets_mean": 3651.2,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 0.26232114467408585,
|
|
"grad_norm": 0.6208269968386544,
|
|
"learning_rate": 1.4875283446712018e-05,
|
|
"loss": 0.4965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19565162062644958,
|
|
"step": 165,
|
|
"valid_targets_mean": 3388.6,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 0.2702702702702703,
|
|
"grad_norm": 0.7428238675868281,
|
|
"learning_rate": 1.5328798185941044e-05,
|
|
"loss": 0.4977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785342335700989,
|
|
"step": 170,
|
|
"valid_targets_mean": 3411.6,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 0.27821939586645467,
|
|
"grad_norm": 0.6726472494144805,
|
|
"learning_rate": 1.578231292517007e-05,
|
|
"loss": 0.5046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20545580983161926,
|
|
"step": 175,
|
|
"valid_targets_mean": 2871.5,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 0.2861685214626391,
|
|
"grad_norm": 0.7302160943361461,
|
|
"learning_rate": 1.6235827664399097e-05,
|
|
"loss": 0.4824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705424427986145,
|
|
"step": 180,
|
|
"valid_targets_mean": 3835.4,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 0.6690466040853548,
|
|
"learning_rate": 1.668934240362812e-05,
|
|
"loss": 0.4837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1811181902885437,
|
|
"step": 185,
|
|
"valid_targets_mean": 3589.9,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 0.302066772655008,
|
|
"grad_norm": 0.729477152544353,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.4687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21869003772735596,
|
|
"step": 190,
|
|
"valid_targets_mean": 3461.9,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 0.31001589825119236,
|
|
"grad_norm": 0.6632493453578093,
|
|
"learning_rate": 1.759637188208617e-05,
|
|
"loss": 0.4948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17977774143218994,
|
|
"step": 195,
|
|
"valid_targets_mean": 3021.6,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 0.3179650238473768,
|
|
"grad_norm": 0.5988252493524185,
|
|
"learning_rate": 1.8049886621315194e-05,
|
|
"loss": 0.5051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23335793614387512,
|
|
"step": 200,
|
|
"valid_targets_mean": 5178.9,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 0.32591414944356123,
|
|
"grad_norm": 0.7152226971179542,
|
|
"learning_rate": 1.8503401360544218e-05,
|
|
"loss": 0.4571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1936107873916626,
|
|
"step": 205,
|
|
"valid_targets_mean": 2977.5,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 0.3338632750397456,
|
|
"grad_norm": 0.6802704267431957,
|
|
"learning_rate": 1.8956916099773243e-05,
|
|
"loss": 0.4904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14781203866004944,
|
|
"step": 210,
|
|
"valid_targets_mean": 2359.2,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 0.34181240063593005,
|
|
"grad_norm": 0.6969951323412201,
|
|
"learning_rate": 1.941043083900227e-05,
|
|
"loss": 0.4869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743694484233856,
|
|
"step": 215,
|
|
"valid_targets_mean": 5468.8,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 0.3497615262321145,
|
|
"grad_norm": 0.7062375719269839,
|
|
"learning_rate": 1.9863945578231295e-05,
|
|
"loss": 0.4716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669013738632202,
|
|
"step": 220,
|
|
"valid_targets_mean": 4258.8,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 0.35771065182829886,
|
|
"grad_norm": 0.7410768795257383,
|
|
"learning_rate": 2.031746031746032e-05,
|
|
"loss": 0.4639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13843026757240295,
|
|
"step": 225,
|
|
"valid_targets_mean": 2092.9,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 0.3656597774244833,
|
|
"grad_norm": 0.8194146789513875,
|
|
"learning_rate": 2.0770975056689343e-05,
|
|
"loss": 0.4976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30189141631126404,
|
|
"step": 230,
|
|
"valid_targets_mean": 3245.9,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 0.37360890302066774,
|
|
"grad_norm": 0.6218718160127596,
|
|
"learning_rate": 2.122448979591837e-05,
|
|
"loss": 0.466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22028835117816925,
|
|
"step": 235,
|
|
"valid_targets_mean": 4241.8,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 0.3815580286168522,
|
|
"grad_norm": 0.7142806767768201,
|
|
"learning_rate": 2.1678004535147395e-05,
|
|
"loss": 0.4864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22995081543922424,
|
|
"step": 240,
|
|
"valid_targets_mean": 3769.6,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 0.38950715421303655,
|
|
"grad_norm": 0.771170806578013,
|
|
"learning_rate": 2.213151927437642e-05,
|
|
"loss": 0.4606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19811928272247314,
|
|
"step": 245,
|
|
"valid_targets_mean": 3168.2,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 0.397456279809221,
|
|
"grad_norm": 0.7355456640005622,
|
|
"learning_rate": 2.2585034013605444e-05,
|
|
"loss": 0.4879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.243247389793396,
|
|
"step": 250,
|
|
"valid_targets_mean": 3826.9,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 0.40540540540540543,
|
|
"grad_norm": 0.7056531202188868,
|
|
"learning_rate": 2.3038548752834472e-05,
|
|
"loss": 0.4555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2440948486328125,
|
|
"step": 255,
|
|
"valid_targets_mean": 3197.9,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 0.4133545310015898,
|
|
"grad_norm": 0.8574008763305963,
|
|
"learning_rate": 2.3492063492063496e-05,
|
|
"loss": 0.4395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31400373578071594,
|
|
"step": 260,
|
|
"valid_targets_mean": 4229.5,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 0.42130365659777425,
|
|
"grad_norm": 0.791764680940932,
|
|
"learning_rate": 2.394557823129252e-05,
|
|
"loss": 0.4942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24072939157485962,
|
|
"step": 265,
|
|
"valid_targets_mean": 3118.4,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 0.4292527821939587,
|
|
"grad_norm": 0.756212835616936,
|
|
"learning_rate": 2.439909297052154e-05,
|
|
"loss": 0.4666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23310956358909607,
|
|
"step": 270,
|
|
"valid_targets_mean": 6011.4,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 0.43720190779014306,
|
|
"grad_norm": 0.6994703573555281,
|
|
"learning_rate": 2.4852607709750566e-05,
|
|
"loss": 0.4489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23863139748573303,
|
|
"step": 275,
|
|
"valid_targets_mean": 3740.6,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.4451510333863275,
|
|
"grad_norm": 0.9148899960249289,
|
|
"learning_rate": 2.5306122448979597e-05,
|
|
"loss": 0.4592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28538745641708374,
|
|
"step": 280,
|
|
"valid_targets_mean": 4679.2,
|
|
"valid_targets_min": 2227
|
|
},
|
|
{
|
|
"epoch": 0.45310015898251194,
|
|
"grad_norm": 0.769928834362706,
|
|
"learning_rate": 2.5759637188208618e-05,
|
|
"loss": 0.4431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2456960827112198,
|
|
"step": 285,
|
|
"valid_targets_mean": 4105.9,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 0.4610492845786963,
|
|
"grad_norm": 0.8350131269990385,
|
|
"learning_rate": 2.6213151927437642e-05,
|
|
"loss": 0.46,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21551910042762756,
|
|
"step": 290,
|
|
"valid_targets_mean": 2773.6,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 0.46899841017488075,
|
|
"grad_norm": 0.8896106092457188,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.4796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22988539934158325,
|
|
"step": 295,
|
|
"valid_targets_mean": 2381.0,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 0.4769475357710652,
|
|
"grad_norm": 2.3488906873622764,
|
|
"learning_rate": 2.7120181405895694e-05,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20080776512622833,
|
|
"step": 300,
|
|
"valid_targets_mean": 3730.8,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 0.4848966613672496,
|
|
"grad_norm": 0.7404292848124178,
|
|
"learning_rate": 2.757369614512472e-05,
|
|
"loss": 0.4467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15650013089179993,
|
|
"step": 305,
|
|
"valid_targets_mean": 2895.9,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.492845786963434,
|
|
"grad_norm": 1.0154983461494191,
|
|
"learning_rate": 2.8027210884353743e-05,
|
|
"loss": 0.4381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18917216360569,
|
|
"step": 310,
|
|
"valid_targets_mean": 3387.1,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 0.5007949125596184,
|
|
"grad_norm": 0.62291435896969,
|
|
"learning_rate": 2.8480725623582767e-05,
|
|
"loss": 0.4513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.223161518573761,
|
|
"step": 315,
|
|
"valid_targets_mean": 4592.8,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 0.5087440381558028,
|
|
"grad_norm": 0.740704856690832,
|
|
"learning_rate": 2.893424036281179e-05,
|
|
"loss": 0.4484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2740631699562073,
|
|
"step": 320,
|
|
"valid_targets_mean": 3944.5,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 0.5166931637519873,
|
|
"grad_norm": 0.7911867673233632,
|
|
"learning_rate": 2.938775510204082e-05,
|
|
"loss": 0.4449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2467116415500641,
|
|
"step": 325,
|
|
"valid_targets_mean": 3176.6,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 0.5246422893481717,
|
|
"grad_norm": 0.9202815305972807,
|
|
"learning_rate": 2.9841269841269844e-05,
|
|
"loss": 0.469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22578208148479462,
|
|
"step": 330,
|
|
"valid_targets_mean": 2134.1,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 0.5325914149443561,
|
|
"grad_norm": 0.6618388383504596,
|
|
"learning_rate": 3.0294784580498868e-05,
|
|
"loss": 0.4289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518542408943176,
|
|
"step": 335,
|
|
"valid_targets_mean": 4310.5,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 0.5405405405405406,
|
|
"grad_norm": 0.7503702481268859,
|
|
"learning_rate": 3.074829931972789e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2288234531879425,
|
|
"step": 340,
|
|
"valid_targets_mean": 3172.0,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 0.548489666136725,
|
|
"grad_norm": 0.6319664035685799,
|
|
"learning_rate": 3.1201814058956924e-05,
|
|
"loss": 0.4253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952660232782364,
|
|
"step": 345,
|
|
"valid_targets_mean": 4467.0,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 0.5564387917329093,
|
|
"grad_norm": 0.5787912817849061,
|
|
"learning_rate": 3.1655328798185945e-05,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20080778002738953,
|
|
"step": 350,
|
|
"valid_targets_mean": 4411.9,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 0.5643879173290938,
|
|
"grad_norm": 0.7897515897763938,
|
|
"learning_rate": 3.2108843537414965e-05,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2288242131471634,
|
|
"step": 355,
|
|
"valid_targets_mean": 4564.2,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 0.5723370429252782,
|
|
"grad_norm": 0.6256923441099292,
|
|
"learning_rate": 3.256235827664399e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23909230530261993,
|
|
"step": 360,
|
|
"valid_targets_mean": 5270.0,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 0.5802861685214626,
|
|
"grad_norm": 0.7773271743480481,
|
|
"learning_rate": 3.3015873015873014e-05,
|
|
"loss": 0.465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23176656663417816,
|
|
"step": 365,
|
|
"valid_targets_mean": 3167.8,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.6859799970308466,
|
|
"learning_rate": 3.346938775510204e-05,
|
|
"loss": 0.4416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1526610553264618,
|
|
"step": 370,
|
|
"valid_targets_mean": 2651.8,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 0.5961844197138315,
|
|
"grad_norm": 0.8117657099530576,
|
|
"learning_rate": 3.392290249433107e-05,
|
|
"loss": 0.4583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23585307598114014,
|
|
"step": 375,
|
|
"valid_targets_mean": 2812.9,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 0.604133545310016,
|
|
"grad_norm": 0.8302510952638039,
|
|
"learning_rate": 3.437641723356009e-05,
|
|
"loss": 0.4242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24547621607780457,
|
|
"step": 380,
|
|
"valid_targets_mean": 4161.2,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 0.6120826709062003,
|
|
"grad_norm": 0.6998287089498872,
|
|
"learning_rate": 3.482993197278912e-05,
|
|
"loss": 0.4205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16593489050865173,
|
|
"step": 385,
|
|
"valid_targets_mean": 4611.6,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.6200317965023847,
|
|
"grad_norm": 0.5960187939195858,
|
|
"learning_rate": 3.5283446712018146e-05,
|
|
"loss": 0.4248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20833107829093933,
|
|
"step": 390,
|
|
"valid_targets_mean": 5116.9,
|
|
"valid_targets_min": 2519
|
|
},
|
|
{
|
|
"epoch": 0.6279809220985691,
|
|
"grad_norm": 0.5995240781268688,
|
|
"learning_rate": 3.573696145124717e-05,
|
|
"loss": 0.4571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3176567554473877,
|
|
"step": 395,
|
|
"valid_targets_mean": 7817.8,
|
|
"valid_targets_min": 3112
|
|
},
|
|
{
|
|
"epoch": 0.6359300476947536,
|
|
"grad_norm": 0.6220983470970641,
|
|
"learning_rate": 3.6190476190476195e-05,
|
|
"loss": 0.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2201247215270996,
|
|
"step": 400,
|
|
"valid_targets_mean": 4501.1,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 0.643879173290938,
|
|
"grad_norm": 0.9392365634462518,
|
|
"learning_rate": 3.6643990929705216e-05,
|
|
"loss": 0.4292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21961425244808197,
|
|
"step": 405,
|
|
"valid_targets_mean": 3784.5,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 0.6518282988871225,
|
|
"grad_norm": 0.6070231113976782,
|
|
"learning_rate": 3.7097505668934243e-05,
|
|
"loss": 0.425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26606374979019165,
|
|
"step": 410,
|
|
"valid_targets_mean": 5851.1,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 0.6597774244833068,
|
|
"grad_norm": 0.5824491480849711,
|
|
"learning_rate": 3.755102040816327e-05,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15032368898391724,
|
|
"step": 415,
|
|
"valid_targets_mean": 3427.4,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 0.6677265500794912,
|
|
"grad_norm": 0.738763454615259,
|
|
"learning_rate": 3.800453514739229e-05,
|
|
"loss": 0.4031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13937926292419434,
|
|
"step": 420,
|
|
"valid_targets_mean": 3000.9,
|
|
"valid_targets_min": 1818
|
|
},
|
|
{
|
|
"epoch": 0.6756756756756757,
|
|
"grad_norm": 0.6598986356529651,
|
|
"learning_rate": 3.845804988662132e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22225333750247955,
|
|
"step": 425,
|
|
"valid_targets_mean": 6010.4,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 0.6836248012718601,
|
|
"grad_norm": 0.7638368470533009,
|
|
"learning_rate": 3.891156462585034e-05,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15982800722122192,
|
|
"step": 430,
|
|
"valid_targets_mean": 2238.4,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 0.6915739268680445,
|
|
"grad_norm": 0.903407943971446,
|
|
"learning_rate": 3.936507936507937e-05,
|
|
"loss": 0.4344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535330057144165,
|
|
"step": 435,
|
|
"valid_targets_mean": 3179.8,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 0.699523052464229,
|
|
"grad_norm": 0.7016064173898509,
|
|
"learning_rate": 3.9818594104308396e-05,
|
|
"loss": 0.4563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13231216371059418,
|
|
"step": 440,
|
|
"valid_targets_mean": 2246.5,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 0.7074721780604134,
|
|
"grad_norm": 0.6109748855868059,
|
|
"learning_rate": 3.999994341346418e-05,
|
|
"loss": 0.4287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20758157968521118,
|
|
"step": 445,
|
|
"valid_targets_mean": 4617.8,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 0.7154213036565977,
|
|
"grad_norm": 0.7156403542180713,
|
|
"learning_rate": 3.999959760801596e-05,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1698288768529892,
|
|
"step": 450,
|
|
"valid_targets_mean": 2544.8,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 0.7233704292527822,
|
|
"grad_norm": 0.6324100086954229,
|
|
"learning_rate": 3.999893743951281e-05,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16563759744167328,
|
|
"step": 455,
|
|
"valid_targets_mean": 3440.9,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 0.7313195548489666,
|
|
"grad_norm": 0.6927412106647844,
|
|
"learning_rate": 3.9997962918331554e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15543729066848755,
|
|
"step": 460,
|
|
"valid_targets_mean": 2834.5,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 0.739268680445151,
|
|
"grad_norm": 0.68986003149986,
|
|
"learning_rate": 3.999667405979019e-05,
|
|
"loss": 0.3935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18105608224868774,
|
|
"step": 465,
|
|
"valid_targets_mean": 4867.9,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 0.7472178060413355,
|
|
"grad_norm": 0.631548550901122,
|
|
"learning_rate": 3.9995070884147604e-05,
|
|
"loss": 0.4248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19634869694709778,
|
|
"step": 470,
|
|
"valid_targets_mean": 4318.4,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 0.7551669316375199,
|
|
"grad_norm": 0.5443988826998521,
|
|
"learning_rate": 3.999315341660325e-05,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1556302011013031,
|
|
"step": 475,
|
|
"valid_targets_mean": 5036.2,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.7631160572337043,
|
|
"grad_norm": 0.6059887639583496,
|
|
"learning_rate": 3.9990921687296785e-05,
|
|
"loss": 0.4298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35763978958129883,
|
|
"step": 480,
|
|
"valid_targets_mean": 7205.9,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 0.7710651828298887,
|
|
"grad_norm": 0.7583340162534273,
|
|
"learning_rate": 3.998837573130758e-05,
|
|
"loss": 0.4278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23325160145759583,
|
|
"step": 485,
|
|
"valid_targets_mean": 3737.2,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 0.7790143084260731,
|
|
"grad_norm": 0.7037294015636304,
|
|
"learning_rate": 3.9985515588654166e-05,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1624503880739212,
|
|
"step": 490,
|
|
"valid_targets_mean": 3006.9,
|
|
"valid_targets_min": 2055
|
|
},
|
|
{
|
|
"epoch": 0.7869634340222575,
|
|
"grad_norm": 0.6607266015858851,
|
|
"learning_rate": 3.99823413042936e-05,
|
|
"loss": 0.4299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2131785750389099,
|
|
"step": 495,
|
|
"valid_targets_mean": 4284.4,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 0.794912559618442,
|
|
"grad_norm": 0.6834839654312513,
|
|
"learning_rate": 3.997885292812078e-05,
|
|
"loss": 0.4042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15849092602729797,
|
|
"step": 500,
|
|
"valid_targets_mean": 2798.0,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 0.8028616852146264,
|
|
"grad_norm": 0.7803873344788964,
|
|
"learning_rate": 3.997505051496764e-05,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17525087296962738,
|
|
"step": 505,
|
|
"valid_targets_mean": 2455.5,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 0.8108108108108109,
|
|
"grad_norm": 0.6787329897361172,
|
|
"learning_rate": 3.997093412460229e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24851000308990479,
|
|
"step": 510,
|
|
"valid_targets_mean": 5985.4,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 0.8187599364069952,
|
|
"grad_norm": 0.7120054933034567,
|
|
"learning_rate": 3.9966503821728074e-05,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2145119458436966,
|
|
"step": 515,
|
|
"valid_targets_mean": 3888.4,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 0.8267090620031796,
|
|
"grad_norm": 0.7955565217141728,
|
|
"learning_rate": 3.996175967598258e-05,
|
|
"loss": 0.4238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23422761261463165,
|
|
"step": 520,
|
|
"valid_targets_mean": 4003.4,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 0.834658187599364,
|
|
"grad_norm": 0.6649734880749839,
|
|
"learning_rate": 3.995670176193651e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16731323301792145,
|
|
"step": 525,
|
|
"valid_targets_mean": 3570.0,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 0.8426073131955485,
|
|
"grad_norm": 0.5305607455319297,
|
|
"learning_rate": 3.9951330159092554e-05,
|
|
"loss": 0.3906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18263310194015503,
|
|
"step": 530,
|
|
"valid_targets_mean": 7381.9,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 0.8505564387917329,
|
|
"grad_norm": 0.9135105007655274,
|
|
"learning_rate": 3.994564495188405e-05,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29586079716682434,
|
|
"step": 535,
|
|
"valid_targets_mean": 2882.1,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 0.8585055643879174,
|
|
"grad_norm": 0.6711825120823967,
|
|
"learning_rate": 3.9939646229673775e-05,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15307602286338806,
|
|
"step": 540,
|
|
"valid_targets_mean": 3414.5,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 0.8664546899841018,
|
|
"grad_norm": 0.6194962458734112,
|
|
"learning_rate": 3.993333408675244e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21103698015213013,
|
|
"step": 545,
|
|
"valid_targets_mean": 4158.4,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 0.8744038155802861,
|
|
"grad_norm": 0.64187442147393,
|
|
"learning_rate": 3.9926708622337285e-05,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18612515926361084,
|
|
"step": 550,
|
|
"valid_targets_mean": 4043.5,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 0.6044705843328582,
|
|
"learning_rate": 3.991976994057046e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18610519170761108,
|
|
"step": 555,
|
|
"valid_targets_mean": 3706.1,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 0.890302066772655,
|
|
"grad_norm": 0.6308645687683865,
|
|
"learning_rate": 3.991251815051741e-05,
|
|
"loss": 0.3998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19699440896511078,
|
|
"step": 560,
|
|
"valid_targets_mean": 3753.5,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 0.8982511923688394,
|
|
"grad_norm": 0.7128199218679743,
|
|
"learning_rate": 3.990495336616519e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20944935083389282,
|
|
"step": 565,
|
|
"valid_targets_mean": 2879.1,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 0.9062003179650239,
|
|
"grad_norm": 0.6720867967704229,
|
|
"learning_rate": 3.989707570642062e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16372711956501007,
|
|
"step": 570,
|
|
"valid_targets_mean": 3729.9,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 0.9141494435612083,
|
|
"grad_norm": 0.6840095827526829,
|
|
"learning_rate": 3.988888529510844e-05,
|
|
"loss": 0.39,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2171524167060852,
|
|
"step": 575,
|
|
"valid_targets_mean": 4454.8,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 0.9220985691573926,
|
|
"grad_norm": 0.6771458913877798,
|
|
"learning_rate": 3.988038226096939e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21544820070266724,
|
|
"step": 580,
|
|
"valid_targets_mean": 4007.6,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 0.9300476947535771,
|
|
"grad_norm": 0.6744159786093661,
|
|
"learning_rate": 3.9871566737658144e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1731451153755188,
|
|
"step": 585,
|
|
"valid_targets_mean": 3204.2,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 0.9379968203497615,
|
|
"grad_norm": 0.71411097993976,
|
|
"learning_rate": 3.986243886374124e-05,
|
|
"loss": 0.4305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717849910259247,
|
|
"step": 590,
|
|
"valid_targets_mean": 4752.9,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 0.9459459459459459,
|
|
"grad_norm": 1.3957001247578276,
|
|
"learning_rate": 3.985299878269486e-05,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2410079538822174,
|
|
"step": 595,
|
|
"valid_targets_mean": 5143.6,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 0.9538950715421304,
|
|
"grad_norm": 0.7471144602924334,
|
|
"learning_rate": 3.9843246642902646e-05,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20499342679977417,
|
|
"step": 600,
|
|
"valid_targets_mean": 3514.0,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 0.9618441971383148,
|
|
"grad_norm": 0.6325777375763035,
|
|
"learning_rate": 3.98331825976533e-05,
|
|
"loss": 0.3804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17984524369239807,
|
|
"step": 605,
|
|
"valid_targets_mean": 3144.0,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 0.9697933227344993,
|
|
"grad_norm": 0.6304133895035731,
|
|
"learning_rate": 3.98228068051382e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702899932861328,
|
|
"step": 610,
|
|
"valid_targets_mean": 3755.0,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 0.9777424483306836,
|
|
"grad_norm": 0.6831488360956108,
|
|
"learning_rate": 3.9812119428448926e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17801375687122345,
|
|
"step": 615,
|
|
"valid_targets_mean": 2340.0,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 0.985691573926868,
|
|
"grad_norm": 0.9850765283023625,
|
|
"learning_rate": 3.9801120635574664e-05,
|
|
"loss": 0.4099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2061094343662262,
|
|
"step": 620,
|
|
"valid_targets_mean": 1898.1,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 0.9936406995230525,
|
|
"grad_norm": 0.7380372299973853,
|
|
"learning_rate": 3.978981059939961e-05,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21197408437728882,
|
|
"step": 625,
|
|
"valid_targets_mean": 3152.4,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 1.0015898251192368,
|
|
"grad_norm": 0.6323671295781639,
|
|
"learning_rate": 3.977818949770022e-05,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19854407012462616,
|
|
"step": 630,
|
|
"valid_targets_mean": 4659.8,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 1.0095389507154213,
|
|
"grad_norm": 0.5930886442767445,
|
|
"learning_rate": 3.976625751314241e-05,
|
|
"loss": 0.3808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13020233809947968,
|
|
"step": 635,
|
|
"valid_targets_mean": 3716.6,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 1.0174880763116056,
|
|
"grad_norm": 0.6620476492851461,
|
|
"learning_rate": 3.975401483327871e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15469133853912354,
|
|
"step": 640,
|
|
"valid_targets_mean": 3731.6,
|
|
"valid_targets_min": 1717
|
|
},
|
|
{
|
|
"epoch": 1.0254372019077902,
|
|
"grad_norm": 0.5418631861334319,
|
|
"learning_rate": 3.974146165054532e-05,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311570644378662,
|
|
"step": 645,
|
|
"valid_targets_mean": 6456.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 1.0333863275039745,
|
|
"grad_norm": 0.7537742087307113,
|
|
"learning_rate": 3.972859816225904e-05,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19586922228336334,
|
|
"step": 650,
|
|
"valid_targets_mean": 4644.2,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 1.041335453100159,
|
|
"grad_norm": 0.5583270297148698,
|
|
"learning_rate": 3.97154245706142e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803000271320343,
|
|
"step": 655,
|
|
"valid_targets_mean": 5600.0,
|
|
"valid_targets_min": 2367
|
|
},
|
|
{
|
|
"epoch": 1.0492845786963434,
|
|
"grad_norm": 0.6374048046848771,
|
|
"learning_rate": 3.970194108267952e-05,
|
|
"loss": 0.3681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12824052572250366,
|
|
"step": 660,
|
|
"valid_targets_mean": 2621.2,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 1.0572337042925277,
|
|
"grad_norm": 0.6481646774277458,
|
|
"learning_rate": 3.968814791039477e-05,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18034091591835022,
|
|
"step": 665,
|
|
"valid_targets_mean": 4815.9,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 1.0651828298887123,
|
|
"grad_norm": 0.5604937418534699,
|
|
"learning_rate": 3.967404527056751e-05,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14078344404697418,
|
|
"step": 670,
|
|
"valid_targets_mean": 3346.8,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 1.0731319554848966,
|
|
"grad_norm": 0.6268490722284965,
|
|
"learning_rate": 3.9659633384869626e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16411596536636353,
|
|
"step": 675,
|
|
"valid_targets_mean": 3405.9,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 1.0810810810810811,
|
|
"grad_norm": 0.5734192702859303,
|
|
"learning_rate": 3.964491247983392e-05,
|
|
"loss": 0.3391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18273639678955078,
|
|
"step": 680,
|
|
"valid_targets_mean": 6111.9,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 1.0890302066772655,
|
|
"grad_norm": 0.7338745123074238,
|
|
"learning_rate": 3.962988278685047e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2470945417881012,
|
|
"step": 685,
|
|
"valid_targets_mean": 3405.1,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 1.09697933227345,
|
|
"grad_norm": 0.6350833888658609,
|
|
"learning_rate": 3.961454454216305e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20519232749938965,
|
|
"step": 690,
|
|
"valid_targets_mean": 3878.4,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 1.1049284578696343,
|
|
"grad_norm": 0.7446350028510265,
|
|
"learning_rate": 3.9598897986865364e-05,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16771160066127777,
|
|
"step": 695,
|
|
"valid_targets_mean": 3192.0,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 1.1128775834658187,
|
|
"grad_norm": 0.9953888397706585,
|
|
"learning_rate": 3.9582943366897316e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1620779037475586,
|
|
"step": 700,
|
|
"valid_targets_mean": 3072.9,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 1.1208267090620032,
|
|
"grad_norm": 0.6308021816044722,
|
|
"learning_rate": 3.956668093304112e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12482254207134247,
|
|
"step": 705,
|
|
"valid_targets_mean": 3077.5,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 1.1287758346581875,
|
|
"grad_norm": 0.7570644240489994,
|
|
"learning_rate": 3.9550110940917313e-05,
|
|
"loss": 0.3766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371377557516098,
|
|
"step": 710,
|
|
"valid_targets_mean": 2388.4,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 1.136724960254372,
|
|
"grad_norm": 0.49670622923673674,
|
|
"learning_rate": 3.953323365098082e-05,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20864556729793549,
|
|
"step": 715,
|
|
"valid_targets_mean": 7933.9,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 1.1446740858505564,
|
|
"grad_norm": 0.6340614710509375,
|
|
"learning_rate": 3.9516049328516795e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20961293578147888,
|
|
"step": 720,
|
|
"valid_targets_mean": 4206.6,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 1.1526232114467407,
|
|
"grad_norm": 0.7776417535619051,
|
|
"learning_rate": 3.949855824363647e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16365006566047668,
|
|
"step": 725,
|
|
"valid_targets_mean": 2456.5,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 1.1605723370429253,
|
|
"grad_norm": 0.6023475439837517,
|
|
"learning_rate": 3.948076067127294e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18141667544841766,
|
|
"step": 730,
|
|
"valid_targets_mean": 4452.1,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 1.1685214626391096,
|
|
"grad_norm": 0.6345006089775491,
|
|
"learning_rate": 3.946265689117677e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20253023505210876,
|
|
"step": 735,
|
|
"valid_targets_mean": 4549.2,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 0.5609595201317795,
|
|
"learning_rate": 3.944424718791169e-05,
|
|
"loss": 0.3646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21107006072998047,
|
|
"step": 740,
|
|
"valid_targets_mean": 5578.8,
|
|
"valid_targets_min": 2653
|
|
},
|
|
{
|
|
"epoch": 1.1844197138314785,
|
|
"grad_norm": 0.6428063852148955,
|
|
"learning_rate": 3.942553185085003e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14650104939937592,
|
|
"step": 745,
|
|
"valid_targets_mean": 3856.8,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 1.192368839427663,
|
|
"grad_norm": 0.6756009605277479,
|
|
"learning_rate": 3.940651117416824e-05,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22126460075378418,
|
|
"step": 750,
|
|
"valid_targets_mean": 4050.8,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 1.2003179650238474,
|
|
"grad_norm": 0.7278642405654511,
|
|
"learning_rate": 3.9387185456842247e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15331067144870758,
|
|
"step": 755,
|
|
"valid_targets_mean": 3092.5,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 1.2082670906200317,
|
|
"grad_norm": 0.5847783869864395,
|
|
"learning_rate": 3.936755500264274e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17170101404190063,
|
|
"step": 760,
|
|
"valid_targets_mean": 4526.8,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 1.2162162162162162,
|
|
"grad_norm": 0.5592754583249123,
|
|
"learning_rate": 3.9347620120130384e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16503629088401794,
|
|
"step": 765,
|
|
"valid_targets_mean": 4835.6,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 1.2241653418124006,
|
|
"grad_norm": 0.7398701345600385,
|
|
"learning_rate": 3.932738112265103e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17967942357063293,
|
|
"step": 770,
|
|
"valid_targets_mean": 2778.4,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 1.232114467408585,
|
|
"grad_norm": 0.690097404509567,
|
|
"learning_rate": 3.930683832833073e-05,
|
|
"loss": 0.3853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1965092122554779,
|
|
"step": 775,
|
|
"valid_targets_mean": 4384.5,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 1.2400635930047694,
|
|
"grad_norm": 0.5713611698619189,
|
|
"learning_rate": 3.928599206007076e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15468665957450867,
|
|
"step": 780,
|
|
"valid_targets_mean": 4261.5,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 1.248012718600954,
|
|
"grad_norm": 0.761211917413852,
|
|
"learning_rate": 3.926484264554253e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1935214400291443,
|
|
"step": 785,
|
|
"valid_targets_mean": 3681.1,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 1.2559618441971383,
|
|
"grad_norm": 0.6381429191289953,
|
|
"learning_rate": 3.924339041718247e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21193401515483856,
|
|
"step": 790,
|
|
"valid_targets_mean": 4212.9,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 1.2639109697933226,
|
|
"grad_norm": 0.6959676861756574,
|
|
"learning_rate": 3.922163571218676e-05,
|
|
"loss": 0.3601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16991227865219116,
|
|
"step": 795,
|
|
"valid_targets_mean": 3146.0,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 1.2718600953895072,
|
|
"grad_norm": 0.5997271361868702,
|
|
"learning_rate": 3.919957887250606e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2026892602443695,
|
|
"step": 800,
|
|
"valid_targets_mean": 4720.2,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 1.2798092209856915,
|
|
"grad_norm": 0.679534326529102,
|
|
"learning_rate": 3.917722024484011e-05,
|
|
"loss": 0.3691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1892189383506775,
|
|
"step": 805,
|
|
"valid_targets_mean": 4413.6,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 1.287758346581876,
|
|
"grad_norm": 0.8199914951759314,
|
|
"learning_rate": 3.915456018063232e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17721492052078247,
|
|
"step": 810,
|
|
"valid_targets_mean": 2322.0,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 1.2957074721780604,
|
|
"grad_norm": 0.5989279163067976,
|
|
"learning_rate": 3.9131599036064204e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20825499296188354,
|
|
"step": 815,
|
|
"valid_targets_mean": 4533.8,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 1.303656597774245,
|
|
"grad_norm": 0.5363866062067153,
|
|
"learning_rate": 3.9108337172049794e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12992757558822632,
|
|
"step": 820,
|
|
"valid_targets_mean": 2938.5,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 1.3116057233704292,
|
|
"grad_norm": 0.6206543733474164,
|
|
"learning_rate": 3.908477495422998e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22375190258026123,
|
|
"step": 825,
|
|
"valid_targets_mean": 5505.0,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 1.3195548489666136,
|
|
"grad_norm": 0.6448129470414484,
|
|
"learning_rate": 3.906091275296676e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18899646401405334,
|
|
"step": 830,
|
|
"valid_targets_mean": 3576.0,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 1.3275039745627981,
|
|
"grad_norm": 0.6032919353781647,
|
|
"learning_rate": 3.903675094333739e-05,
|
|
"loss": 0.3836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14362382888793945,
|
|
"step": 835,
|
|
"valid_targets_mean": 3384.4,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 1.3354531001589824,
|
|
"grad_norm": 0.5680016933108565,
|
|
"learning_rate": 3.901228990512854e-05,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17037247121334076,
|
|
"step": 840,
|
|
"valid_targets_mean": 4402.4,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 1.343402225755167,
|
|
"grad_norm": 0.5555235405666129,
|
|
"learning_rate": 3.898753002283027e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2192343920469284,
|
|
"step": 845,
|
|
"valid_targets_mean": 6254.4,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 1.3513513513513513,
|
|
"grad_norm": 0.6193777063013585,
|
|
"learning_rate": 3.896247168563004e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21873369812965393,
|
|
"step": 850,
|
|
"valid_targets_mean": 4775.6,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 1.3593004769475359,
|
|
"grad_norm": 0.5117149485882452,
|
|
"learning_rate": 3.8937115287406524e-05,
|
|
"loss": 0.3628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24154624342918396,
|
|
"step": 855,
|
|
"valid_targets_mean": 6557.1,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 1.3672496025437202,
|
|
"grad_norm": 0.7370116778498107,
|
|
"learning_rate": 3.891146122672349e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16179916262626648,
|
|
"step": 860,
|
|
"valid_targets_mean": 3696.9,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 1.3751987281399045,
|
|
"grad_norm": 0.6166919791180683,
|
|
"learning_rate": 3.8885509906823496e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20195326209068298,
|
|
"step": 865,
|
|
"valid_targets_mean": 3952.4,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 1.383147853736089,
|
|
"grad_norm": 0.5521349877805194,
|
|
"learning_rate": 3.885926173562157e-05,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1741231083869934,
|
|
"step": 870,
|
|
"valid_targets_mean": 5520.4,
|
|
"valid_targets_min": 2155
|
|
},
|
|
{
|
|
"epoch": 1.3910969793322734,
|
|
"grad_norm": 0.5691206999289424,
|
|
"learning_rate": 3.883271712569875e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12950462102890015,
|
|
"step": 875,
|
|
"valid_targets_mean": 3328.4,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.399046104928458,
|
|
"grad_norm": 0.6505473691657189,
|
|
"learning_rate": 3.8805876494295694e-05,
|
|
"loss": 0.3656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11904367804527283,
|
|
"step": 880,
|
|
"valid_targets_mean": 2135.6,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 1.4069952305246423,
|
|
"grad_norm": 0.7516212546019687,
|
|
"learning_rate": 3.877874026330602e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13621735572814941,
|
|
"step": 885,
|
|
"valid_targets_mean": 2374.8,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 1.4149443561208268,
|
|
"grad_norm": 0.6794137292930527,
|
|
"learning_rate": 3.875130885926973e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22253155708312988,
|
|
"step": 890,
|
|
"valid_targets_mean": 4001.5,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 1.4228934817170111,
|
|
"grad_norm": 0.7038381354871681,
|
|
"learning_rate": 3.872358271336651e-05,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20822739601135254,
|
|
"step": 895,
|
|
"valid_targets_mean": 2815.1,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 1.4308426073131955,
|
|
"grad_norm": 0.7825910202553955,
|
|
"learning_rate": 3.8695562261408915e-05,
|
|
"loss": 0.3692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11792026460170746,
|
|
"step": 900,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 1.43879173290938,
|
|
"grad_norm": 0.6966724114222536,
|
|
"learning_rate": 3.8667247943835555e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13646200299263,
|
|
"step": 905,
|
|
"valid_targets_mean": 2029.8,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 1.4467408585055643,
|
|
"grad_norm": 0.7088060747787347,
|
|
"learning_rate": 3.863864020570414e-05,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17936602234840393,
|
|
"step": 910,
|
|
"valid_targets_mean": 3115.9,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 1.4546899841017489,
|
|
"grad_norm": 0.5784160911219152,
|
|
"learning_rate": 3.860973949668454e-05,
|
|
"loss": 0.3931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19435420632362366,
|
|
"step": 915,
|
|
"valid_targets_mean": 4225.2,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 1.4626391096979332,
|
|
"grad_norm": 0.5167987626270913,
|
|
"learning_rate": 3.8580546271051634e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21843630075454712,
|
|
"step": 920,
|
|
"valid_targets_mean": 6280.8,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 0.5762098660490328,
|
|
"learning_rate": 3.8551060987678236e-05,
|
|
"loss": 0.3649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16239863634109497,
|
|
"step": 925,
|
|
"valid_targets_mean": 4458.0,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 1.478537360890302,
|
|
"grad_norm": 0.65489489605191,
|
|
"learning_rate": 3.852128411002787e-05,
|
|
"loss": 0.361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2063177525997162,
|
|
"step": 930,
|
|
"valid_targets_mean": 4031.4,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 1.4864864864864864,
|
|
"grad_norm": 0.6153603679044886,
|
|
"learning_rate": 3.849121610614745e-05,
|
|
"loss": 0.3439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15056440234184265,
|
|
"step": 935,
|
|
"valid_targets_mean": 3376.5,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 1.494435612082671,
|
|
"grad_norm": 0.720720885080228,
|
|
"learning_rate": 3.8460857448659975e-05,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19375024735927582,
|
|
"step": 940,
|
|
"valid_targets_mean": 2359.4,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 1.5023847376788553,
|
|
"grad_norm": 0.7088207850227407,
|
|
"learning_rate": 3.8430208614757044e-05,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2336258888244629,
|
|
"step": 945,
|
|
"valid_targets_mean": 3285.8,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.5103338632750396,
|
|
"grad_norm": 0.6136321545069627,
|
|
"learning_rate": 3.8399270086191425e-05,
|
|
"loss": 0.3719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17189423739910126,
|
|
"step": 950,
|
|
"valid_targets_mean": 3898.2,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 1.5182829888712241,
|
|
"grad_norm": 0.7949538125164022,
|
|
"learning_rate": 3.8368042349269405e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13399812579154968,
|
|
"step": 955,
|
|
"valid_targets_mean": 2868.6,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 1.5262321144674087,
|
|
"grad_norm": 0.606359537348059,
|
|
"learning_rate": 3.83365258948432e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1899017095565796,
|
|
"step": 960,
|
|
"valid_targets_mean": 4916.1,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 1.534181240063593,
|
|
"grad_norm": 0.6679926943719391,
|
|
"learning_rate": 3.830472121830323e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17197825014591217,
|
|
"step": 965,
|
|
"valid_targets_mean": 2585.5,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 1.5421303656597773,
|
|
"grad_norm": 0.6317885666509024,
|
|
"learning_rate": 3.82726288195703e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23312941193580627,
|
|
"step": 970,
|
|
"valid_targets_mean": 3870.1,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 1.550079491255962,
|
|
"grad_norm": 0.5504192580508691,
|
|
"learning_rate": 3.824024920308781e-05,
|
|
"loss": 0.3682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724580466747284,
|
|
"step": 975,
|
|
"valid_targets_mean": 6295.1,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 1.5580286168521462,
|
|
"grad_norm": 0.5307671733223747,
|
|
"learning_rate": 3.820758287781374e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13338515162467957,
|
|
"step": 980,
|
|
"valid_targets_mean": 3808.4,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 1.5659777424483305,
|
|
"grad_norm": 0.5707215536303206,
|
|
"learning_rate": 3.8174630357212714e-05,
|
|
"loss": 0.351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19606107473373413,
|
|
"step": 985,
|
|
"valid_targets_mean": 5892.4,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 1.573926868044515,
|
|
"grad_norm": 0.5462372869946902,
|
|
"learning_rate": 3.8141392159247905e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1492500901222229,
|
|
"step": 990,
|
|
"valid_targets_mean": 4361.0,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 1.5818759936406996,
|
|
"grad_norm": 0.5911439418691301,
|
|
"learning_rate": 3.81078688063729e-05,
|
|
"loss": 0.3552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19003117084503174,
|
|
"step": 995,
|
|
"valid_targets_mean": 4883.8,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 1.589825119236884,
|
|
"grad_norm": 0.5679659872208389,
|
|
"learning_rate": 3.807406082552348e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1483040750026703,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3333.6,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 1.5977742448330683,
|
|
"grad_norm": 0.5501186351185101,
|
|
"learning_rate": 3.803996874810934e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20205837488174438,
|
|
"step": 1005,
|
|
"valid_targets_mean": 5963.9,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 1.6057233704292528,
|
|
"grad_norm": 0.5923202096306006,
|
|
"learning_rate": 3.800559311000575e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14523530006408691,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2910.9,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 1.6136724960254372,
|
|
"grad_norm": 0.6604531207438563,
|
|
"learning_rate": 3.7970934451545104e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18654431402683258,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3332.9,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 1.6216216216216215,
|
|
"grad_norm": 0.5741841166836468,
|
|
"learning_rate": 3.7935993317508455e-05,
|
|
"loss": 0.3586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21181052923202515,
|
|
"step": 1020,
|
|
"valid_targets_mean": 6463.5,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 1.629570747217806,
|
|
"grad_norm": 0.6866813517566822,
|
|
"learning_rate": 3.790077025711694e-05,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12963923811912537,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2346.8,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.6375198728139906,
|
|
"grad_norm": 0.5817536289656748,
|
|
"learning_rate": 3.786526582402313e-05,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2132720947265625,
|
|
"step": 1030,
|
|
"valid_targets_mean": 5020.4,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 1.645468998410175,
|
|
"grad_norm": 0.6680047000641487,
|
|
"learning_rate": 3.782948057630236e-05,
|
|
"loss": 0.3732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23795434832572937,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3511.0,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 1.6534181240063592,
|
|
"grad_norm": 0.583669836380602,
|
|
"learning_rate": 3.779341507644394e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15330632030963898,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3401.6,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 1.6613672496025438,
|
|
"grad_norm": 0.7231419033987354,
|
|
"learning_rate": 3.775706989134231e-05,
|
|
"loss": 0.3667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2079402506351471,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3205.5,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 1.669316375198728,
|
|
"grad_norm": 0.6525774690308793,
|
|
"learning_rate": 3.772044559228813e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19064779579639435,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4439.6,
|
|
"valid_targets_min": 2347
|
|
},
|
|
{
|
|
"epoch": 1.6772655007949124,
|
|
"grad_norm": 0.6484702379102036,
|
|
"learning_rate": 3.768354275495933e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2309150993824005,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5336.1,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 1.685214626391097,
|
|
"grad_norm": 0.6674627882856452,
|
|
"learning_rate": 3.764636195941198e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2122138887643814,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4063.1,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 1.6931637519872815,
|
|
"grad_norm": 0.7349284396034317,
|
|
"learning_rate": 3.760890379007129e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1845802664756775,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2808.2,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 1.7011128775834659,
|
|
"grad_norm": 0.6626311851762324,
|
|
"learning_rate": 3.757116883572232e-05,
|
|
"loss": 0.3531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1868089735507965,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3413.6,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 1.7090620031796502,
|
|
"grad_norm": 0.4922226767076289,
|
|
"learning_rate": 3.753315768950079e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09707795083522797,
|
|
"step": 1075,
|
|
"valid_targets_mean": 2849.1,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 1.7170111287758347,
|
|
"grad_norm": 0.6859242382615042,
|
|
"learning_rate": 3.74948709488837e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20478245615959167,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4360.9,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 1.724960254372019,
|
|
"grad_norm": 0.5770801265901374,
|
|
"learning_rate": 3.745630921568004e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14658771455287933,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4054.5,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 1.7329093799682034,
|
|
"grad_norm": 0.5637514099816471,
|
|
"learning_rate": 3.741747309602117e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19290859997272491,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5286.8,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 1.740858505564388,
|
|
"grad_norm": 0.6802379856424625,
|
|
"learning_rate": 3.737836320035146e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.116112120449543,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2164.1,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 1.7488076311605725,
|
|
"grad_norm": 0.6649683485767957,
|
|
"learning_rate": 3.733898014341858e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1766214668750763,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3082.8,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 1.7567567567567568,
|
|
"grad_norm": 0.7362506126332726,
|
|
"learning_rate": 3.729932454426391e-05,
|
|
"loss": 0.3814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18259881436824799,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2773.4,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 0.7614375004820978,
|
|
"learning_rate": 3.725939702621273e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19514119625091553,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2938.6,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 1.7726550079491257,
|
|
"grad_norm": 0.6770025576894173,
|
|
"learning_rate": 3.72191982168645e-05,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19374173879623413,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3091.5,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 1.78060413354531,
|
|
"grad_norm": 0.6195847799672252,
|
|
"learning_rate": 3.717872874808298e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16513989865779877,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3940.8,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 1.7885532591414943,
|
|
"grad_norm": 0.7190328313184327,
|
|
"learning_rate": 3.713798925598623e-05,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348995566368103,
|
|
"step": 1125,
|
|
"valid_targets_mean": 1953.6,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 1.7965023847376789,
|
|
"grad_norm": 0.6026081920719627,
|
|
"learning_rate": 3.709698038093671e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13964048027992249,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2851.6,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 1.8044515103338634,
|
|
"grad_norm": 0.5393044296458391,
|
|
"learning_rate": 3.705570276753116e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18251264095306396,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4347.2,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 1.8124006359300477,
|
|
"grad_norm": 0.6538359224832155,
|
|
"learning_rate": 3.701415706459044e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17453143000602722,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3594.2,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 1.820349761526232,
|
|
"grad_norm": 0.6166308790415599,
|
|
"learning_rate": 3.697234392514942e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11332182586193085,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2065.2,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 1.8282988871224166,
|
|
"grad_norm": 0.6769851275245052,
|
|
"learning_rate": 3.693026400644662e-05,
|
|
"loss": 0.3469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19679512083530426,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3868.2,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 1.836248012718601,
|
|
"grad_norm": 0.6730886131101186,
|
|
"learning_rate": 3.6887917969913944e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1796862781047821,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4145.0,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 1.8441971383147853,
|
|
"grad_norm": 0.5732379649193706,
|
|
"learning_rate": 3.684530648116625e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11382512003183365,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3540.4,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.8521462639109698,
|
|
"grad_norm": 0.6105738788250634,
|
|
"learning_rate": 3.68024302099909e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15084877610206604,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4288.4,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 1.8600953895071544,
|
|
"grad_norm": 0.6640329727062773,
|
|
"learning_rate": 3.6759289830337246e-05,
|
|
"loss": 0.3409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14186522364616394,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2868.9,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 1.8680445151033387,
|
|
"grad_norm": 0.6282494997125555,
|
|
"learning_rate": 3.6715886020306e-05,
|
|
"loss": 0.3947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22236737608909607,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5088.0,
|
|
"valid_targets_min": 2009
|
|
},
|
|
{
|
|
"epoch": 1.875993640699523,
|
|
"grad_norm": 0.5980016924564114,
|
|
"learning_rate": 3.6672219462138604e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16929709911346436,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3898.2,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 1.8839427662957076,
|
|
"grad_norm": 0.5328791062143529,
|
|
"learning_rate": 3.6628290842206495e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19334006309509277,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4075.6,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 1.8918918918918919,
|
|
"grad_norm": 0.6083154910411095,
|
|
"learning_rate": 3.658410085100034e-05,
|
|
"loss": 0.3537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15433341264724731,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3343.4,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 1.8998410174880762,
|
|
"grad_norm": 0.6689679102855909,
|
|
"learning_rate": 3.6539650183119126e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1287471354007721,
|
|
"step": 1195,
|
|
"valid_targets_mean": 2078.6,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 1.9077901430842608,
|
|
"grad_norm": 0.5104537781915853,
|
|
"learning_rate": 3.64949395372593e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0954732820391655,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3688.2,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 1.9157392686804453,
|
|
"grad_norm": 0.6080060580714937,
|
|
"learning_rate": 3.644996961620378e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19080494344234467,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4640.5,
|
|
"valid_targets_min": 2451
|
|
},
|
|
{
|
|
"epoch": 1.9236883942766294,
|
|
"grad_norm": 0.9593180490991776,
|
|
"learning_rate": 3.6404741126810854e-05,
|
|
"loss": 0.342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1754782795906067,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3210.6,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 1.931637519872814,
|
|
"grad_norm": 0.5925405067672451,
|
|
"learning_rate": 3.635925478000315e-05,
|
|
"loss": 0.3946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22254809737205505,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4403.1,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 1.9395866454689985,
|
|
"grad_norm": 0.598275034000351,
|
|
"learning_rate": 3.631351129075638e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18938316404819489,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4187.8,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 1.9475357710651828,
|
|
"grad_norm": 0.640793199942592,
|
|
"learning_rate": 3.6267511378088174e-05,
|
|
"loss": 0.3451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15500642359256744,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2808.8,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 1.9554848966613672,
|
|
"grad_norm": 0.5916311845122331,
|
|
"learning_rate": 3.622125576504674e-05,
|
|
"loss": 0.3642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21028228104114532,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4243.1,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 1.9634340222575517,
|
|
"grad_norm": 0.509752473461242,
|
|
"learning_rate": 3.6174745178699484e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11429096013307571,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3199.5,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 1.9713831478537363,
|
|
"grad_norm": 0.5987109933065777,
|
|
"learning_rate": 3.612798035012161e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19191518425941467,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4128.2,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 1.9793322734499204,
|
|
"grad_norm": 0.6565850047752417,
|
|
"learning_rate": 3.608096201438465e-05,
|
|
"loss": 0.3455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14762930572032928,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3230.0,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 1.987281399046105,
|
|
"grad_norm": 0.6889832025769377,
|
|
"learning_rate": 3.603369091054484e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1776995062828064,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2915.4,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 1.9952305246422894,
|
|
"grad_norm": 0.6670984467362939,
|
|
"learning_rate": 3.5986167781631556e-05,
|
|
"loss": 0.3715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21748743951320648,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3642.2,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 2.0031796502384736,
|
|
"grad_norm": 0.8181330550405205,
|
|
"learning_rate": 3.5938393374635634e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18551386892795563,
|
|
"step": 1260,
|
|
"valid_targets_mean": 5446.5,
|
|
"valid_targets_min": 2304
|
|
},
|
|
{
|
|
"epoch": 2.011128775834658,
|
|
"grad_norm": 0.5106034893619054,
|
|
"learning_rate": 3.589036844049762e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12697644531726837,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3709.9,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 2.0190779014308426,
|
|
"grad_norm": 0.5986798747580412,
|
|
"learning_rate": 3.584209373409593e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18665358424186707,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4185.0,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 2.027027027027027,
|
|
"grad_norm": 0.5269148666464155,
|
|
"learning_rate": 3.579357001423505e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25899261236190796,
|
|
"step": 1275,
|
|
"valid_targets_mean": 6692.8,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 2.0349761526232113,
|
|
"grad_norm": 0.5433293416870248,
|
|
"learning_rate": 3.5744798043633566e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23166561126708984,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6078.2,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 2.042925278219396,
|
|
"grad_norm": 0.5513571446310909,
|
|
"learning_rate": 3.569577858891219e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15241014957427979,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4925.0,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 2.0508744038155804,
|
|
"grad_norm": 0.6533000134915221,
|
|
"learning_rate": 3.56465124205817e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1586187481880188,
|
|
"step": 1290,
|
|
"valid_targets_mean": 6166.2,
|
|
"valid_targets_min": 2235
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 0.7553542549720361,
|
|
"learning_rate": 3.559700031303082e-05,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326591819524765,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2367.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 2.066772655007949,
|
|
"grad_norm": 0.6462734468803769,
|
|
"learning_rate": 3.554724304451411e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1373012363910675,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3322.4,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 2.0747217806041336,
|
|
"grad_norm": 0.6467779747848436,
|
|
"learning_rate": 3.549724139713962e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13097721338272095,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2458.6,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 2.082670906200318,
|
|
"grad_norm": 0.7399584033788398,
|
|
"learning_rate": 3.544699615685671e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14833121001720428,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2447.2,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 2.0906200317965022,
|
|
"grad_norm": 0.6544911119306583,
|
|
"learning_rate": 3.539650811344363e-05,
|
|
"loss": 0.3391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14502529799938202,
|
|
"step": 1315,
|
|
"valid_targets_mean": 2711.2,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 2.098569157392687,
|
|
"grad_norm": 0.6751795253497,
|
|
"learning_rate": 3.534577806049512e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13287970423698425,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2654.4,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 2.1065182829888713,
|
|
"grad_norm": 0.6703079184474495,
|
|
"learning_rate": 3.529480679540996e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12706345319747925,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3405.2,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 2.1144674085850554,
|
|
"grad_norm": 0.6243833344832,
|
|
"learning_rate": 3.524359511937838e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1704496592283249,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3557.9,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 2.12241653418124,
|
|
"grad_norm": 0.702935724221822,
|
|
"learning_rate": 3.5192143837369523e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14811083674430847,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2622.0,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 2.1303656597774245,
|
|
"grad_norm": 0.6061968007214582,
|
|
"learning_rate": 3.514045375811878e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17746277153491974,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3444.5,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 2.138314785373609,
|
|
"grad_norm": 0.7976640047799736,
|
|
"learning_rate": 3.508852569411506e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1408448964357376,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2202.8,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 2.146263910969793,
|
|
"grad_norm": 0.6589020594793475,
|
|
"learning_rate": 3.503636046158803e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16979548335075378,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3903.2,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 2.1542130365659777,
|
|
"grad_norm": 0.6773105824531864,
|
|
"learning_rate": 3.498395888049526e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14706167578697205,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2444.0,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 2.1621621621621623,
|
|
"grad_norm": 0.6531237863578171,
|
|
"learning_rate": 3.4931321774509396e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18497243523597717,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4354.8,
|
|
"valid_targets_min": 1953
|
|
},
|
|
{
|
|
"epoch": 2.1701112877583464,
|
|
"grad_norm": 0.6754611110680575,
|
|
"learning_rate": 3.487844997100515e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09990522265434265,
|
|
"step": 1365,
|
|
"valid_targets_mean": 1893.4,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 2.178060413354531,
|
|
"grad_norm": 0.5825678080954483,
|
|
"learning_rate": 3.482534430104633e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11447185277938843,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3309.4,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 2.1860095389507155,
|
|
"grad_norm": 0.6944670313530548,
|
|
"learning_rate": 3.4772005599372764e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17034995555877686,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3420.9,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 2.1939586645469,
|
|
"grad_norm": 0.6626791843037448,
|
|
"learning_rate": 3.4718434704387174e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21534350514411926,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3778.2,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 2.201907790143084,
|
|
"grad_norm": 0.5383478497557638,
|
|
"learning_rate": 3.4664632458142016e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12855181097984314,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3273.8,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 2.2098569157392687,
|
|
"grad_norm": 0.4831730167180584,
|
|
"learning_rate": 3.461059970632622e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313454508781433,
|
|
"step": 1390,
|
|
"valid_targets_mean": 6938.2,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 2.2178060413354532,
|
|
"grad_norm": 0.5183586208492955,
|
|
"learning_rate": 3.4556337298251943e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1374768614768982,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4626.5,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 2.2257551669316373,
|
|
"grad_norm": 0.5633870098819537,
|
|
"learning_rate": 3.450184608684114e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.133875772356987,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3611.1,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 2.233704292527822,
|
|
"grad_norm": 0.5642052365472126,
|
|
"learning_rate": 3.444712692861224e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1603538990020752,
|
|
"step": 1405,
|
|
"valid_targets_mean": 5063.1,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 2.2416534181240064,
|
|
"grad_norm": 0.5637982259772618,
|
|
"learning_rate": 3.439218068366663e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10265891999006271,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3293.0,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 2.249602543720191,
|
|
"grad_norm": 0.6614725412772389,
|
|
"learning_rate": 3.433700821567516e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15207067131996155,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3494.0,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 2.257551669316375,
|
|
"grad_norm": 0.618101791916237,
|
|
"learning_rate": 3.428161039186456e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17047543823719025,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4191.0,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 2.2655007949125596,
|
|
"grad_norm": 0.5222512915107774,
|
|
"learning_rate": 3.42259880830038e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21436159312725067,
|
|
"step": 1425,
|
|
"valid_targets_mean": 6339.6,
|
|
"valid_targets_min": 2344
|
|
},
|
|
{
|
|
"epoch": 2.273449920508744,
|
|
"grad_norm": 0.6347705679898757,
|
|
"learning_rate": 3.417014216339043e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19498160481452942,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3726.9,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 2.2813990461049283,
|
|
"grad_norm": 0.5374041589597816,
|
|
"learning_rate": 3.4114073510836794e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16581955552101135,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4560.4,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 2.289348171701113,
|
|
"grad_norm": 0.5803405421710033,
|
|
"learning_rate": 3.4057783006656274e-05,
|
|
"loss": 0.351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20126879215240479,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5261.8,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 2.2972972972972974,
|
|
"grad_norm": 0.723881430467387,
|
|
"learning_rate": 3.400127153564941e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18336385488510132,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2683.6,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 2.3052464228934815,
|
|
"grad_norm": 0.787730562649186,
|
|
"learning_rate": 3.394453998609001e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21171879768371582,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5184.8,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 2.313195548489666,
|
|
"grad_norm": 0.7482146196440238,
|
|
"learning_rate": 3.388758924971117e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15064117312431335,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2597.6,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 2.3211446740858506,
|
|
"grad_norm": 0.5499685765406672,
|
|
"learning_rate": 3.3830420221691286e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1761244237422943,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5385.6,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 2.329093799682035,
|
|
"grad_norm": 0.6660733815470989,
|
|
"learning_rate": 3.377303380063995e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1920149177312851,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3346.4,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 2.337042925278219,
|
|
"grad_norm": 0.7256148583125985,
|
|
"learning_rate": 3.371543088858384e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20087599754333496,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3248.2,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 2.3449920508744038,
|
|
"grad_norm": 0.6107888720912521,
|
|
"learning_rate": 3.365761239095253e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16190680861473083,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3124.5,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 0.5674324607835163,
|
|
"learning_rate": 3.3599579216564314e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13981136679649353,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3981.9,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 2.360890302066773,
|
|
"grad_norm": 0.5862663712709394,
|
|
"learning_rate": 3.354133227761181e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1091814711689949,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3252.6,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 2.368839427662957,
|
|
"grad_norm": 0.6719361792367678,
|
|
"learning_rate": 3.3482872489647745e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1568145453929901,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2876.8,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 2.3767885532591415,
|
|
"grad_norm": 0.5785827655205386,
|
|
"learning_rate": 3.342420077157047e-05,
|
|
"loss": 0.3487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26261404156684875,
|
|
"step": 1495,
|
|
"valid_targets_mean": 6562.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 2.384737678855326,
|
|
"grad_norm": 0.7324100797235176,
|
|
"learning_rate": 3.336531804560957e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20457011461257935,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3124.2,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 2.39268680445151,
|
|
"grad_norm": 0.7024843625353243,
|
|
"learning_rate": 3.330622523731136e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16395874321460724,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3874.9,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 2.4006359300476947,
|
|
"grad_norm": 0.590687115346679,
|
|
"learning_rate": 3.32469232755243e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17100322246551514,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4807.8,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 2.4085850556438793,
|
|
"grad_norm": 0.5999477052930492,
|
|
"learning_rate": 3.318741309238444e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1923142373561859,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5180.1,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 2.4165341812400634,
|
|
"grad_norm": 0.5655309774373689,
|
|
"learning_rate": 3.312769562330075e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11448682099580765,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3210.2,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 2.424483306836248,
|
|
"grad_norm": 0.4362019707709764,
|
|
"learning_rate": 3.306777180694042e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14830708503723145,
|
|
"step": 1525,
|
|
"valid_targets_mean": 6642.8,
|
|
"valid_targets_min": 2507
|
|
},
|
|
{
|
|
"epoch": 2.4324324324324325,
|
|
"grad_norm": 0.6860102159887091,
|
|
"learning_rate": 3.30076425852141e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18326956033706665,
|
|
"step": 1530,
|
|
"valid_targets_mean": 2916.0,
|
|
"valid_targets_min": 2030
|
|
},
|
|
{
|
|
"epoch": 2.440381558028617,
|
|
"grad_norm": 0.7601940514766555,
|
|
"learning_rate": 3.294730890326109e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1639365404844284,
|
|
"step": 1535,
|
|
"valid_targets_mean": 1931.6,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 2.448330683624801,
|
|
"grad_norm": 0.7044049600485459,
|
|
"learning_rate": 3.2886771709434504e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13485601544380188,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2230.9,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 2.4562798092209857,
|
|
"grad_norm": 0.5857707924480844,
|
|
"learning_rate": 3.282603195528635e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17711205780506134,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4256.6,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 2.46422893481717,
|
|
"grad_norm": 0.6653083958162175,
|
|
"learning_rate": 3.276509059555257e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16080039739608765,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2926.4,
|
|
"valid_targets_min": 2147
|
|
},
|
|
{
|
|
"epoch": 2.4721780604133547,
|
|
"grad_norm": 0.6162263214513273,
|
|
"learning_rate": 3.270394858813802e-05,
|
|
"loss": 0.3586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18909452855587006,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4189.4,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 2.480127186009539,
|
|
"grad_norm": 0.6738285389123028,
|
|
"learning_rate": 3.264260689410147e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1735895872116089,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3832.8,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 2.4880763116057234,
|
|
"grad_norm": 0.6629668074862213,
|
|
"learning_rate": 3.2581066477640435e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12273503839969635,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2423.2,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 2.496025437201908,
|
|
"grad_norm": 0.615475039091319,
|
|
"learning_rate": 3.251932830607603e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16254132986068726,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3841.8,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 2.503974562798092,
|
|
"grad_norm": 0.7407980753518146,
|
|
"learning_rate": 3.245739334983779e-05,
|
|
"loss": 0.3356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16704031825065613,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2616.2,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 2.5119236883942766,
|
|
"grad_norm": 0.7123719746188885,
|
|
"learning_rate": 3.239526258244842e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17932352423667908,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3142.9,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 2.519872813990461,
|
|
"grad_norm": 0.609929511217417,
|
|
"learning_rate": 3.233293698050845e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11435022205114365,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3124.5,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 2.5278219395866453,
|
|
"grad_norm": 0.5827128653709648,
|
|
"learning_rate": 3.227041752368091e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15017487108707428,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4104.4,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 2.53577106518283,
|
|
"grad_norm": 0.5854585931719669,
|
|
"learning_rate": 3.220770519467597e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15122590959072113,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3371.9,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 2.5437201907790143,
|
|
"grad_norm": 0.6399753092351013,
|
|
"learning_rate": 3.214480097923542e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14235199987888336,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3999.0,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 2.551669316375199,
|
|
"grad_norm": 0.686944672431717,
|
|
"learning_rate": 3.208170586611721e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22521042823791504,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2805.0,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 2.559618441971383,
|
|
"grad_norm": 0.6309732368201819,
|
|
"learning_rate": 3.201842084707993e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2433374524116516,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5035.8,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 2.5675675675675675,
|
|
"grad_norm": 0.6491839858989336,
|
|
"learning_rate": 3.195494691686718e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14189361035823822,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3480.8,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 2.575516693163752,
|
|
"grad_norm": 0.5081873185287394,
|
|
"learning_rate": 3.189128507319197e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13032974302768707,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4447.6,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 2.5834658187599366,
|
|
"grad_norm": 0.49202710337126104,
|
|
"learning_rate": 3.182743631672102e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26466819643974304,
|
|
"step": 1625,
|
|
"valid_targets_mean": 8314.1,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 2.5914149443561207,
|
|
"grad_norm": 0.6374175205452308,
|
|
"learning_rate": 3.1763401651059025e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20969203114509583,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3612.1,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 2.5993640699523053,
|
|
"grad_norm": 0.7521587126307281,
|
|
"learning_rate": 3.1699182082732886e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18052595853805542,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2733.2,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 2.60731319554849,
|
|
"grad_norm": 0.5376977710749674,
|
|
"learning_rate": 3.1634778621175905e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2236747294664383,
|
|
"step": 1640,
|
|
"valid_targets_mean": 6192.2,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 2.615262321144674,
|
|
"grad_norm": 0.6096555924261151,
|
|
"learning_rate": 3.157019227871189e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1605105698108673,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3999.2,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 2.6232114467408585,
|
|
"grad_norm": 0.5525062486801042,
|
|
"learning_rate": 3.150542407053927e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22758035361766815,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5145.8,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 2.631160572337043,
|
|
"grad_norm": 0.6092884704368037,
|
|
"learning_rate": 3.144047501471511e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16397756338119507,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3598.6,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 2.639109697933227,
|
|
"grad_norm": 0.5170450765282795,
|
|
"learning_rate": 3.1375346132139135e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17133986949920654,
|
|
"step": 1660,
|
|
"valid_targets_mean": 5411.5,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 0.5613566077415174,
|
|
"learning_rate": 3.131003844653766e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11237911880016327,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2721.8,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 2.6550079491255962,
|
|
"grad_norm": 0.5431563540622959,
|
|
"learning_rate": 3.124455298444752e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10858015716075897,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3071.0,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 2.6629570747217803,
|
|
"grad_norm": 0.5829524098018979,
|
|
"learning_rate": 3.1178890775199925e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12583525478839874,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3418.2,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 2.670906200317965,
|
|
"grad_norm": 0.7604433754034259,
|
|
"learning_rate": 3.1113052850904275e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16003915667533875,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2765.9,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 2.6788553259141494,
|
|
"grad_norm": 0.7613120457514381,
|
|
"learning_rate": 3.1047040246431936e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15300467610359192,
|
|
"step": 1685,
|
|
"valid_targets_mean": 2752.9,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 2.686804451510334,
|
|
"grad_norm": 0.7115927486570475,
|
|
"learning_rate": 3.098085399939998e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1998613476753235,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3649.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.6947535771065185,
|
|
"grad_norm": 0.522356712099586,
|
|
"learning_rate": 3.091449515015489e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1739317625761032,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5894.2,
|
|
"valid_targets_min": 1534
|
|
},
|
|
{
|
|
"epoch": 2.7027027027027026,
|
|
"grad_norm": 0.5708426601053789,
|
|
"learning_rate": 3.084796474175618e-05,
|
|
"loss": 0.3157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12891064584255219,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3548.2,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 2.710651828298887,
|
|
"grad_norm": 0.7492271902020405,
|
|
"learning_rate": 3.078126381996001e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2279568761587143,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3080.9,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 2.7186009538950717,
|
|
"grad_norm": 0.6157955691057388,
|
|
"learning_rate": 3.071439343320274e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11915339529514313,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3054.2,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 2.726550079491256,
|
|
"grad_norm": 0.7262078485648868,
|
|
"learning_rate": 3.064735463258449e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25576066970825195,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3610.1,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 2.7344992050874404,
|
|
"grad_norm": 0.8434303644841046,
|
|
"learning_rate": 3.0580148471852544e-05,
|
|
"loss": 0.3737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2209487408399582,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3562.8,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 2.742448330683625,
|
|
"grad_norm": 0.6898146080457395,
|
|
"learning_rate": 3.0512776007384882e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19492581486701965,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3983.8,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 2.750397456279809,
|
|
"grad_norm": 0.6479469076509089,
|
|
"learning_rate": 3.0445238298173492e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14962363243103027,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4064.6,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 2.7583465818759936,
|
|
"grad_norm": 0.6908917878570248,
|
|
"learning_rate": 3.0377536405807753e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1632249504327774,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2292.1,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 2.766295707472178,
|
|
"grad_norm": 0.6623062767400353,
|
|
"learning_rate": 3.030967139445776e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13153794407844543,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3749.6,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 2.7742448330683622,
|
|
"grad_norm": 0.612323463308583,
|
|
"learning_rate": 3.0241644330857604e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11851771175861359,
|
|
"step": 1745,
|
|
"valid_targets_mean": 2574.8,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 2.7821939586645468,
|
|
"grad_norm": 0.557382580489836,
|
|
"learning_rate": 3.0173456284288565e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15527410805225372,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5463.1,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 2.7901430842607313,
|
|
"grad_norm": 0.7279739074375194,
|
|
"learning_rate": 3.010510832656233e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1903541386127472,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3484.8,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 2.798092209856916,
|
|
"grad_norm": 0.7164175581770287,
|
|
"learning_rate": 3.0036601532004175e-05,
|
|
"loss": 0.3284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734030842781067,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3292.9,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 2.8060413354531004,
|
|
"grad_norm": 0.43116874351447815,
|
|
"learning_rate": 2.996793697743601e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11032368242740631,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3994.1,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 2.8139904610492845,
|
|
"grad_norm": 0.8896935086630756,
|
|
"learning_rate": 2.9899115742159512e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15294399857521057,
|
|
"step": 1770,
|
|
"valid_targets_mean": 2824.2,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 2.821939586645469,
|
|
"grad_norm": 0.6478090751572374,
|
|
"learning_rate": 2.9830138907939137e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16810470819473267,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4880.5,
|
|
"valid_targets_min": 2324
|
|
},
|
|
{
|
|
"epoch": 2.8298887122416536,
|
|
"grad_norm": 0.6111576105738485,
|
|
"learning_rate": 2.976100755898511e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15136617422103882,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3223.0,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 2.8378378378378377,
|
|
"grad_norm": 0.5834929293069772,
|
|
"learning_rate": 2.9691722781936398e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20350173115730286,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4635.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 2.8457869634340223,
|
|
"grad_norm": 0.49162115708035214,
|
|
"learning_rate": 2.962228566584362e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1713608205318451,
|
|
"step": 1790,
|
|
"valid_targets_mean": 5182.4,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 2.853736089030207,
|
|
"grad_norm": 0.5564490869840354,
|
|
"learning_rate": 2.9552697302151937e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2004539966583252,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5286.2,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 2.861685214626391,
|
|
"grad_norm": 0.7238516726975478,
|
|
"learning_rate": 2.9482958784683883e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16671222448349,
|
|
"step": 1800,
|
|
"valid_targets_mean": 2532.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 2.8696343402225755,
|
|
"grad_norm": 0.6015021715890214,
|
|
"learning_rate": 2.9413071209622174e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19223028421401978,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5203.1,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 2.87758346581876,
|
|
"grad_norm": 0.71233512948398,
|
|
"learning_rate": 2.934303567549251e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20446409285068512,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3668.5,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 2.885532591414944,
|
|
"grad_norm": 0.5663212762644189,
|
|
"learning_rate": 2.9272853283146255e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16672159731388092,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4424.0,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 2.8934817170111287,
|
|
"grad_norm": 0.5646382438656454,
|
|
"learning_rate": 2.9202525135743158e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13206534087657928,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2982.0,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 2.901430842607313,
|
|
"grad_norm": 0.5979858155090125,
|
|
"learning_rate": 2.9132052338734033e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724117040634155,
|
|
"step": 1825,
|
|
"valid_targets_mean": 6287.2,
|
|
"valid_targets_min": 1913
|
|
},
|
|
{
|
|
"epoch": 2.9093799682034978,
|
|
"grad_norm": 0.5199746384712179,
|
|
"learning_rate": 2.9061435999843354e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20305633544921875,
|
|
"step": 1830,
|
|
"valid_targets_mean": 5638.5,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 2.9173290937996823,
|
|
"grad_norm": 0.5819199340511133,
|
|
"learning_rate": 2.8990677229051855e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16691234707832336,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3754.4,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 2.9252782193958664,
|
|
"grad_norm": 0.5949719300805463,
|
|
"learning_rate": 2.8919777138579074e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438504159450531,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4242.8,
|
|
"valid_targets_min": 2592
|
|
},
|
|
{
|
|
"epoch": 2.933227344992051,
|
|
"grad_norm": 0.60110475561315,
|
|
"learning_rate": 2.8848736842865893e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12951946258544922,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3351.9,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 0.5594109136917053,
|
|
"learning_rate": 2.8777557458556993e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365208476781845,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3520.6,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 2.9491255961844196,
|
|
"grad_norm": 0.6333160967768099,
|
|
"learning_rate": 2.870624010448332e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11735793948173523,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2927.9,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 2.957074721780604,
|
|
"grad_norm": 0.5471794831028691,
|
|
"learning_rate": 2.8634785901644497e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15754257142543793,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4217.0,
|
|
"valid_targets_min": 1892
|
|
},
|
|
{
|
|
"epoch": 2.9650238473767887,
|
|
"grad_norm": 0.6974601171594974,
|
|
"learning_rate": 2.856319597319119e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13232719898223877,
|
|
"step": 1865,
|
|
"valid_targets_mean": 2323.1,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 2.972972972972973,
|
|
"grad_norm": 0.5279384240025351,
|
|
"learning_rate": 2.849147144440747e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16642966866493225,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4880.8,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 2.9809220985691574,
|
|
"grad_norm": 0.523949239441607,
|
|
"learning_rate": 2.8419613442693127e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08750275522470474,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2512.4,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 2.988871224165342,
|
|
"grad_norm": 0.5677058143191309,
|
|
"learning_rate": 2.834762309754593e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1473868489265442,
|
|
"step": 1880,
|
|
"valid_targets_mean": 3897.0,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 2.996820349761526,
|
|
"grad_norm": 0.6935313387075344,
|
|
"learning_rate": 2.8275501540543877e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17370307445526123,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3508.6,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 3.0047694753577106,
|
|
"grad_norm": 0.43490540012354023,
|
|
"learning_rate": 2.8203249905327434e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1209859848022461,
|
|
"step": 1890,
|
|
"valid_targets_mean": 6596.5,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 3.012718600953895,
|
|
"grad_norm": 0.6856368224073138,
|
|
"learning_rate": 2.81308693275817e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1579587161540985,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3335.0,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 3.0206677265500796,
|
|
"grad_norm": 0.5799624059517718,
|
|
"learning_rate": 2.8058360945018518e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12864336371421814,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3558.8,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 3.0286168521462637,
|
|
"grad_norm": 0.6643314635137537,
|
|
"learning_rate": 2.7985725897358665e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18276259303092957,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3466.2,
|
|
"valid_targets_min": 2116
|
|
},
|
|
{
|
|
"epoch": 3.0365659777424483,
|
|
"grad_norm": 0.49388904490625535,
|
|
"learning_rate": 2.791296532631389e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17703765630722046,
|
|
"step": 1910,
|
|
"valid_targets_mean": 6177.6,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 3.044515103338633,
|
|
"grad_norm": 0.5275149205712758,
|
|
"learning_rate": 2.7840080375568964e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19058680534362793,
|
|
"step": 1915,
|
|
"valid_targets_mean": 6615.5,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 3.0524642289348174,
|
|
"grad_norm": 0.7976590863448021,
|
|
"learning_rate": 2.7767072190763733e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21448560059070587,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4063.8,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 3.0604133545310015,
|
|
"grad_norm": 0.5519358838168349,
|
|
"learning_rate": 2.7693941919475076e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12203872948884964,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3897.8,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 3.068362480127186,
|
|
"grad_norm": 0.6261791399436588,
|
|
"learning_rate": 2.7620690711198906e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10429413616657257,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2748.6,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 3.0763116057233706,
|
|
"grad_norm": 0.6080920205804436,
|
|
"learning_rate": 2.7547319717332066e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11206123232841492,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2754.8,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 3.0842607313195547,
|
|
"grad_norm": 0.6075618971245208,
|
|
"learning_rate": 2.7473830091154243e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13101550936698914,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4100.8,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 3.0922098569157392,
|
|
"grad_norm": 0.5902930406665853,
|
|
"learning_rate": 2.7400222987809856e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10681091248989105,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3202.0,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 3.100158982511924,
|
|
"grad_norm": 0.9463230239083071,
|
|
"learning_rate": 2.7326499564289867e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14917445182800293,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3440.0,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 3.108108108108108,
|
|
"grad_norm": 0.6504980859510896,
|
|
"learning_rate": 2.725266097941363e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13310322165489197,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3611.8,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 3.1160572337042924,
|
|
"grad_norm": 0.5059284291690085,
|
|
"learning_rate": 2.717870839381066e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11314306408166885,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4432.5,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 3.124006359300477,
|
|
"grad_norm": 0.6582059405884192,
|
|
"learning_rate": 2.7104642969902357e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09653021395206451,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2087.4,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 3.1319554848966615,
|
|
"grad_norm": 0.5888314848362575,
|
|
"learning_rate": 2.7030465871883812e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15990781784057617,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3609.4,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 3.1399046104928456,
|
|
"grad_norm": 0.6004716706631232,
|
|
"learning_rate": 2.6956178265705434e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18831032514572144,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4997.4,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 3.14785373608903,
|
|
"grad_norm": 0.5732036509790073,
|
|
"learning_rate": 2.688178131905465e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16693753004074097,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4285.1,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 3.1558028616852147,
|
|
"grad_norm": 0.6642716587449456,
|
|
"learning_rate": 2.680727620133757e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22563181817531586,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4903.2,
|
|
"valid_targets_min": 1632
|
|
},
|
|
{
|
|
"epoch": 3.1637519872813993,
|
|
"grad_norm": 0.5730320283371737,
|
|
"learning_rate": 2.673266408366057e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17787693440914154,
|
|
"step": 1990,
|
|
"valid_targets_mean": 6239.9,
|
|
"valid_targets_min": 2056
|
|
},
|
|
{
|
|
"epoch": 3.1717011128775834,
|
|
"grad_norm": 0.65116834922532,
|
|
"learning_rate": 2.6657946138811915e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17144034802913666,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2956.0,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 3.179650238473768,
|
|
"grad_norm": 0.621857147288396,
|
|
"learning_rate": 2.6583123541243302e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12824256718158722,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3080.2,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 3.1875993640699525,
|
|
"grad_norm": 0.6880508453786564,
|
|
"learning_rate": 2.6508197467051406e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11372699588537216,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3380.0,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.1955484896661366,
|
|
"grad_norm": 0.48658080685304217,
|
|
"learning_rate": 2.6433169093959405e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09367263317108154,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4550.9,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 3.203497615262321,
|
|
"grad_norm": 0.6790245657991026,
|
|
"learning_rate": 2.6358039601298454e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2737703323364258,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4705.5,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 3.2114467408585057,
|
|
"grad_norm": 0.4781584662459681,
|
|
"learning_rate": 2.6282810169989158e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13447093963623047,
|
|
"step": 2020,
|
|
"valid_targets_mean": 5479.1,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 3.21939586645469,
|
|
"grad_norm": 0.6211917748030487,
|
|
"learning_rate": 2.6207481982523e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350020468235016,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3330.5,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 3.2273449920508743,
|
|
"grad_norm": 0.6977224109847946,
|
|
"learning_rate": 2.6132056222943757e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14188238978385925,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2958.2,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 3.235294117647059,
|
|
"grad_norm": 0.5810958181630662,
|
|
"learning_rate": 2.6056534076828883e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12793472409248352,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3674.5,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 3.2432432432432434,
|
|
"grad_norm": 0.7675646063396296,
|
|
"learning_rate": 2.598091673127091e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23905090987682343,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4330.9,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 3.2511923688394275,
|
|
"grad_norm": 1.1243672904167026,
|
|
"learning_rate": 2.5905205374858728e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17181314527988434,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2871.4,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 3.259141494435612,
|
|
"grad_norm": 0.6052052264685953,
|
|
"learning_rate": 2.5829401197658946e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263623833656311,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3440.6,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 3.2670906200317966,
|
|
"grad_norm": 0.5810288016176254,
|
|
"learning_rate": 2.5753505391197173e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12817180156707764,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3581.9,
|
|
"valid_targets_min": 1907
|
|
},
|
|
{
|
|
"epoch": 3.275039745627981,
|
|
"grad_norm": 0.5862886909195938,
|
|
"learning_rate": 2.5677519148439286e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527906000614166,
|
|
"step": 2060,
|
|
"valid_targets_mean": 6525.5,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 3.2829888712241653,
|
|
"grad_norm": 0.6103781033953464,
|
|
"learning_rate": 2.56014436637727e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11945773661136627,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3452.2,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 3.29093799682035,
|
|
"grad_norm": 0.6049733777919735,
|
|
"learning_rate": 2.5525280132987544e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13118848204612732,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3113.2,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 3.2988871224165344,
|
|
"grad_norm": 0.7797091432078112,
|
|
"learning_rate": 2.544902975325793e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1907491683959961,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3001.5,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 3.3068362480127185,
|
|
"grad_norm": 1.256494363670853,
|
|
"learning_rate": 2.5372693723123075e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037179708480835,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5284.5,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 3.314785373608903,
|
|
"grad_norm": 0.5637946782086485,
|
|
"learning_rate": 2.5296273242468514e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10294333100318909,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3113.9,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 3.3227344992050876,
|
|
"grad_norm": 0.6688942117290343,
|
|
"learning_rate": 2.5219769512507202e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10579895228147507,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3372.0,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 3.3306836248012717,
|
|
"grad_norm": 0.5746376886119903,
|
|
"learning_rate": 2.5143183735760638e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0663488507270813,
|
|
"step": 2095,
|
|
"valid_targets_mean": 2280.2,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 3.338632750397456,
|
|
"grad_norm": 0.6230652155709033,
|
|
"learning_rate": 2.5066517116039978e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15298491716384888,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3599.8,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 3.3465818759936408,
|
|
"grad_norm": 0.6546477947224949,
|
|
"learning_rate": 2.4989770858427113e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12455105781555176,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2634.4,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 3.3545310015898253,
|
|
"grad_norm": 0.6179622551759897,
|
|
"learning_rate": 2.4912946169255722e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10072465240955353,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3045.4,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 3.3624801271860094,
|
|
"grad_norm": 0.661798409802034,
|
|
"learning_rate": 2.4836044256092288e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10123787820339203,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2448.8,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 3.370429252782194,
|
|
"grad_norm": 0.573462069765484,
|
|
"learning_rate": 2.475906632771714e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10489889979362488,
|
|
"step": 2120,
|
|
"valid_targets_mean": 2793.4,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 3.3783783783783785,
|
|
"grad_norm": 0.5239440291256261,
|
|
"learning_rate": 2.468201359410548e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11227285861968994,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4188.2,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 3.3863275039745626,
|
|
"grad_norm": 0.5944843779181651,
|
|
"learning_rate": 2.4604887266408304e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09702493995428085,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3138.1,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 3.394276629570747,
|
|
"grad_norm": 0.5952784117423808,
|
|
"learning_rate": 2.4527688556933402e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15025582909584045,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4062.9,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 3.4022257551669317,
|
|
"grad_norm": 0.5594816213733672,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2255047857761383,
|
|
"step": 2140,
|
|
"valid_targets_mean": 6051.4,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.4101748807631163,
|
|
"grad_norm": 0.534637619221854,
|
|
"learning_rate": 2.4373078847551154e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14731615781784058,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4334.1,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 3.4181240063593004,
|
|
"grad_norm": 0.6668816490999573,
|
|
"learning_rate": 2.4295670277871736e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14683671295642853,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3849.8,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 3.426073131955485,
|
|
"grad_norm": 0.5695793703052379,
|
|
"learning_rate": 2.4218194186832237e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16522330045700073,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3944.9,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.4340222575516695,
|
|
"grad_norm": 0.6896534991269395,
|
|
"learning_rate": 2.4140651792238193e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13288754224777222,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2797.4,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.4419713831478536,
|
|
"grad_norm": 0.554397523858313,
|
|
"learning_rate": 2.4063044312937332e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11009293794631958,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3307.2,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 3.449920508744038,
|
|
"grad_norm": 0.6361996486402922,
|
|
"learning_rate": 2.3985372968800407e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16490855813026428,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3827.1,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 3.4578696343402227,
|
|
"grad_norm": 0.5181622617984399,
|
|
"learning_rate": 2.3907638980702043e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22008943557739258,
|
|
"step": 2175,
|
|
"valid_targets_mean": 6716.4,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 3.4658187599364068,
|
|
"grad_norm": 0.6886032433666481,
|
|
"learning_rate": 2.382984357050151e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19594290852546692,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3620.4,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 3.4737678855325913,
|
|
"grad_norm": 0.7448495521414668,
|
|
"learning_rate": 2.3751987961023545e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19874638319015503,
|
|
"step": 2185,
|
|
"valid_targets_mean": 5841.4,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 3.481717011128776,
|
|
"grad_norm": 0.8131564311244124,
|
|
"learning_rate": 2.3674073376039152e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10642845183610916,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2958.9,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 3.4896661367249604,
|
|
"grad_norm": 0.6093349114673476,
|
|
"learning_rate": 2.359610104024631e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13624650239944458,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3616.9,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 3.4976152623211445,
|
|
"grad_norm": 0.5358342734308147,
|
|
"learning_rate": 2.3518072179250753e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14074242115020752,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4079.2,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 3.505564387917329,
|
|
"grad_norm": 0.5217671569085968,
|
|
"learning_rate": 2.343998801954673e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11194353550672531,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4461.9,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 3.5135135135135136,
|
|
"grad_norm": 0.6555806058373612,
|
|
"learning_rate": 2.3361849788497666e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13806423544883728,
|
|
"step": 2210,
|
|
"valid_targets_mean": 2893.8,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 3.521462639109698,
|
|
"grad_norm": 0.5782714606570237,
|
|
"learning_rate": 2.3283658714316935e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16085699200630188,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5370.1,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 3.5294117647058822,
|
|
"grad_norm": 0.5645329360837099,
|
|
"learning_rate": 2.320541602604851e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11702888458967209,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3576.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 3.537360890302067,
|
|
"grad_norm": 0.8603949266554655,
|
|
"learning_rate": 2.3127122953547663e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17808297276496887,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2315.8,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 3.5453100158982513,
|
|
"grad_norm": 0.7344939096866365,
|
|
"learning_rate": 2.3048780727461627e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17074677348136902,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3106.6,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 3.5532591414944354,
|
|
"grad_norm": 0.5557953312933946,
|
|
"learning_rate": 2.2970390579210246e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1546015441417694,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5122.5,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 3.56120826709062,
|
|
"grad_norm": 0.6220085262343654,
|
|
"learning_rate": 2.2891953740966643e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.111165851354599,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2583.0,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 3.5691573926868045,
|
|
"grad_norm": 0.5482730130647712,
|
|
"learning_rate": 2.281347144563782e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12136892974376678,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4651.5,
|
|
"valid_targets_min": 2087
|
|
},
|
|
{
|
|
"epoch": 3.5771065182829886,
|
|
"grad_norm": 0.7076033295864077,
|
|
"learning_rate": 2.273494492684531e-05,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13101047277450562,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2563.9,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 3.585055643879173,
|
|
"grad_norm": 0.6391031355661594,
|
|
"learning_rate": 2.265637541890577e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11939674615859985,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3398.1,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 3.5930047694753577,
|
|
"grad_norm": 0.6117856080838241,
|
|
"learning_rate": 2.2577764156811563e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16096490621566772,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4265.2,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 3.6009538950715423,
|
|
"grad_norm": 0.7256641614231789,
|
|
"learning_rate": 2.2499112376211373e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14876562356948853,
|
|
"step": 2265,
|
|
"valid_targets_mean": 2555.2,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 3.6089030206677264,
|
|
"grad_norm": 0.6940346989218751,
|
|
"learning_rate": 2.2420421313390776e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322866976261139,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2997.2,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 3.616852146263911,
|
|
"grad_norm": 0.6252450319585947,
|
|
"learning_rate": 2.234169220525282e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1188284233212471,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3281.6,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 3.6248012718600955,
|
|
"grad_norm": 0.6257122047758721,
|
|
"learning_rate": 2.226292628929853e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161021888256073,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3460.4,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 3.63275039745628,
|
|
"grad_norm": 0.7187100392945056,
|
|
"learning_rate": 2.2184124803607525e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17227305471897125,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3115.1,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 3.640699523052464,
|
|
"grad_norm": 0.6081829328829647,
|
|
"learning_rate": 2.210528898681851e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17318472266197205,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4387.6,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 3.6486486486486487,
|
|
"grad_norm": 0.6553767483790222,
|
|
"learning_rate": 2.2026420078109825e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15405075252056122,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3322.5,
|
|
"valid_targets_min": 1461
|
|
},
|
|
{
|
|
"epoch": 3.6565977742448332,
|
|
"grad_norm": 0.6277230401942127,
|
|
"learning_rate": 2.1947519317179972e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12562406063079834,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3499.0,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 3.6645468998410173,
|
|
"grad_norm": 0.768175397879882,
|
|
"learning_rate": 2.1868587944228118e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22613877058029175,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3098.5,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 3.672496025437202,
|
|
"grad_norm": 0.6328229156162137,
|
|
"learning_rate": 2.1789627199934588e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15439720451831818,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4416.8,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 3.6804451510333864,
|
|
"grad_norm": 0.5209043898962341,
|
|
"learning_rate": 2.1710638325441408e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11477707326412201,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4415.9,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 3.6883942766295705,
|
|
"grad_norm": 0.5715967616053358,
|
|
"learning_rate": 2.1631622562332744e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532776653766632,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4725.4,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 3.696343402225755,
|
|
"grad_norm": 0.57193989551333,
|
|
"learning_rate": 2.155258115261542e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16717489063739777,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4995.4,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 3.7042925278219396,
|
|
"grad_norm": 0.6757669928670674,
|
|
"learning_rate": 2.1473515338699383e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12959465384483337,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3294.0,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 3.7122416534181237,
|
|
"grad_norm": 0.5952418809897426,
|
|
"learning_rate": 2.1394426363378186e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19120419025421143,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5623.0,
|
|
"valid_targets_min": 3124
|
|
},
|
|
{
|
|
"epoch": 3.7201907790143083,
|
|
"grad_norm": 0.5128628204952419,
|
|
"learning_rate": 2.1315315469809426e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12095711380243301,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4198.5,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 3.728139904610493,
|
|
"grad_norm": 0.5964304651282649,
|
|
"learning_rate": 2.1236183901495236e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1730019897222519,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4573.9,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 3.7360890302066774,
|
|
"grad_norm": 0.5863264850949853,
|
|
"learning_rate": 2.1157032902262716e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440700888633728,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3816.4,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 3.744038155802862,
|
|
"grad_norm": 0.7011639670834071,
|
|
"learning_rate": 2.1077863716244388e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13412079215049744,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2686.5,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 3.751987281399046,
|
|
"grad_norm": 0.7035157321324028,
|
|
"learning_rate": 2.099867758785866e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12869735062122345,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2790.5,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 3.7599364069952306,
|
|
"grad_norm": 0.6101104615733577,
|
|
"learning_rate": 2.091947576179023e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1460449993610382,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3654.4,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 3.767885532591415,
|
|
"grad_norm": 0.6167973637254529,
|
|
"learning_rate": 2.084025948297055e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15653446316719055,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4159.1,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 3.7758346581875992,
|
|
"grad_norm": 0.46087599603559204,
|
|
"learning_rate": 2.0761029996558233e-05,
|
|
"loss": 0.3284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17936599254608154,
|
|
"step": 2375,
|
|
"valid_targets_mean": 6955.2,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 3.7837837837837838,
|
|
"grad_norm": 0.6888668616904009,
|
|
"learning_rate": 2.068178854791951e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10925927758216858,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3455.6,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 3.7917329093799683,
|
|
"grad_norm": 0.6196221197430387,
|
|
"learning_rate": 2.0602536382608638e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11506910622119904,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2697.5,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 3.7996820349761524,
|
|
"grad_norm": 0.7349165129812252,
|
|
"learning_rate": 2.0523274746348315e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19475872814655304,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2619.8,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 3.807631160572337,
|
|
"grad_norm": 0.734008505185106,
|
|
"learning_rate": 2.0444004885010114e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16301903128623962,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3585.4,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 3.8155802861685215,
|
|
"grad_norm": 0.5463549387476496,
|
|
"learning_rate": 2.0364728044594897e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13853493332862854,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4872.1,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 3.8235294117647056,
|
|
"grad_norm": 0.5685200487462948,
|
|
"learning_rate": 2.0285445471213218e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10278450697660446,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3722.8,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 3.83147853736089,
|
|
"grad_norm": 0.5713864922944528,
|
|
"learning_rate": 2.020615841106575e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11323775351047516,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3125.4,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 3.8394276629570747,
|
|
"grad_norm": 0.657025793832526,
|
|
"learning_rate": 2.0126868110423685e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1765354424715042,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3547.4,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 3.8473767885532593,
|
|
"grad_norm": 0.7248770895470373,
|
|
"learning_rate": 2.0047575815609166e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10319070518016815,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2010.2,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 3.855325914149444,
|
|
"grad_norm": 0.5274228488347601,
|
|
"learning_rate": 1.996828277297566e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1248902976512909,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4434.6,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 3.863275039745628,
|
|
"grad_norm": 0.5925715386503325,
|
|
"learning_rate": 1.988899022888841e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14048996567726135,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3368.8,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 3.8712241653418125,
|
|
"grad_norm": 0.7441749653558069,
|
|
"learning_rate": 1.98096994297048e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17203864455223083,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4758.6,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 3.879173290937997,
|
|
"grad_norm": 0.6451042110538078,
|
|
"learning_rate": 1.9730411621754798e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08983398973941803,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2440.9,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 3.887122416534181,
|
|
"grad_norm": 0.5532111537864854,
|
|
"learning_rate": 1.9651128051321376e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1459672451019287,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4952.5,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 3.8950715421303657,
|
|
"grad_norm": 0.7413196713366026,
|
|
"learning_rate": 1.9571849964620858e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14475509524345398,
|
|
"step": 2450,
|
|
"valid_targets_mean": 2548.0,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.90302066772655,
|
|
"grad_norm": 0.6009561794654499,
|
|
"learning_rate": 1.949257860778339e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13547420501708984,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3549.2,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.9109697933227343,
|
|
"grad_norm": 0.5691091166815203,
|
|
"learning_rate": 1.9413315226833343e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1424901783466339,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3616.8,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 3.918918918918919,
|
|
"grad_norm": 0.5055239375994666,
|
|
"learning_rate": 1.9334061067669725e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13945041596889496,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5371.9,
|
|
"valid_targets_min": 2845
|
|
},
|
|
{
|
|
"epoch": 3.9268680445151034,
|
|
"grad_norm": 0.6309774014485466,
|
|
"learning_rate": 1.9254817376046556e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17157450318336487,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3757.8,
|
|
"valid_targets_min": 1618
|
|
},
|
|
{
|
|
"epoch": 3.9348171701112875,
|
|
"grad_norm": 0.4813855553302401,
|
|
"learning_rate": 1.9175585397553368e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24460294842720032,
|
|
"step": 2475,
|
|
"valid_targets_mean": 7888.5,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 3.942766295707472,
|
|
"grad_norm": 0.6862712538793074,
|
|
"learning_rate": 1.909636637759554e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17342308163642883,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3915.4,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 3.9507154213036566,
|
|
"grad_norm": 0.7215776048288414,
|
|
"learning_rate": 1.9017161561374787e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12837374210357666,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2572.2,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 3.958664546899841,
|
|
"grad_norm": 0.5587844021312857,
|
|
"learning_rate": 1.893797219386957e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.153514564037323,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4271.6,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 3.9666136724960257,
|
|
"grad_norm": 0.5819605276890002,
|
|
"learning_rate": 1.885879951981549e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574709117412567,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4666.6,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 3.97456279809221,
|
|
"grad_norm": 0.7266329289148119,
|
|
"learning_rate": 1.877964478368577e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15330064296722412,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2669.2,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 3.9825119236883944,
|
|
"grad_norm": 0.5772566296723057,
|
|
"learning_rate": 1.8700509229671696e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18658240139484406,
|
|
"step": 2505,
|
|
"valid_targets_mean": 5354.9,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 3.990461049284579,
|
|
"grad_norm": 0.6820390468066141,
|
|
"learning_rate": 1.8621394101663003e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11891550570726395,
|
|
"step": 2510,
|
|
"valid_targets_mean": 2940.1,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 3.998410174880763,
|
|
"grad_norm": 0.8509794938183721,
|
|
"learning_rate": 1.854230064322837e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18318158388137817,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2696.5,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 4.006359300476947,
|
|
"grad_norm": 0.7613570632254215,
|
|
"learning_rate": 1.8463230097595887e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17578919231891632,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2811.9,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 4.014308426073132,
|
|
"grad_norm": 0.665550179592942,
|
|
"learning_rate": 1.8384183707633475e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365724503993988,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3265.0,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 4.022257551669316,
|
|
"grad_norm": 0.6929421660412779,
|
|
"learning_rate": 1.8305162715829348e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16255304217338562,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3823.4,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 4.030206677265501,
|
|
"grad_norm": 0.6730572645265154,
|
|
"learning_rate": 1.8226168364272534e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13849571347236633,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3259.9,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 4.038155802861685,
|
|
"grad_norm": 0.6100025338597829,
|
|
"learning_rate": 1.8147201894633282e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16024865210056305,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4355.5,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 4.046104928457869,
|
|
"grad_norm": 0.6754332116787131,
|
|
"learning_rate": 1.8068264548143605e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15205305814743042,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3525.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 4.054054054054054,
|
|
"grad_norm": 0.5261870530072121,
|
|
"learning_rate": 1.7989357565577746e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09566827863454819,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3798.0,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 4.0620031796502385,
|
|
"grad_norm": 0.6671055480681432,
|
|
"learning_rate": 1.7910482187232643e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1101292073726654,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2411.1,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 4.069952305246423,
|
|
"grad_norm": 0.6039341294203299,
|
|
"learning_rate": 1.7831639652908507e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12809130549430847,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3140.5,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 4.077901430842608,
|
|
"grad_norm": 0.5931097782099924,
|
|
"learning_rate": 1.775283120188925e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13419832289218903,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4870.6,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 4.085850556438792,
|
|
"grad_norm": 0.6221336779640464,
|
|
"learning_rate": 1.7674058072923075e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15571482479572296,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4212.0,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 4.093799682034976,
|
|
"grad_norm": 0.667175830348244,
|
|
"learning_rate": 1.7595321504202977e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1416112631559372,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2872.4,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 4.101748807631161,
|
|
"grad_norm": 0.6095607203831143,
|
|
"learning_rate": 1.751662273334725e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13507461547851562,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4221.5,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 4.109697933227345,
|
|
"grad_norm": 0.6170915177711385,
|
|
"learning_rate": 1.7437962997380093e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09853905439376831,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3120.4,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 4.117647058823529,
|
|
"grad_norm": 0.6353675316926362,
|
|
"learning_rate": 1.7359343532712135e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17472150921821594,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4038.4,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 4.125596184419714,
|
|
"grad_norm": 0.6998032960296284,
|
|
"learning_rate": 1.7280765575120992e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16725534200668335,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3618.5,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 4.133545310015898,
|
|
"grad_norm": 0.507948415747279,
|
|
"learning_rate": 1.7202230359731835e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22382380068302155,
|
|
"step": 2600,
|
|
"valid_targets_mean": 9346.9,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 4.141494435612083,
|
|
"grad_norm": 0.5853055155357535,
|
|
"learning_rate": 1.7123739120998033e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0993199497461319,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3580.2,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.149443561208267,
|
|
"grad_norm": 0.6020011065165445,
|
|
"learning_rate": 1.7045293092681686e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1053726002573967,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3310.8,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 4.157392686804451,
|
|
"grad_norm": 0.6384910059636179,
|
|
"learning_rate": 1.6966893507834242e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1392892301082611,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3959.2,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 4.165341812400636,
|
|
"grad_norm": 1.1452697433876478,
|
|
"learning_rate": 1.6888541598777167e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13290053606033325,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3322.9,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 4.17329093799682,
|
|
"grad_norm": 0.9143919460097873,
|
|
"learning_rate": 1.68102385970825e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18551841378211975,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2036.8,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.1812400635930045,
|
|
"grad_norm": 0.6709409201926725,
|
|
"learning_rate": 1.6731985733553545e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14816510677337646,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3950.9,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 4.1891891891891895,
|
|
"grad_norm": 0.5858882904602604,
|
|
"learning_rate": 1.6653784238205525e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12594446539878845,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3524.1,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 4.197138314785374,
|
|
"grad_norm": 0.790636567721086,
|
|
"learning_rate": 1.6575635340246203e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130628764629364,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2347.0,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.205087440381558,
|
|
"grad_norm": 0.5526079027728118,
|
|
"learning_rate": 1.649754026805662e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0728602409362793,
|
|
"step": 2645,
|
|
"valid_targets_mean": 2556.1,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 4.213036565977743,
|
|
"grad_norm": 0.6974987351080191,
|
|
"learning_rate": 1.6419500249171737e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10859890282154083,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2545.4,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 4.220985691573927,
|
|
"grad_norm": 0.6181361663014899,
|
|
"learning_rate": 1.634151651026118e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15426141023635864,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5137.1,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 4.228934817170111,
|
|
"grad_norm": 0.6965057456952938,
|
|
"learning_rate": 1.626359027710993e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16950593888759613,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4172.8,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 4.236883942766296,
|
|
"grad_norm": 0.776797629359818,
|
|
"learning_rate": 1.6185722774599064e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17750418186187744,
|
|
"step": 2665,
|
|
"valid_targets_mean": 2694.2,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 4.24483306836248,
|
|
"grad_norm": 0.8981270734333783,
|
|
"learning_rate": 1.6107915226686504e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09386648237705231,
|
|
"step": 2670,
|
|
"valid_targets_mean": 1984.2,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 4.252782193958664,
|
|
"grad_norm": 0.5337841741649596,
|
|
"learning_rate": 1.603016885638779e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1550283282995224,
|
|
"step": 2675,
|
|
"valid_targets_mean": 5832.2,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 4.260731319554849,
|
|
"grad_norm": 0.5935884985673398,
|
|
"learning_rate": 1.5952484885756827e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10803897678852081,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3962.9,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 4.268680445151033,
|
|
"grad_norm": 0.667116703164492,
|
|
"learning_rate": 1.587486453586669e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15034128725528717,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3566.2,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 4.276629570747218,
|
|
"grad_norm": 0.5704801058227873,
|
|
"learning_rate": 1.579730902679045e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1056005209684372,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3583.8,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 4.284578696343402,
|
|
"grad_norm": 0.5765377556877987,
|
|
"learning_rate": 1.5719819577581982e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1161583811044693,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4232.9,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 4.292527821939586,
|
|
"grad_norm": 0.7174036407831434,
|
|
"learning_rate": 1.5642397406256768e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14371202886104584,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3475.5,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 4.300476947535771,
|
|
"grad_norm": 0.5832351090990329,
|
|
"learning_rate": 1.556504372977283e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17427685856819153,
|
|
"step": 2705,
|
|
"valid_targets_mean": 5465.4,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 4.3084260731319555,
|
|
"grad_norm": 0.6243689668006394,
|
|
"learning_rate": 1.548775976401152e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1188681349158287,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3298.1,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 4.31637519872814,
|
|
"grad_norm": 0.4911243189351732,
|
|
"learning_rate": 1.5410546723758452e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12772631645202637,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5534.9,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 4.324324324324325,
|
|
"grad_norm": 0.8895208579389339,
|
|
"learning_rate": 1.5333405822684428e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13373422622680664,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2084.1,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 4.332273449920509,
|
|
"grad_norm": 0.7143427192404614,
|
|
"learning_rate": 1.5256338273326293e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11730432510375977,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2734.8,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 4.340222575516693,
|
|
"grad_norm": 0.5725846150183455,
|
|
"learning_rate": 1.5179345287067935e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17064280807971954,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4865.1,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 4.348171701112878,
|
|
"grad_norm": 0.6329182924306412,
|
|
"learning_rate": 1.5102428074121222e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14636817574501038,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5455.2,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 4.356120826709062,
|
|
"grad_norm": 0.6712836157446482,
|
|
"learning_rate": 1.5025587843506986e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11480337381362915,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3891.2,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 4.364069952305247,
|
|
"grad_norm": 0.6187580785499945,
|
|
"learning_rate": 1.4948825803035996e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16084839403629303,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4768.4,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 4.372019077901431,
|
|
"grad_norm": 0.6846968655985675,
|
|
"learning_rate": 1.4872143159290016e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12469711154699326,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2932.5,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 4.379968203497615,
|
|
"grad_norm": 0.5808613385408403,
|
|
"learning_rate": 1.4795541117602808e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13453423976898193,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4490.8,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 4.3879173290938,
|
|
"grad_norm": 0.543922959153577,
|
|
"learning_rate": 1.4719020882041175e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11703817546367645,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5518.9,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 4.395866454689984,
|
|
"grad_norm": 0.5601022715146916,
|
|
"learning_rate": 1.4642583655386084e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059394720941782,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2167.6,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 4.403815580286168,
|
|
"grad_norm": 0.7764578207121494,
|
|
"learning_rate": 1.4566230639113696e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1750270128250122,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3337.9,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 4.411764705882353,
|
|
"grad_norm": 0.6291021077047266,
|
|
"learning_rate": 1.448996303337654e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10824283957481384,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3307.2,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 4.419713831478537,
|
|
"grad_norm": 0.6032122791239393,
|
|
"learning_rate": 1.4413782036984616e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284392774105072,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4128.0,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 4.4276629570747215,
|
|
"grad_norm": 0.6641005639508057,
|
|
"learning_rate": 1.4337688847386542e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12396696209907532,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3295.0,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 4.4356120826709065,
|
|
"grad_norm": 0.6481897382450877,
|
|
"learning_rate": 1.426168466065077e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20064634084701538,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4399.5,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 4.443561208267091,
|
|
"grad_norm": 0.687506286090942,
|
|
"learning_rate": 1.4185770671446743e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10745537281036377,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2257.5,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 4.451510333863275,
|
|
"grad_norm": 0.6097303559954192,
|
|
"learning_rate": 1.4109948073026153e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13296522200107574,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3979.4,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 4.45945945945946,
|
|
"grad_norm": 0.6696206259157537,
|
|
"learning_rate": 1.4034218057204165e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14487697184085846,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3560.8,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 4.467408585055644,
|
|
"grad_norm": 0.6062209509028393,
|
|
"learning_rate": 1.3958581814340679e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11983238160610199,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4156.5,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 4.475357710651828,
|
|
"grad_norm": 0.49447740983226324,
|
|
"learning_rate": 1.3883040533321637e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12950390577316284,
|
|
"step": 2815,
|
|
"valid_targets_mean": 5266.5,
|
|
"valid_targets_min": 1534
|
|
},
|
|
{
|
|
"epoch": 4.483306836248013,
|
|
"grad_norm": 0.6829672087857537,
|
|
"learning_rate": 1.3807595401540322e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10179359465837479,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2277.5,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.491255961844197,
|
|
"grad_norm": 0.6467168019295693,
|
|
"learning_rate": 1.3732247604878697e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13071471452713013,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2987.9,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 4.499205087440382,
|
|
"grad_norm": 0.5955764848926297,
|
|
"learning_rate": 1.3656998327688764e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16149049997329712,
|
|
"step": 2830,
|
|
"valid_targets_mean": 5150.5,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.507154213036566,
|
|
"grad_norm": 0.6206468699369816,
|
|
"learning_rate": 1.3581848752773961e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.112500861287117,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3059.5,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 4.51510333863275,
|
|
"grad_norm": 0.5482687222881866,
|
|
"learning_rate": 1.3506800061370555e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251266449689865,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3668.2,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 4.523052464228935,
|
|
"grad_norm": 0.6816874897576661,
|
|
"learning_rate": 1.3431853433129058e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11492455750703812,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3143.2,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 4.531001589825119,
|
|
"grad_norm": 1.1358400816406755,
|
|
"learning_rate": 1.3357010046095741e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28642940521240234,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4650.4,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 4.538950715421303,
|
|
"grad_norm": 0.6000838925580328,
|
|
"learning_rate": 1.3282271076694052e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22131921350955963,
|
|
"step": 2855,
|
|
"valid_targets_mean": 5799.1,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 4.546899841017488,
|
|
"grad_norm": 0.7314678470467617,
|
|
"learning_rate": 1.3207637699706162e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17173025012016296,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3831.6,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 4.5548489666136724,
|
|
"grad_norm": 0.6249529266993146,
|
|
"learning_rate": 1.3133111088254507e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12098148465156555,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3392.9,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 4.5627980922098565,
|
|
"grad_norm": 0.6858850262928099,
|
|
"learning_rate": 1.3058692413783307e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1316334307193756,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2866.0,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 4.5707472178060415,
|
|
"grad_norm": 0.6159596236716103,
|
|
"learning_rate": 1.2984382846040187e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13360215723514557,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4015.1,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 4.578696343402226,
|
|
"grad_norm": 0.5607382947721867,
|
|
"learning_rate": 1.2910183553057788e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1157282143831253,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3252.2,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 4.586645468998411,
|
|
"grad_norm": 0.645645755990382,
|
|
"learning_rate": 1.2836095701135398e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13126234710216522,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3559.2,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 4.594594594594595,
|
|
"grad_norm": 0.6947760468611776,
|
|
"learning_rate": 1.2762120454820628e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13670824468135834,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2664.8,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 4.602543720190779,
|
|
"grad_norm": 0.7117406870857815,
|
|
"learning_rate": 1.268825897689108e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16626793146133423,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3092.8,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 4.610492845786963,
|
|
"grad_norm": 0.6487111787430797,
|
|
"learning_rate": 1.2614512428336105e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11249272525310516,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2991.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 4.618441971383148,
|
|
"grad_norm": 0.5690772792228552,
|
|
"learning_rate": 1.254088196833855e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12510134279727936,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3653.8,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 4.626391096979332,
|
|
"grad_norm": 0.5907761625906671,
|
|
"learning_rate": 1.2467368754256513e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12357486039400101,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4080.2,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 4.634340222575517,
|
|
"grad_norm": 0.6922586816596724,
|
|
"learning_rate": 1.2393973941605161e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12054169178009033,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2867.4,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 4.642289348171701,
|
|
"grad_norm": 0.6909115007530438,
|
|
"learning_rate": 1.2320698684038599e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20486235618591309,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3631.0,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 4.650238473767885,
|
|
"grad_norm": 0.6138202572609754,
|
|
"learning_rate": 1.2247544133331681e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1196434497833252,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3019.1,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 4.65818759936407,
|
|
"grad_norm": 0.6646898124171521,
|
|
"learning_rate": 1.2174511439361943e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14891402423381805,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3528.2,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 4.666136724960254,
|
|
"grad_norm": 0.6186631375894875,
|
|
"learning_rate": 1.2101601750091528e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13548314571380615,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3613.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.674085850556438,
|
|
"grad_norm": 0.6076553189291499,
|
|
"learning_rate": 1.2028816211549117e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1686253398656845,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5016.6,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 4.682034976152623,
|
|
"grad_norm": 0.5921112103338615,
|
|
"learning_rate": 1.195615596781194e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14643877744674683,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5471.0,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 4.6899841017488075,
|
|
"grad_norm": 0.6263896515104662,
|
|
"learning_rate": 1.18836221609878e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28924280405044556,
|
|
"step": 2950,
|
|
"valid_targets_mean": 6145.5,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 4.697933227344992,
|
|
"grad_norm": 0.520183310452159,
|
|
"learning_rate": 1.1811215931197084e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13344112038612366,
|
|
"step": 2955,
|
|
"valid_targets_mean": 5916.5,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 4.705882352941177,
|
|
"grad_norm": 0.6569811257513435,
|
|
"learning_rate": 1.1738938416554857e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16415748000144958,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3257.5,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 4.713831478537361,
|
|
"grad_norm": 0.6318040225504936,
|
|
"learning_rate": 1.1666790753153009e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11908093094825745,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3177.1,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 4.721780604133546,
|
|
"grad_norm": 0.591710668338057,
|
|
"learning_rate": 1.1594774075042345e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19112607836723328,
|
|
"step": 2970,
|
|
"valid_targets_mean": 6152.1,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 4.72972972972973,
|
|
"grad_norm": 0.7675269853864845,
|
|
"learning_rate": 1.152288951421478e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16331367194652557,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2599.8,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 4.737678855325914,
|
|
"grad_norm": 0.6746614003252736,
|
|
"learning_rate": 1.1451138200585567e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1770399808883667,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3989.8,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 4.745627980922099,
|
|
"grad_norm": 0.8208860428274075,
|
|
"learning_rate": 1.13795212619755e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14217200875282288,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2114.4,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.753577106518283,
|
|
"grad_norm": 0.5299673929646598,
|
|
"learning_rate": 1.1308039824093197e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1993390917778015,
|
|
"step": 2990,
|
|
"valid_targets_mean": 5475.2,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 4.761526232114467,
|
|
"grad_norm": 0.629244885787315,
|
|
"learning_rate": 1.1236695010517434e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893625110387802,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4824.4,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 4.769475357710652,
|
|
"grad_norm": 0.5928695819763566,
|
|
"learning_rate": 1.116548794267945e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2211119532585144,
|
|
"step": 3000,
|
|
"valid_targets_mean": 5032.4,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 4.777424483306836,
|
|
"grad_norm": 0.6755181362529614,
|
|
"learning_rate": 1.109441973984534e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10464102774858475,
|
|
"step": 3005,
|
|
"valid_targets_mean": 2779.2,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 4.78537360890302,
|
|
"grad_norm": 0.6656201472026345,
|
|
"learning_rate": 1.1023491519098439e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14056362211704254,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4387.8,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 4.793322734499205,
|
|
"grad_norm": 0.6907884419504595,
|
|
"learning_rate": 1.0952704395321781e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14489704370498657,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3666.8,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 4.801271860095389,
|
|
"grad_norm": 0.6097620390735313,
|
|
"learning_rate": 1.0882059481180588e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10279364883899689,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2736.2,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 4.809220985691574,
|
|
"grad_norm": 0.566710831435134,
|
|
"learning_rate": 1.0811557887104747e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383320391178131,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3449.0,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 4.8171701112877585,
|
|
"grad_norm": 0.5804723945654928,
|
|
"learning_rate": 1.074120072127137e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17098158597946167,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4845.0,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 4.825119236883943,
|
|
"grad_norm": 0.6935423425823624,
|
|
"learning_rate": 1.0670989089587395e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18946236371994019,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4160.8,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 4.833068362480127,
|
|
"grad_norm": 0.5216079314768822,
|
|
"learning_rate": 1.0600924095672184e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10693530738353729,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4593.0,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 4.841017488076312,
|
|
"grad_norm": 0.6090986391801188,
|
|
"learning_rate": 1.0531006840840162e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12369546294212341,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4007.5,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 4.848966613672496,
|
|
"grad_norm": 0.6000229394241546,
|
|
"learning_rate": 1.046123842408354e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11460407078266144,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3182.1,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 4.856915739268681,
|
|
"grad_norm": 0.6735580147386395,
|
|
"learning_rate": 1.0391619942055007e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11546464264392853,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3036.5,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 4.864864864864865,
|
|
"grad_norm": 0.6363724422676876,
|
|
"learning_rate": 1.0322152489050508e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15131962299346924,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4279.5,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 4.872813990461049,
|
|
"grad_norm": 0.542343887252272,
|
|
"learning_rate": 1.0252837156992065e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10399461537599564,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3536.5,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 4.880763116057234,
|
|
"grad_norm": 0.6493761139441392,
|
|
"learning_rate": 1.018367503541057e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593492180109024,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4043.0,
|
|
"valid_targets_min": 1594
|
|
},
|
|
{
|
|
"epoch": 4.888712241653418,
|
|
"grad_norm": 0.6112792871078098,
|
|
"learning_rate": 1.0114667211428675e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12747317552566528,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4264.0,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 4.896661367249602,
|
|
"grad_norm": 0.6501764493698524,
|
|
"learning_rate": 1.0045814769743731e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12961798906326294,
|
|
"step": 3080,
|
|
"valid_targets_mean": 2677.2,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 4.904610492845787,
|
|
"grad_norm": 0.7316885008892964,
|
|
"learning_rate": 9.977118792610719e-06,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11821393668651581,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2774.6,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 4.912559618441971,
|
|
"grad_norm": 0.6134505461662394,
|
|
"learning_rate": 9.908580359825204e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11769888550043106,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3919.1,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 4.920508744038155,
|
|
"grad_norm": 0.6060879539252468,
|
|
"learning_rate": 9.840200548706435e-06,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13488918542861938,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4077.4,
|
|
"valid_targets_min": 2616
|
|
},
|
|
{
|
|
"epoch": 4.92845786963434,
|
|
"grad_norm": 0.542176975440222,
|
|
"learning_rate": 9.771980434080348e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12605437636375427,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4327.8,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 4.9364069952305245,
|
|
"grad_norm": 0.6520291519275241,
|
|
"learning_rate": 9.70392108826269e-06,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1679481565952301,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4077.5,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 4.9443561208267095,
|
|
"grad_norm": 0.6813393745404305,
|
|
"learning_rate": 9.636023581042191e-06,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12544550001621246,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2854.8,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 4.952305246422894,
|
|
"grad_norm": 0.5976091183986858,
|
|
"learning_rate": 9.5682889796637e-06,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11039729416370392,
|
|
"step": 3115,
|
|
"valid_targets_mean": 2979.6,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 4.960254372019078,
|
|
"grad_norm": 0.6839755770306887,
|
|
"learning_rate": 9.500718348811457e-06,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08625346422195435,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2428.1,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 4.968203497615263,
|
|
"grad_norm": 0.725638653493915,
|
|
"learning_rate": 9.433312750592337e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10997235029935837,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2006.0,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 4.976152623211447,
|
|
"grad_norm": 0.7278728270868822,
|
|
"learning_rate": 9.366073244519124e-06,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10344748198986053,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2294.8,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 4.984101748807631,
|
|
"grad_norm": 0.6586432404260681,
|
|
"learning_rate": 9.299000887493934e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10997908562421799,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3221.1,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 4.992050874403816,
|
|
"grad_norm": 0.6825832212524328,
|
|
"learning_rate": 9.232096733791518e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16510997712612152,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3894.4,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6333647618294471,
|
|
"learning_rate": 9.165361835042734e-06,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1531069278717041,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4285.0,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 5.007949125596184,
|
|
"grad_norm": 0.5548700127401147,
|
|
"learning_rate": 9.098797240218036e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13602089881896973,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4779.1,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 5.015898251192369,
|
|
"grad_norm": 0.7046026663850642,
|
|
"learning_rate": 9.032403995610937e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16171672940254211,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3424.8,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 5.023847376788553,
|
|
"grad_norm": 0.5135174634392983,
|
|
"learning_rate": 8.966183144821583e-06,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13624900579452515,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5872.9,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 5.031796502384737,
|
|
"grad_norm": 0.5488314594282752,
|
|
"learning_rate": 8.900135728740373e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10430087149143219,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4343.9,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 5.039745627980922,
|
|
"grad_norm": 0.5465924383670139,
|
|
"learning_rate": 8.83426278553158e-06,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12529496848583221,
|
|
"step": 3170,
|
|
"valid_targets_mean": 5406.0,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 5.047694753577106,
|
|
"grad_norm": 0.5854199897952757,
|
|
"learning_rate": 8.768565350616998e-06,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08451852202415466,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3967.6,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 5.0556438791732905,
|
|
"grad_norm": 0.5392584517132655,
|
|
"learning_rate": 8.703044456659741e-06,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213638186454773,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4397.5,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 5.0635930047694755,
|
|
"grad_norm": 0.6289785398459488,
|
|
"learning_rate": 8.63770113354794e-06,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16546054184436798,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4564.2,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 5.07154213036566,
|
|
"grad_norm": 0.7098473524737194,
|
|
"learning_rate": 8.572536408378587e-06,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16755597293376923,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3462.5,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 5.079491255961845,
|
|
"grad_norm": 0.5760785975607284,
|
|
"learning_rate": 8.507551305441408e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10758992284536362,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4294.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 5.087440381558029,
|
|
"grad_norm": 0.5913448711583957,
|
|
"learning_rate": 8.442746846202711e-06,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12651881575584412,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3967.5,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 5.095389507154213,
|
|
"grad_norm": 0.7645209933839672,
|
|
"learning_rate": 8.378124049289394e-06,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13170547783374786,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3264.1,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 5.103338632750398,
|
|
"grad_norm": 0.5645617398126972,
|
|
"learning_rate": 8.313683930472889e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16058297455310822,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4690.2,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 5.111287758346582,
|
|
"grad_norm": 0.7583828121297401,
|
|
"learning_rate": 8.249427502653198e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13732054829597473,
|
|
"step": 3215,
|
|
"valid_targets_mean": 2575.6,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 5.119236883942766,
|
|
"grad_norm": 0.6726352492118366,
|
|
"learning_rate": 8.185355775842982e-06,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15156151354312897,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4516.1,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 5.127186009538951,
|
|
"grad_norm": 0.5352592155625615,
|
|
"learning_rate": 8.12146975715171e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11848842352628708,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4503.1,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 5.135135135135135,
|
|
"grad_norm": 0.6235458935932251,
|
|
"learning_rate": 8.057770450769771e-06,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14192286133766174,
|
|
"step": 3230,
|
|
"valid_targets_mean": 4079.6,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 5.143084260731319,
|
|
"grad_norm": 0.6978264640966988,
|
|
"learning_rate": 7.994258857952748e-06,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09355522692203522,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2944.6,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 5.151033386327504,
|
|
"grad_norm": 0.5388445502223251,
|
|
"learning_rate": 7.93093597700564e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15163496136665344,
|
|
"step": 3240,
|
|
"valid_targets_mean": 5585.1,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 5.158982511923688,
|
|
"grad_norm": 0.7127298989075432,
|
|
"learning_rate": 7.867802803267182e-06,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15663330256938934,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3741.1,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 5.166931637519872,
|
|
"grad_norm": 0.6238206289500292,
|
|
"learning_rate": 7.80486032909421e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542261689901352,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3928.4,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 5.174880763116057,
|
|
"grad_norm": 0.5671189263097933,
|
|
"learning_rate": 7.742109543846063e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14505670964717865,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4821.4,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 5.1828298887122415,
|
|
"grad_norm": 0.868877666365973,
|
|
"learning_rate": 7.679551433869001e-06,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23676635324954987,
|
|
"step": 3260,
|
|
"valid_targets_mean": 2787.4,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 5.1907790143084265,
|
|
"grad_norm": 0.5893760577989423,
|
|
"learning_rate": 7.617186982480749e-06,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09221216291189194,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3065.2,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 5.198728139904611,
|
|
"grad_norm": 0.7491864014479452,
|
|
"learning_rate": 7.5550171699549945e-06,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14138752222061157,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3224.0,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 5.206677265500795,
|
|
"grad_norm": 0.6351545249768293,
|
|
"learning_rate": 7.493042973506e-06,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10765412449836731,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3410.1,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 5.21462639109698,
|
|
"grad_norm": 0.7029933698187032,
|
|
"learning_rate": 7.431265367273268e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17145150899887085,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4428.0,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 5.222575516693164,
|
|
"grad_norm": 0.48822997385058003,
|
|
"learning_rate": 7.36968532230617e-06,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10615774989128113,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4383.9,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 5.230524642289348,
|
|
"grad_norm": 0.6053279392007734,
|
|
"learning_rate": 7.308303806548742e-06,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10968545079231262,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3829.0,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 5.238473767885533,
|
|
"grad_norm": 0.6788023691245904,
|
|
"learning_rate": 7.247121784824445e-06,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12979434430599213,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3644.4,
|
|
"valid_targets_min": 2326
|
|
},
|
|
{
|
|
"epoch": 5.246422893481717,
|
|
"grad_norm": 0.7583103299599769,
|
|
"learning_rate": 7.186140218820979e-06,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16850461065769196,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3249.0,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 5.254372019077901,
|
|
"grad_norm": 0.6094663568941164,
|
|
"learning_rate": 7.125360067075196e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1243053525686264,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4031.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 5.262321144674086,
|
|
"grad_norm": 0.5960184862072287,
|
|
"learning_rate": 7.0647822849580385e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10782050341367722,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4405.6,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 5.27027027027027,
|
|
"grad_norm": 0.6634523012801189,
|
|
"learning_rate": 7.004407824659491e-06,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10181061923503876,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3222.5,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 5.278219395866454,
|
|
"grad_norm": 0.6098489279259934,
|
|
"learning_rate": 6.944237635173627e-06,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11660449951887131,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4513.0,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 5.286168521462639,
|
|
"grad_norm": 0.6191337149334263,
|
|
"learning_rate": 6.88427266228372e-06,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08818727731704712,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2716.5,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 5.294117647058823,
|
|
"grad_norm": 0.7821667522607078,
|
|
"learning_rate": 6.824513848547323e-06,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14952991902828217,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3371.0,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 5.302066772655008,
|
|
"grad_norm": 0.5654129926548702,
|
|
"learning_rate": 6.764962133281503e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09026852995157242,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3161.4,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.3100158982511925,
|
|
"grad_norm": 0.8866483906710874,
|
|
"learning_rate": 6.705618452548057e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12158630788326263,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2480.4,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 5.317965023847377,
|
|
"grad_norm": 0.6868639499452497,
|
|
"learning_rate": 6.646483739138778e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13030937314033508,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3079.2,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 5.325914149443562,
|
|
"grad_norm": 0.7256018869000503,
|
|
"learning_rate": 6.5875589225608376e-06,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350623071193695,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4025.1,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 5.333863275039746,
|
|
"grad_norm": 0.7227439375959568,
|
|
"learning_rate": 6.528844929022134e-06,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11379668861627579,
|
|
"step": 3355,
|
|
"valid_targets_mean": 2906.4,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 5.34181240063593,
|
|
"grad_norm": 0.5991086551937529,
|
|
"learning_rate": 6.4703426814167434e-06,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1061788946390152,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4791.5,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 5.349761526232115,
|
|
"grad_norm": 0.7053386087621063,
|
|
"learning_rate": 6.412053099310449e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10495362430810928,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2796.6,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.357710651828299,
|
|
"grad_norm": 0.7387994886117878,
|
|
"learning_rate": 6.353977098926225e-06,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08723889291286469,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2030.2,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 5.365659777424483,
|
|
"grad_norm": 0.7080284672283791,
|
|
"learning_rate": 6.296115593129888e-06,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09850680828094482,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2414.8,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 5.373608903020668,
|
|
"grad_norm": 0.6323823848822012,
|
|
"learning_rate": 6.238469491415728e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12637121975421906,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3372.8,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 5.381558028616852,
|
|
"grad_norm": 0.6389448485395631,
|
|
"learning_rate": 6.181039699892206e-06,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11737532913684845,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4011.2,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 5.389507154213036,
|
|
"grad_norm": 0.4951127781161338,
|
|
"learning_rate": 6.123827121267709e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09638971090316772,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3757.9,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 5.397456279809221,
|
|
"grad_norm": 0.8238779073186115,
|
|
"learning_rate": 6.066832654836396e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16826272010803223,
|
|
"step": 3395,
|
|
"valid_targets_mean": 2405.8,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 5.405405405405405,
|
|
"grad_norm": 0.6845792078696029,
|
|
"learning_rate": 6.010057196464012e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10738080739974976,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3052.1,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 5.413354531001589,
|
|
"grad_norm": 0.6011841541928575,
|
|
"learning_rate": 5.9535016385738335e-06,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.114475779235363,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3896.4,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 5.421303656597774,
|
|
"grad_norm": 0.6854204807114475,
|
|
"learning_rate": 5.897166870132658e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2282213717699051,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5582.4,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 5.4292527821939585,
|
|
"grad_norm": 0.7761328966705092,
|
|
"learning_rate": 5.841053776636781e-06,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13135433197021484,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2853.1,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 5.4372019077901435,
|
|
"grad_norm": 0.6125832967655993,
|
|
"learning_rate": 5.7851632400981285e-06,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12467734515666962,
|
|
"step": 3420,
|
|
"valid_targets_mean": 3323.1,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 5.4451510333863276,
|
|
"grad_norm": 0.6887666716001182,
|
|
"learning_rate": 5.729496139030377e-06,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11000342667102814,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2999.0,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 5.453100158982512,
|
|
"grad_norm": 0.7667540488795341,
|
|
"learning_rate": 5.67405334843512e-06,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12887531518936157,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2722.6,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 5.461049284578697,
|
|
"grad_norm": 0.6023615757617302,
|
|
"learning_rate": 5.618835739788136e-06,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458101123571396,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5397.9,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 5.468998410174881,
|
|
"grad_norm": 0.8188586543042174,
|
|
"learning_rate": 5.563844181025706e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18201018869876862,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2975.9,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 5.476947535771065,
|
|
"grad_norm": 0.6313280501270969,
|
|
"learning_rate": 5.509079536530939e-06,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13613098859786987,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3323.6,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 5.48489666136725,
|
|
"grad_norm": 0.5666386990907872,
|
|
"learning_rate": 5.4545426671201905e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12922339141368866,
|
|
"step": 3450,
|
|
"valid_targets_mean": 4887.1,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 5.492845786963434,
|
|
"grad_norm": 0.6986392790009974,
|
|
"learning_rate": 5.400234430029561e-06,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14214450120925903,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3609.4,
|
|
"valid_targets_min": 1671
|
|
},
|
|
{
|
|
"epoch": 5.500794912559618,
|
|
"grad_norm": 0.6264359343360186,
|
|
"learning_rate": 5.346155678901392e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11440618336200714,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3175.6,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 5.508744038155803,
|
|
"grad_norm": 0.6340616993944738,
|
|
"learning_rate": 5.292307263770859e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16398218274116516,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5138.0,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 5.516693163751987,
|
|
"grad_norm": 0.5897281827554604,
|
|
"learning_rate": 5.238690031052603e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1538253128528595,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5059.9,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 5.524642289348172,
|
|
"grad_norm": 0.7756110205737315,
|
|
"learning_rate": 5.185304823527426e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15340808033943176,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3223.6,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 5.532591414944356,
|
|
"grad_norm": 0.6201666017190486,
|
|
"learning_rate": 5.132152480329072e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13399487733840942,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3522.5,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 5.54054054054054,
|
|
"grad_norm": 0.768289880785717,
|
|
"learning_rate": 5.07923383693099e-06,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13348113000392914,
|
|
"step": 3485,
|
|
"valid_targets_mean": 2713.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.548489666136725,
|
|
"grad_norm": 0.711331878881533,
|
|
"learning_rate": 5.0265497251332314e-06,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12099003791809082,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3460.1,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 5.556438791732909,
|
|
"grad_norm": 0.6362586331123075,
|
|
"learning_rate": 4.974100973049385e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14766527712345123,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4538.0,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 5.5643879173290935,
|
|
"grad_norm": 0.6993136396447482,
|
|
"learning_rate": 4.921888405093525e-06,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12348655611276627,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3730.0,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 5.5723370429252785,
|
|
"grad_norm": 0.642793319699132,
|
|
"learning_rate": 4.869912841967286e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11578582227230072,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3134.0,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 5.580286168521463,
|
|
"grad_norm": 0.9336272001565746,
|
|
"learning_rate": 4.818175100646952e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1409333050251007,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3306.0,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 0.670575043893854,
|
|
"learning_rate": 4.766675994370598e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11387854069471359,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2881.4,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 5.596184419713832,
|
|
"grad_norm": 0.6046266838656398,
|
|
"learning_rate": 4.7154163326253265e-06,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09745581448078156,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2944.9,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 5.604133545310016,
|
|
"grad_norm": 0.6972532800085807,
|
|
"learning_rate": 4.664396921134551e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11021900177001953,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2718.0,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 5.6120826709062,
|
|
"grad_norm": 0.75892574425528,
|
|
"learning_rate": 4.613618561845306e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11320896446704865,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2564.2,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 5.620031796502385,
|
|
"grad_norm": 0.7230291508936358,
|
|
"learning_rate": 4.563082052915649e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12366051226854324,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2691.0,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 5.627980922098569,
|
|
"grad_norm": 0.6480412418745505,
|
|
"learning_rate": 4.512788188702135e-06,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12470411509275436,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4086.2,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 5.635930047694753,
|
|
"grad_norm": 0.6241547317395583,
|
|
"learning_rate": 4.462737759747315e-06,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251395046710968,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4593.9,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 5.643879173290938,
|
|
"grad_norm": 0.7430210862100454,
|
|
"learning_rate": 4.412931552767295e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13304971158504486,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2666.1,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.651828298887122,
|
|
"grad_norm": 0.6364521343480536,
|
|
"learning_rate": 4.363370350639405e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1463729590177536,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3870.2,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 5.659777424483307,
|
|
"grad_norm": 0.5123559279407844,
|
|
"learning_rate": 4.314054932389859e-06,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16127212345600128,
|
|
"step": 3560,
|
|
"valid_targets_mean": 6267.8,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 5.667726550079491,
|
|
"grad_norm": 0.6381872372309723,
|
|
"learning_rate": 4.2649860731815255e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13359318673610687,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3260.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 5.675675675675675,
|
|
"grad_norm": 0.6219632238511407,
|
|
"learning_rate": 4.216164544301755e-06,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09206640720367432,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2515.8,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 5.68362480127186,
|
|
"grad_norm": 0.6631650656969427,
|
|
"learning_rate": 4.167591113150225e-06,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17615485191345215,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4543.1,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 5.6915739268680445,
|
|
"grad_norm": 0.8356123436648725,
|
|
"learning_rate": 4.119266543226921e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0813077837228775,
|
|
"step": 3580,
|
|
"valid_targets_mean": 1957.2,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 5.699523052464229,
|
|
"grad_norm": 0.7109153209982301,
|
|
"learning_rate": 4.071191594120081e-06,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17518946528434753,
|
|
"step": 3585,
|
|
"valid_targets_mean": 5129.1,
|
|
"valid_targets_min": 3275
|
|
},
|
|
{
|
|
"epoch": 5.707472178060414,
|
|
"grad_norm": 0.6141079502440066,
|
|
"learning_rate": 4.023367021494313e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10772644728422165,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3077.0,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 5.715421303656598,
|
|
"grad_norm": 0.8439056629328021,
|
|
"learning_rate": 3.975793577078682e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1612888127565384,
|
|
"step": 3595,
|
|
"valid_targets_mean": 2849.9,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 5.723370429252782,
|
|
"grad_norm": 0.6037366867027146,
|
|
"learning_rate": 3.928472008654891e-06,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1114010214805603,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4350.2,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 5.731319554848967,
|
|
"grad_norm": 0.7278975933711467,
|
|
"learning_rate": 3.881403060045545e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14777927100658417,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3028.6,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 5.739268680445151,
|
|
"grad_norm": 0.6035339792988637,
|
|
"learning_rate": 3.834587471102464e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10571078956127167,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3580.8,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 5.747217806041336,
|
|
"grad_norm": 0.8196723088396369,
|
|
"learning_rate": 3.7880259776950224e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1502171903848648,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3420.9,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 5.75516693163752,
|
|
"grad_norm": 0.6636153967103018,
|
|
"learning_rate": 3.741719311698608e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11655619740486145,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3457.8,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 5.763116057233704,
|
|
"grad_norm": 0.7168400864739486,
|
|
"learning_rate": 3.69566820098312e-06,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12356597185134888,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3298.9,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 5.771065182829888,
|
|
"grad_norm": 0.7222204646243864,
|
|
"learning_rate": 3.6498733694015197e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14534445106983185,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3306.1,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 5.779014308426073,
|
|
"grad_norm": 0.6428801981092538,
|
|
"learning_rate": 3.604335536778434e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587358355522156,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5787.8,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 5.786963434022257,
|
|
"grad_norm": 0.6721680527281032,
|
|
"learning_rate": 3.559055418898887e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1227462887763977,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3116.5,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 5.794912559618442,
|
|
"grad_norm": 0.6531818230174867,
|
|
"learning_rate": 3.5140337274970014e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14149439334869385,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3820.9,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 5.802861685214626,
|
|
"grad_norm": 0.7091290938302351,
|
|
"learning_rate": 3.469271170244832e-06,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16764888167381287,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4756.6,
|
|
"valid_targets_min": 2138
|
|
},
|
|
{
|
|
"epoch": 5.8108108108108105,
|
|
"grad_norm": 0.6530413665507047,
|
|
"learning_rate": 3.4247684507412605e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10965146124362946,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3748.6,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 5.8187599364069955,
|
|
"grad_norm": 0.6028250089856403,
|
|
"learning_rate": 3.380526268500892e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334403157234192,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3941.0,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 5.82670906200318,
|
|
"grad_norm": 0.6789598792304273,
|
|
"learning_rate": 3.3365453189430984e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10922451317310333,
|
|
"step": 3665,
|
|
"valid_targets_mean": 2876.9,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 5.834658187599364,
|
|
"grad_norm": 0.5585530913640019,
|
|
"learning_rate": 3.292826293381071e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14635950326919556,
|
|
"step": 3670,
|
|
"valid_targets_mean": 5046.0,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 5.842607313195549,
|
|
"grad_norm": 0.7641250167380362,
|
|
"learning_rate": 3.2493698790109664e-06,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338251531124115,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3046.4,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 5.850556438791733,
|
|
"grad_norm": 0.6560810428034725,
|
|
"learning_rate": 3.2061767589010763e-06,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16354107856750488,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4282.8,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 5.858505564387917,
|
|
"grad_norm": 0.6716559762205456,
|
|
"learning_rate": 3.1632476119811285e-06,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15302489697933197,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3744.4,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 5.866454689984102,
|
|
"grad_norm": 0.6217489801096618,
|
|
"learning_rate": 3.120583113031579e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14532360434532166,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5256.4,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 5.874403815580286,
|
|
"grad_norm": 0.6620048472350588,
|
|
"learning_rate": 3.07818393267304e-06,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13492019474506378,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3030.2,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 5.882352941176471,
|
|
"grad_norm": 0.7518320989139965,
|
|
"learning_rate": 3.036050737355709e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14279663562774658,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3147.0,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 5.890302066772655,
|
|
"grad_norm": 0.6826640475331228,
|
|
"learning_rate": 2.9941841893489075e-06,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10652248561382294,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4087.1,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 5.898251192368839,
|
|
"grad_norm": 0.6191108512510489,
|
|
"learning_rate": 2.9525849467306766e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22080844640731812,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5204.8,
|
|
"valid_targets_min": 1740
|
|
},
|
|
{
|
|
"epoch": 5.906200317965024,
|
|
"grad_norm": 0.6393355991096903,
|
|
"learning_rate": 2.9112536633774245e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1301059126853943,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3957.4,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 5.914149443561208,
|
|
"grad_norm": 0.5673530366130025,
|
|
"learning_rate": 2.8701909889536384e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11331449449062347,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4121.2,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 5.922098569157392,
|
|
"grad_norm": 0.6398513670867448,
|
|
"learning_rate": 2.8293975689017018e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15489056706428528,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4998.8,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 5.930047694753577,
|
|
"grad_norm": 0.5529247415832941,
|
|
"learning_rate": 2.788874044431722e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12102428823709488,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4965.0,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 5.9379968203497615,
|
|
"grad_norm": 0.5987086456289487,
|
|
"learning_rate": 2.7486210525114533e-06,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1581522822380066,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4767.4,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 5.945945945945946,
|
|
"grad_norm": 0.5908991064410971,
|
|
"learning_rate": 2.708639225856311e-06,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11040371656417847,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3762.0,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 5.953895071542131,
|
|
"grad_norm": 0.6499260409439884,
|
|
"learning_rate": 2.6689291929193962e-06,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11049375683069229,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4363.9,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 5.961844197138315,
|
|
"grad_norm": 0.8136687718223724,
|
|
"learning_rate": 2.629491577881622e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1502695381641388,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3193.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 5.9697933227345,
|
|
"grad_norm": 0.5541902095317577,
|
|
"learning_rate": 2.5903270006419236e-06,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1092134565114975,
|
|
"step": 3755,
|
|
"valid_targets_mean": 4531.9,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 5.977742448330684,
|
|
"grad_norm": 0.6003377880989003,
|
|
"learning_rate": 2.551436076807501e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09888657182455063,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4009.0,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 5.985691573926868,
|
|
"grad_norm": 0.5942436825452202,
|
|
"learning_rate": 2.5128194176841226e-06,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16180501878261566,
|
|
"step": 3765,
|
|
"valid_targets_mean": 6323.5,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 5.993640699523052,
|
|
"grad_norm": 0.7156331491582958,
|
|
"learning_rate": 2.4744776302665563e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09515075385570526,
|
|
"step": 3770,
|
|
"valid_targets_mean": 2870.0,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 6.001589825119237,
|
|
"grad_norm": 0.8433997475713761,
|
|
"learning_rate": 2.436411317228997e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17560066282749176,
|
|
"step": 3775,
|
|
"valid_targets_mean": 2793.9,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 6.009538950715421,
|
|
"grad_norm": 0.5333413422281913,
|
|
"learning_rate": 2.3986210769155994e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16520081460475922,
|
|
"step": 3780,
|
|
"valid_targets_mean": 6061.8,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 6.017488076311606,
|
|
"grad_norm": 0.66502275882437,
|
|
"learning_rate": 2.361107503331095e-06,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10621203482151031,
|
|
"step": 3785,
|
|
"valid_targets_mean": 2985.8,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 6.02543720190779,
|
|
"grad_norm": 0.5243657913143881,
|
|
"learning_rate": 2.3238711861314165e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14354604482650757,
|
|
"step": 3790,
|
|
"valid_targets_mean": 5367.0,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 6.033386327503974,
|
|
"grad_norm": 0.7066023516227493,
|
|
"learning_rate": 2.2869127106144663e-06,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15328553318977356,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3408.0,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 6.041335453100159,
|
|
"grad_norm": 0.540739606098391,
|
|
"learning_rate": 2.2502326577109e-06,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08416876941919327,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3373.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 6.049284578696343,
|
|
"grad_norm": 0.6329615315476147,
|
|
"learning_rate": 2.213831603974985e-06,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.131415456533432,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3742.4,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 6.0572337042925275,
|
|
"grad_norm": 0.6927051693640695,
|
|
"learning_rate": 2.1777101215755624e-06,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1161649227142334,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3210.9,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 6.0651828298887125,
|
|
"grad_norm": 0.5964539888681721,
|
|
"learning_rate": 2.1418687782870284e-06,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18598739802837372,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5236.8,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 6.073131955484897,
|
|
"grad_norm": 0.6281005675484677,
|
|
"learning_rate": 2.1063081374804263e-06,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1468728482723236,
|
|
"step": 3820,
|
|
"valid_targets_mean": 4794.6,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 6.081081081081081,
|
|
"grad_norm": 0.5442987832878683,
|
|
"learning_rate": 2.0710287581145884e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09267094731330872,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4493.9,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 6.089030206677266,
|
|
"grad_norm": 0.7712116535139424,
|
|
"learning_rate": 2.036031194727346e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09872271865606308,
|
|
"step": 3830,
|
|
"valid_targets_mean": 1966.9,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 6.09697933227345,
|
|
"grad_norm": 0.648841682678685,
|
|
"learning_rate": 2.0013159974268094e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12190896272659302,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3530.5,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 6.104928457869635,
|
|
"grad_norm": 0.6023868107503677,
|
|
"learning_rate": 1.9668837118827346e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12058161199092865,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3725.4,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 6.112877583465819,
|
|
"grad_norm": 0.5584239065681313,
|
|
"learning_rate": 1.932734879317937e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1474909484386444,
|
|
"step": 3845,
|
|
"valid_targets_mean": 5673.8,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 6.120826709062003,
|
|
"grad_norm": 0.7071455255859755,
|
|
"learning_rate": 1.8988700364997758e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11278359591960907,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2505.4,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 6.128775834658188,
|
|
"grad_norm": 0.6979681649094387,
|
|
"learning_rate": 1.8652897157317395e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14248163998126984,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3642.1,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 6.136724960254372,
|
|
"grad_norm": 0.6384017988114666,
|
|
"learning_rate": 1.8319944448450578e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12417655438184738,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3871.2,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 6.144674085850556,
|
|
"grad_norm": 0.6624507345998915,
|
|
"learning_rate": 1.7989847471904065e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1384684145450592,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4294.2,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 6.152623211446741,
|
|
"grad_norm": 0.770870605302227,
|
|
"learning_rate": 1.766261141629706e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14443659782409668,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3140.0,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 6.160572337042925,
|
|
"grad_norm": 0.6753386353724785,
|
|
"learning_rate": 1.7338241425279244e-06,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09500207006931305,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2529.2,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 6.168521462639109,
|
|
"grad_norm": 0.6099932077815367,
|
|
"learning_rate": 1.7016742597450341e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13668741285800934,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3941.4,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 6.176470588235294,
|
|
"grad_norm": 0.6303081078745002,
|
|
"learning_rate": 1.6698119986279726e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24010378122329712,
|
|
"step": 3885,
|
|
"valid_targets_mean": 5786.0,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 6.1844197138314785,
|
|
"grad_norm": 0.6329683280045748,
|
|
"learning_rate": 1.6382378600026982e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14753060042858124,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4507.9,
|
|
"valid_targets_min": 1731
|
|
},
|
|
{
|
|
"epoch": 6.192368839427663,
|
|
"grad_norm": 0.5839957995591698,
|
|
"learning_rate": 1.60695234016633e-06,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11078199744224548,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4253.2,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 6.200317965023848,
|
|
"grad_norm": 0.7332947914168783,
|
|
"learning_rate": 1.5759559308793448e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14138217270374298,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3175.2,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 6.208267090620032,
|
|
"grad_norm": 0.6374193980573579,
|
|
"learning_rate": 1.5452491193578412e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12386956810951233,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3345.5,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 6.216216216216216,
|
|
"grad_norm": 0.7700758984872191,
|
|
"learning_rate": 1.5148323882658767e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11031192541122437,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2582.2,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 6.224165341812401,
|
|
"grad_norm": 0.6813040177949818,
|
|
"learning_rate": 1.484706215707905e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13871543109416962,
|
|
"step": 3915,
|
|
"valid_targets_mean": 4007.5,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 6.232114467408585,
|
|
"grad_norm": 0.6718771683562692,
|
|
"learning_rate": 1.4548710752212292e-06,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12350428104400635,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3262.5,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 6.24006359300477,
|
|
"grad_norm": 0.5333262603099248,
|
|
"learning_rate": 1.425327435768582e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09818576276302338,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3849.5,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 6.248012718600954,
|
|
"grad_norm": 0.5166052599027676,
|
|
"learning_rate": 1.3960757617307486e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10246315598487854,
|
|
"step": 3930,
|
|
"valid_targets_mean": 5839.8,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 6.255961844197138,
|
|
"grad_norm": 0.4837399775624598,
|
|
"learning_rate": 1.3671165128992514e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12420549988746643,
|
|
"step": 3935,
|
|
"valid_targets_mean": 7283.4,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 6.263910969793323,
|
|
"grad_norm": 0.7611030638859533,
|
|
"learning_rate": 1.3384501444691544e-06,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14793661236763,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2990.9,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 6.271860095389507,
|
|
"grad_norm": 0.7296504262990383,
|
|
"learning_rate": 1.3100771070318796e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10326884686946869,
|
|
"step": 3945,
|
|
"valid_targets_mean": 2694.5,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 6.279809220985691,
|
|
"grad_norm": 0.5964494736637876,
|
|
"learning_rate": 1.2819978465681283e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09586623311042786,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3213.8,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 6.287758346581876,
|
|
"grad_norm": 0.7496000156799355,
|
|
"learning_rate": 1.254212804440893e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1649065464735031,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4296.8,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 6.29570747217806,
|
|
"grad_norm": 0.7530888685113304,
|
|
"learning_rate": 1.2267224173884929e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15699484944343567,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3425.5,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 6.3036565977742445,
|
|
"grad_norm": 0.7082065854401345,
|
|
"learning_rate": 1.199527117517727e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14800715446472168,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4011.6,
|
|
"valid_targets_min": 2192
|
|
},
|
|
{
|
|
"epoch": 6.3116057233704295,
|
|
"grad_norm": 0.7063750210585346,
|
|
"learning_rate": 1.172627332297076e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14464861154556274,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3535.2,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.319554848966614,
|
|
"grad_norm": 0.6550942927823205,
|
|
"learning_rate": 1.1460234845499763e-06,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13053598999977112,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3546.8,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 6.327503974562799,
|
|
"grad_norm": 0.6546948304176295,
|
|
"learning_rate": 1.1197159924481804e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15126483142375946,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4587.4,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 6.335453100158983,
|
|
"grad_norm": 0.6264782815428047,
|
|
"learning_rate": 1.0937052695051965e-06,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1402762234210968,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3570.0,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 6.343402225755167,
|
|
"grad_norm": 0.747138421235608,
|
|
"learning_rate": 1.067991724569759e-06,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327006220817566,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3280.8,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 6.351351351351352,
|
|
"grad_norm": 0.6210711182879401,
|
|
"learning_rate": 1.0425757618194265e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1449778825044632,
|
|
"step": 3995,
|
|
"valid_targets_mean": 5024.1,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 6.359300476947536,
|
|
"grad_norm": 0.6188552497362102,
|
|
"learning_rate": 1.0174577807542273e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11441271007061005,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4404.4,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 6.36724960254372,
|
|
"grad_norm": 0.6300798287287166,
|
|
"learning_rate": 9.926381761903614e-07,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09247942268848419,
|
|
"step": 4005,
|
|
"valid_targets_mean": 3308.5,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 6.375198728139905,
|
|
"grad_norm": 1.362687581454496,
|
|
"learning_rate": 9.681173382540177e-07,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10018517076969147,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3482.8,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 6.383147853736089,
|
|
"grad_norm": 0.6831346615194489,
|
|
"learning_rate": 9.438956523752263e-07,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07794622331857681,
|
|
"step": 4015,
|
|
"valid_targets_mean": 1851.9,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 6.391096979332273,
|
|
"grad_norm": 0.7121049870787614,
|
|
"learning_rate": 9.199734992818099e-07,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11453884840011597,
|
|
"step": 4020,
|
|
"valid_targets_mean": 2835.8,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 6.399046104928458,
|
|
"grad_norm": 0.6765624896767553,
|
|
"learning_rate": 8.963512549933795e-07,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1173640638589859,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3554.0,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 6.406995230524642,
|
|
"grad_norm": 0.8437670760276504,
|
|
"learning_rate": 8.730292908154614e-07,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16551662981510162,
|
|
"step": 4030,
|
|
"valid_targets_mean": 2974.4,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 6.414944356120826,
|
|
"grad_norm": 0.6100286442916071,
|
|
"learning_rate": 8.500079733336175e-07,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291399598121643,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5195.0,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 6.422893481717011,
|
|
"grad_norm": 0.6866249809469398,
|
|
"learning_rate": 8.272876644077188e-07,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12931175529956818,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3271.2,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 6.4308426073131955,
|
|
"grad_norm": 0.5661552597339222,
|
|
"learning_rate": 8.048687211662343e-07,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.153263121843338,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5780.9,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 6.43879173290938,
|
|
"grad_norm": 0.6387258372193202,
|
|
"learning_rate": 7.827514960006266e-07,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10343979299068451,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3239.4,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 6.4467408585055646,
|
|
"grad_norm": 0.6388915527688235,
|
|
"learning_rate": 7.609363365598165e-07,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12444375455379486,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3098.8,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 6.454689984101749,
|
|
"grad_norm": 0.6083124013534026,
|
|
"learning_rate": 7.394235857447119e-07,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12032529711723328,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4858.6,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 6.462639109697934,
|
|
"grad_norm": 0.6609437621187715,
|
|
"learning_rate": 7.182135817028157e-07,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11521437764167786,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3493.6,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 6.470588235294118,
|
|
"grad_norm": 0.6316746809057769,
|
|
"learning_rate": 6.973066578229248e-07,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14546751976013184,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4404.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 6.478537360890302,
|
|
"grad_norm": 0.7172930168462953,
|
|
"learning_rate": 6.767031427298687e-07,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11338971555233002,
|
|
"step": 4075,
|
|
"valid_targets_mean": 2748.1,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 6.486486486486487,
|
|
"grad_norm": 0.6702834310305704,
|
|
"learning_rate": 6.564033602793584e-07,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593889743089676,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3828.1,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 6.494435612082671,
|
|
"grad_norm": 0.7636084971646036,
|
|
"learning_rate": 6.364076295529042e-07,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1407301425933838,
|
|
"step": 4085,
|
|
"valid_targets_mean": 2835.1,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 6.502384737678855,
|
|
"grad_norm": 0.5204049232995884,
|
|
"learning_rate": 6.167162648527703e-07,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11776573210954666,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5098.1,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 6.51033386327504,
|
|
"grad_norm": 0.659126230270788,
|
|
"learning_rate": 5.973295756970653e-07,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1443343460559845,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3919.9,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 6.518282988871224,
|
|
"grad_norm": 0.7625519894518338,
|
|
"learning_rate": 5.782478668148672e-07,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15620967745780945,
|
|
"step": 4100,
|
|
"valid_targets_mean": 2918.2,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 6.526232114467408,
|
|
"grad_norm": 0.7510828994035491,
|
|
"learning_rate": 5.59471438141419e-07,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16122432053089142,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3856.6,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 6.534181240063593,
|
|
"grad_norm": 0.6559112507896808,
|
|
"learning_rate": 5.410005848134315e-07,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1771281659603119,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4124.9,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 6.542130365659777,
|
|
"grad_norm": 0.6637890471464966,
|
|
"learning_rate": 5.228355971644461e-07,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684686839580536,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4761.1,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 6.550079491255962,
|
|
"grad_norm": 1.1785597077015122,
|
|
"learning_rate": 5.049767607202549e-07,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14000050723552704,
|
|
"step": 4120,
|
|
"valid_targets_mean": 2742.0,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 6.558028616852146,
|
|
"grad_norm": 0.622753601477571,
|
|
"learning_rate": 4.874243561944214e-07,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378781944513321,
|
|
"step": 4125,
|
|
"valid_targets_mean": 4404.8,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 6.5659777424483305,
|
|
"grad_norm": 0.6713301598548361,
|
|
"learning_rate": 4.701786594838753e-07,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12808442115783691,
|
|
"step": 4130,
|
|
"valid_targets_mean": 4464.5,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 6.573926868044515,
|
|
"grad_norm": 0.637011334092718,
|
|
"learning_rate": 4.532399416645694e-07,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09397947043180466,
|
|
"step": 4135,
|
|
"valid_targets_mean": 3145.8,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 6.5818759936407,
|
|
"grad_norm": 0.6003423173293511,
|
|
"learning_rate": 4.366084689872074e-07,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10146000981330872,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3782.2,
|
|
"valid_targets_min": 2330
|
|
},
|
|
{
|
|
"epoch": 6.589825119236884,
|
|
"grad_norm": 0.7131527150428909,
|
|
"learning_rate": 4.202845028730829e-07,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0908747985959053,
|
|
"step": 4145,
|
|
"valid_targets_mean": 2267.4,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 6.597774244833069,
|
|
"grad_norm": 0.6515984511813347,
|
|
"learning_rate": 4.0426829990994677e-07,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08831249177455902,
|
|
"step": 4150,
|
|
"valid_targets_mean": 2808.2,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 6.605723370429253,
|
|
"grad_norm": 0.6140680037883213,
|
|
"learning_rate": 3.885601118479909e-07,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12384349852800369,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4263.8,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 6.613672496025437,
|
|
"grad_norm": 0.7207247102823364,
|
|
"learning_rate": 3.731601855958844e-07,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16912327706813812,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3764.4,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 6.621621621621622,
|
|
"grad_norm": 0.6249110574062943,
|
|
"learning_rate": 3.5806876321688553e-07,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12320230901241302,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3194.6,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 6.629570747217806,
|
|
"grad_norm": 0.6521351929097156,
|
|
"learning_rate": 3.4328608192505164e-07,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138484925031662,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4601.6,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 6.63751987281399,
|
|
"grad_norm": 0.685219112534581,
|
|
"learning_rate": 3.288123740814997e-07,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1558046042919159,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3329.5,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 6.645468998410175,
|
|
"grad_norm": 0.615846074004154,
|
|
"learning_rate": 3.1464786719075825e-07,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11607696861028671,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4191.2,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 6.653418124006359,
|
|
"grad_norm": 0.6859865114427361,
|
|
"learning_rate": 3.0079278389719246e-07,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12746399641036987,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3626.9,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 6.661367249602543,
|
|
"grad_norm": 0.7080920002666135,
|
|
"learning_rate": 2.8724734198149585e-07,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15991079807281494,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3939.0,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 6.669316375198728,
|
|
"grad_norm": 0.5727178416783522,
|
|
"learning_rate": 2.7401175435727735e-07,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16349464654922485,
|
|
"step": 4195,
|
|
"valid_targets_mean": 5447.4,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 6.677265500794912,
|
|
"grad_norm": 0.6139405384409878,
|
|
"learning_rate": 2.61086229067713e-07,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10272794216871262,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3950.8,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 6.685214626391097,
|
|
"grad_norm": 0.6571083927900229,
|
|
"learning_rate": 2.4847096928226846e-07,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12205368280410767,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3692.2,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 6.6931637519872815,
|
|
"grad_norm": 0.5732787816646541,
|
|
"learning_rate": 2.3616617329351499e-07,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1810118556022644,
|
|
"step": 4210,
|
|
"valid_targets_mean": 5591.9,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 6.701112877583466,
|
|
"grad_norm": 0.543924649714648,
|
|
"learning_rate": 2.2417203451400749e-07,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10553249716758728,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3799.0,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 6.709062003179651,
|
|
"grad_norm": 0.7577087496348476,
|
|
"learning_rate": 2.124887414732424e-07,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12939730286598206,
|
|
"step": 4220,
|
|
"valid_targets_mean": 2980.0,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.717011128775835,
|
|
"grad_norm": 0.6952073888145993,
|
|
"learning_rate": 2.0111647781470233e-07,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15174588561058044,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3962.8,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 6.724960254372019,
|
|
"grad_norm": 0.5926539808402443,
|
|
"learning_rate": 1.9005542229295848e-07,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08694517612457275,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3091.0,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 6.732909379968204,
|
|
"grad_norm": 0.6544972747028084,
|
|
"learning_rate": 1.793057487708705e-07,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10004061460494995,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3069.9,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 6.740858505564388,
|
|
"grad_norm": 0.5558739924603033,
|
|
"learning_rate": 1.688676262168465e-07,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09515691548585892,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3552.8,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 6.748807631160572,
|
|
"grad_norm": 0.7997512397380077,
|
|
"learning_rate": 1.5874121870219415e-07,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17815861105918884,
|
|
"step": 4245,
|
|
"valid_targets_mean": 2915.9,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 6.756756756756757,
|
|
"grad_norm": 0.5840483684426973,
|
|
"learning_rate": 1.4892668539853606e-07,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07563312351703644,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3174.0,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 6.764705882352941,
|
|
"grad_norm": 0.7589128604567826,
|
|
"learning_rate": 1.3942418057530714e-07,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16448462009429932,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3400.2,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 6.772655007949125,
|
|
"grad_norm": 0.6426425257042281,
|
|
"learning_rate": 1.3023385359733687e-07,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14567741751670837,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4817.0,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 6.78060413354531,
|
|
"grad_norm": 0.7453867870870018,
|
|
"learning_rate": 1.213558489224953e-07,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14852960407733917,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3354.2,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 6.788553259141494,
|
|
"grad_norm": 0.5920056538975642,
|
|
"learning_rate": 1.1279030609942177e-07,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07984557747840881,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3291.8,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 6.796502384737678,
|
|
"grad_norm": 0.5382337426239149,
|
|
"learning_rate": 1.0453735976533985e-07,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17529791593551636,
|
|
"step": 4275,
|
|
"valid_targets_mean": 6676.8,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 6.804451510333863,
|
|
"grad_norm": 0.5865414120318847,
|
|
"learning_rate": 9.659713964392358e-08,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467946469783783,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4501.6,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 6.8124006359300475,
|
|
"grad_norm": 0.6980884555646795,
|
|
"learning_rate": 8.896977054328349e-08,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13327836990356445,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3997.6,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 6.8203497615262325,
|
|
"grad_norm": 0.5891655315602166,
|
|
"learning_rate": 8.165537235398146e-08,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.221988707780838,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5868.2,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 6.828298887122417,
|
|
"grad_norm": 0.5738781394482735,
|
|
"learning_rate": 7.465406004715903e-08,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1472679078578949,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4938.4,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 6.836248012718601,
|
|
"grad_norm": 0.5296391055403549,
|
|
"learning_rate": 6.796594367272535e-08,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07535900175571442,
|
|
"step": 4300,
|
|
"valid_targets_mean": 2942.2,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 6.844197138314786,
|
|
"grad_norm": 0.6628067385365103,
|
|
"learning_rate": 6.159112835763204e-08,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11580079793930054,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3296.8,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 6.85214626391097,
|
|
"grad_norm": 0.6733703296585642,
|
|
"learning_rate": 5.552971430421439e-08,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08777789771556854,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3606.6,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 6.860095389507154,
|
|
"grad_norm": 0.6448618451624646,
|
|
"learning_rate": 4.9781796788621605e-08,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08481432497501373,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3187.5,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 6.868044515103339,
|
|
"grad_norm": 0.6829035827420316,
|
|
"learning_rate": 4.434746615932018e-08,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11991842091083527,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3667.9,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 6.875993640699523,
|
|
"grad_norm": 0.5456201731216378,
|
|
"learning_rate": 3.922680783566168e-08,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19996213912963867,
|
|
"step": 4325,
|
|
"valid_targets_mean": 6349.5,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 6.883942766295707,
|
|
"grad_norm": 0.5346273564543522,
|
|
"learning_rate": 3.441990230656167e-08,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1153978481888771,
|
|
"step": 4330,
|
|
"valid_targets_mean": 4485.6,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 6.891891891891892,
|
|
"grad_norm": 0.7910875239159146,
|
|
"learning_rate": 2.992682512921175e-08,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14991989731788635,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3145.0,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 6.899841017488076,
|
|
"grad_norm": 0.6962794960300286,
|
|
"learning_rate": 2.574764692790499e-08,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14987902343273163,
|
|
"step": 4340,
|
|
"valid_targets_mean": 4219.9,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 6.907790143084261,
|
|
"grad_norm": 0.8348052309661311,
|
|
"learning_rate": 2.188243339292795e-08,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17805957794189453,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4821.9,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 6.915739268680445,
|
|
"grad_norm": 0.8011365050178731,
|
|
"learning_rate": 1.8331245279517017e-08,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10734917223453522,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3004.9,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 6.923688394276629,
|
|
"grad_norm": 0.7396008217653776,
|
|
"learning_rate": 1.509413840691476e-08,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494922637939453,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3328.4,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 6.9316375198728135,
|
|
"grad_norm": 0.5324346781350666,
|
|
"learning_rate": 1.2171163657481722e-08,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12173102796077728,
|
|
"step": 4360,
|
|
"valid_targets_mean": 6169.4,
|
|
"valid_targets_min": 1995
|
|
},
|
|
{
|
|
"epoch": 6.9395866454689985,
|
|
"grad_norm": 0.7997993122307212,
|
|
"learning_rate": 9.562366975910397e-09,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14867912232875824,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4667.0,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 6.947535771065183,
|
|
"grad_norm": 0.5346814246433383,
|
|
"learning_rate": 7.2677893684880425e-09,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08928591012954712,
|
|
"step": 4370,
|
|
"valid_targets_mean": 5167.4,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 6.955484896661368,
|
|
"grad_norm": 0.625565158873511,
|
|
"learning_rate": 5.2874669024616246e-09,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07153826951980591,
|
|
"step": 4375,
|
|
"valid_targets_mean": 2804.5,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 6.963434022257552,
|
|
"grad_norm": 0.646911461822627,
|
|
"learning_rate": 3.621430705467166e-09,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13823717832565308,
|
|
"step": 4380,
|
|
"valid_targets_mean": 4841.5,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 6.971383147853736,
|
|
"grad_norm": 0.7049914013624015,
|
|
"learning_rate": 2.2697069650456927e-09,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15039952099323273,
|
|
"step": 4385,
|
|
"valid_targets_mean": 3679.5,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 6.979332273449921,
|
|
"grad_norm": 0.6793035439771063,
|
|
"learning_rate": 1.2323169282257852e-09,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12276875972747803,
|
|
"step": 4390,
|
|
"valid_targets_mean": 4009.4,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 6.987281399046105,
|
|
"grad_norm": 0.7984508069602944,
|
|
"learning_rate": 5.092769011860732e-10,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.176311194896698,
|
|
"step": 4395,
|
|
"valid_targets_mean": 3764.0,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 6.995230524642289,
|
|
"grad_norm": 0.545472641111332,
|
|
"learning_rate": 1.0059824901098581e-10,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15308044850826263,
|
|
"step": 4400,
|
|
"valid_targets_mean": 6153.9,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14714080095291138,
|
|
"step": 4403,
|
|
"total_flos": 1.2559500582227804e+18,
|
|
"train_loss": 0.328229405380178,
|
|
"train_runtime": 48656.4652,
|
|
"train_samples_per_second": 1.448,
|
|
"train_steps_per_second": 0.09,
|
|
"valid_targets_mean": 2880.2,
|
|
"valid_targets_min": 1091
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4403,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.2559500582227804e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|