9750 lines
271 KiB
JSON
9750 lines
271 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4410,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007936507936507936,
|
|
"grad_norm": 35.8824941563582,
|
|
"learning_rate": 3.6281179138322e-07,
|
|
"loss": 0.9521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9736891984939575,
|
|
"step": 5,
|
|
"valid_targets_mean": 6535.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 0.015873015873015872,
|
|
"grad_norm": 33.936900101725655,
|
|
"learning_rate": 8.163265306122449e-07,
|
|
"loss": 0.9331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9417179822921753,
|
|
"step": 10,
|
|
"valid_targets_mean": 6796.2,
|
|
"valid_targets_min": 5538
|
|
},
|
|
{
|
|
"epoch": 0.023809523809523808,
|
|
"grad_norm": 31.15476640458832,
|
|
"learning_rate": 1.26984126984127e-06,
|
|
"loss": 0.9074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9372260570526123,
|
|
"step": 15,
|
|
"valid_targets_mean": 6559.7,
|
|
"valid_targets_min": 5758
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"grad_norm": 22.354957251077515,
|
|
"learning_rate": 1.723356009070295e-06,
|
|
"loss": 0.8451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8422178626060486,
|
|
"step": 20,
|
|
"valid_targets_mean": 6376.2,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 0.03968253968253968,
|
|
"grad_norm": 8.841462484334293,
|
|
"learning_rate": 2.17687074829932e-06,
|
|
"loss": 0.7665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.697857677936554,
|
|
"step": 25,
|
|
"valid_targets_mean": 7884.8,
|
|
"valid_targets_min": 6578
|
|
},
|
|
{
|
|
"epoch": 0.047619047619047616,
|
|
"grad_norm": 4.6597594237729565,
|
|
"learning_rate": 2.6303854875283447e-06,
|
|
"loss": 0.6856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.692092776298523,
|
|
"step": 30,
|
|
"valid_targets_mean": 6484.9,
|
|
"valid_targets_min": 3405
|
|
},
|
|
{
|
|
"epoch": 0.05555555555555555,
|
|
"grad_norm": 2.6525932414308153,
|
|
"learning_rate": 3.08390022675737e-06,
|
|
"loss": 0.6668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6395765542984009,
|
|
"step": 35,
|
|
"valid_targets_mean": 6628.9,
|
|
"valid_targets_min": 5732
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"grad_norm": 1.903204395651839,
|
|
"learning_rate": 3.537414965986395e-06,
|
|
"loss": 0.6301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.636085033416748,
|
|
"step": 40,
|
|
"valid_targets_mean": 6323.1,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.07142857142857142,
|
|
"grad_norm": 1.5709387215231831,
|
|
"learning_rate": 3.99092970521542e-06,
|
|
"loss": 0.5935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5842190980911255,
|
|
"step": 45,
|
|
"valid_targets_mean": 6537.0,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 0.07936507936507936,
|
|
"grad_norm": 1.334076140403372,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.5962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6062476634979248,
|
|
"step": 50,
|
|
"valid_targets_mean": 7068.4,
|
|
"valid_targets_min": 5512
|
|
},
|
|
{
|
|
"epoch": 0.0873015873015873,
|
|
"grad_norm": 1.0110669760042539,
|
|
"learning_rate": 4.897959183673469e-06,
|
|
"loss": 0.572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5384228825569153,
|
|
"step": 55,
|
|
"valid_targets_mean": 6413.4,
|
|
"valid_targets_min": 5812
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"grad_norm": 0.7085721032612825,
|
|
"learning_rate": 5.3514739229024945e-06,
|
|
"loss": 0.5371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5110308527946472,
|
|
"step": 60,
|
|
"valid_targets_mean": 7840.6,
|
|
"valid_targets_min": 6116
|
|
},
|
|
{
|
|
"epoch": 0.10317460317460317,
|
|
"grad_norm": 0.7111464076972573,
|
|
"learning_rate": 5.80498866213152e-06,
|
|
"loss": 0.5277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5348063707351685,
|
|
"step": 65,
|
|
"valid_targets_mean": 6505.1,
|
|
"valid_targets_min": 5178
|
|
},
|
|
{
|
|
"epoch": 0.1111111111111111,
|
|
"grad_norm": 0.6235427058905633,
|
|
"learning_rate": 6.258503401360545e-06,
|
|
"loss": 0.5102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5149164199829102,
|
|
"step": 70,
|
|
"valid_targets_mean": 6872.4,
|
|
"valid_targets_min": 5965
|
|
},
|
|
{
|
|
"epoch": 0.11904761904761904,
|
|
"grad_norm": 0.5850690179875717,
|
|
"learning_rate": 6.71201814058957e-06,
|
|
"loss": 0.5007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48702484369277954,
|
|
"step": 75,
|
|
"valid_targets_mean": 6693.8,
|
|
"valid_targets_min": 6029
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"grad_norm": 0.5792817101805444,
|
|
"learning_rate": 7.165532879818595e-06,
|
|
"loss": 0.4853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4884127080440521,
|
|
"step": 80,
|
|
"valid_targets_mean": 6913.5,
|
|
"valid_targets_min": 6387
|
|
},
|
|
{
|
|
"epoch": 0.1349206349206349,
|
|
"grad_norm": 0.5502313966123876,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 0.4609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4458758234977722,
|
|
"step": 85,
|
|
"valid_targets_mean": 5938.1,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 0.4639384261828613,
|
|
"learning_rate": 8.072562358276645e-06,
|
|
"loss": 0.4583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4457317292690277,
|
|
"step": 90,
|
|
"valid_targets_mean": 7750.4,
|
|
"valid_targets_min": 5595
|
|
},
|
|
{
|
|
"epoch": 0.15079365079365079,
|
|
"grad_norm": 0.49109095131560493,
|
|
"learning_rate": 8.52607709750567e-06,
|
|
"loss": 0.445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45213085412979126,
|
|
"step": 95,
|
|
"valid_targets_mean": 6817.5,
|
|
"valid_targets_min": 5992
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"grad_norm": 0.4824867838432047,
|
|
"learning_rate": 8.979591836734695e-06,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4347403943538666,
|
|
"step": 100,
|
|
"valid_targets_mean": 6667.3,
|
|
"valid_targets_min": 6109
|
|
},
|
|
{
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 0.539034076724007,
|
|
"learning_rate": 9.43310657596372e-06,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4464046061038971,
|
|
"step": 105,
|
|
"valid_targets_mean": 6335.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.1746031746031746,
|
|
"grad_norm": 0.4256954018741094,
|
|
"learning_rate": 9.886621315192746e-06,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3975144028663635,
|
|
"step": 110,
|
|
"valid_targets_mean": 7686.5,
|
|
"valid_targets_min": 6124
|
|
},
|
|
{
|
|
"epoch": 0.18253968253968253,
|
|
"grad_norm": 0.4873190142154289,
|
|
"learning_rate": 1.034013605442177e-05,
|
|
"loss": 0.406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42707347869873047,
|
|
"step": 115,
|
|
"valid_targets_mean": 6758.6,
|
|
"valid_targets_min": 4954
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"grad_norm": 0.5443344225035132,
|
|
"learning_rate": 1.0793650793650794e-05,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3811206817626953,
|
|
"step": 120,
|
|
"valid_targets_mean": 6051.8,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 0.1984126984126984,
|
|
"grad_norm": 0.542581652335422,
|
|
"learning_rate": 1.124716553287982e-05,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36813944578170776,
|
|
"step": 125,
|
|
"valid_targets_mean": 7682.5,
|
|
"valid_targets_min": 6241
|
|
},
|
|
{
|
|
"epoch": 0.20634920634920634,
|
|
"grad_norm": 0.4370980951626029,
|
|
"learning_rate": 1.1700680272108845e-05,
|
|
"loss": 0.3897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3534102439880371,
|
|
"step": 130,
|
|
"valid_targets_mean": 7263.8,
|
|
"valid_targets_min": 5744
|
|
},
|
|
{
|
|
"epoch": 0.21428571428571427,
|
|
"grad_norm": 0.493282317336445,
|
|
"learning_rate": 1.215419501133787e-05,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3786100149154663,
|
|
"step": 135,
|
|
"valid_targets_mean": 6587.2,
|
|
"valid_targets_min": 5949
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"grad_norm": 0.4887639280574258,
|
|
"learning_rate": 1.2607709750566895e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37781333923339844,
|
|
"step": 140,
|
|
"valid_targets_mean": 7089.0,
|
|
"valid_targets_min": 5547
|
|
},
|
|
{
|
|
"epoch": 0.23015873015873015,
|
|
"grad_norm": 0.4811993388013683,
|
|
"learning_rate": 1.3061224489795918e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35799360275268555,
|
|
"step": 145,
|
|
"valid_targets_mean": 7099.6,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 0.23809523809523808,
|
|
"grad_norm": 0.5511708598233128,
|
|
"learning_rate": 1.3514739229024945e-05,
|
|
"loss": 0.3688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3696318566799164,
|
|
"step": 150,
|
|
"valid_targets_mean": 6672.1,
|
|
"valid_targets_min": 5910
|
|
},
|
|
{
|
|
"epoch": 0.24603174603174602,
|
|
"grad_norm": 0.5021289602672357,
|
|
"learning_rate": 1.3968253968253968e-05,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35513922572135925,
|
|
"step": 155,
|
|
"valid_targets_mean": 6102.7,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"grad_norm": 0.5445876342846555,
|
|
"learning_rate": 1.4421768707482994e-05,
|
|
"loss": 0.3697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37766435742378235,
|
|
"step": 160,
|
|
"valid_targets_mean": 6542.9,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 0.2619047619047619,
|
|
"grad_norm": 0.5463745410256283,
|
|
"learning_rate": 1.4875283446712018e-05,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38423383235931396,
|
|
"step": 165,
|
|
"valid_targets_mean": 7310.2,
|
|
"valid_targets_min": 5898
|
|
},
|
|
{
|
|
"epoch": 0.2698412698412698,
|
|
"grad_norm": 0.592494771957809,
|
|
"learning_rate": 1.5328798185941044e-05,
|
|
"loss": 0.3536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36160731315612793,
|
|
"step": 170,
|
|
"valid_targets_mean": 6889.7,
|
|
"valid_targets_min": 5885
|
|
},
|
|
{
|
|
"epoch": 0.2777777777777778,
|
|
"grad_norm": 0.5390386586186318,
|
|
"learning_rate": 1.578231292517007e-05,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3562218248844147,
|
|
"step": 175,
|
|
"valid_targets_mean": 6707.0,
|
|
"valid_targets_min": 6175
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.5200117007946871,
|
|
"learning_rate": 1.6235827664399097e-05,
|
|
"loss": 0.3587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3668617606163025,
|
|
"step": 180,
|
|
"valid_targets_mean": 7726.2,
|
|
"valid_targets_min": 5692
|
|
},
|
|
{
|
|
"epoch": 0.29365079365079366,
|
|
"grad_norm": 0.5252588806851979,
|
|
"learning_rate": 1.668934240362812e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32099083065986633,
|
|
"step": 185,
|
|
"valid_targets_mean": 6818.9,
|
|
"valid_targets_min": 6249
|
|
},
|
|
{
|
|
"epoch": 0.30158730158730157,
|
|
"grad_norm": 0.596666579038154,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.3623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36263060569763184,
|
|
"step": 190,
|
|
"valid_targets_mean": 6827.2,
|
|
"valid_targets_min": 5774
|
|
},
|
|
{
|
|
"epoch": 0.30952380952380953,
|
|
"grad_norm": 0.5882090867488947,
|
|
"learning_rate": 1.759637188208617e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3262217938899994,
|
|
"step": 195,
|
|
"valid_targets_mean": 6733.4,
|
|
"valid_targets_min": 6041
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"grad_norm": 0.5952439927713156,
|
|
"learning_rate": 1.8049886621315194e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35430148243904114,
|
|
"step": 200,
|
|
"valid_targets_mean": 6628.1,
|
|
"valid_targets_min": 6206
|
|
},
|
|
{
|
|
"epoch": 0.3253968253968254,
|
|
"grad_norm": 0.6375671955113913,
|
|
"learning_rate": 1.8503401360544218e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3298405408859253,
|
|
"step": 205,
|
|
"valid_targets_mean": 6534.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 0.538719917257461,
|
|
"learning_rate": 1.8956916099773243e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35709890723228455,
|
|
"step": 210,
|
|
"valid_targets_mean": 6494.2,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 0.3412698412698413,
|
|
"grad_norm": 0.6203521755561048,
|
|
"learning_rate": 1.941043083900227e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.380302369594574,
|
|
"step": 215,
|
|
"valid_targets_mean": 6119.2,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"grad_norm": 0.6234527710298925,
|
|
"learning_rate": 1.9863945578231295e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3519989252090454,
|
|
"step": 220,
|
|
"valid_targets_mean": 6565.7,
|
|
"valid_targets_min": 4425
|
|
},
|
|
{
|
|
"epoch": 0.35714285714285715,
|
|
"grad_norm": 0.5748902140726773,
|
|
"learning_rate": 2.031746031746032e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33353596925735474,
|
|
"step": 225,
|
|
"valid_targets_mean": 6648.9,
|
|
"valid_targets_min": 6033
|
|
},
|
|
{
|
|
"epoch": 0.36507936507936506,
|
|
"grad_norm": 0.6094053908431469,
|
|
"learning_rate": 2.0770975056689343e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37474408745765686,
|
|
"step": 230,
|
|
"valid_targets_mean": 6882.2,
|
|
"valid_targets_min": 6147
|
|
},
|
|
{
|
|
"epoch": 0.373015873015873,
|
|
"grad_norm": 0.6362268216319785,
|
|
"learning_rate": 2.122448979591837e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3402664065361023,
|
|
"step": 235,
|
|
"valid_targets_mean": 6255.7,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"grad_norm": 0.6579059619031435,
|
|
"learning_rate": 2.1678004535147395e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35843122005462646,
|
|
"step": 240,
|
|
"valid_targets_mean": 6295.4,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.3888888888888889,
|
|
"grad_norm": 0.5459465058306819,
|
|
"learning_rate": 2.213151927437642e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33601340651512146,
|
|
"step": 245,
|
|
"valid_targets_mean": 6851.4,
|
|
"valid_targets_min": 6258
|
|
},
|
|
{
|
|
"epoch": 0.3968253968253968,
|
|
"grad_norm": 0.5246470913959492,
|
|
"learning_rate": 2.2585034013605444e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.334672749042511,
|
|
"step": 250,
|
|
"valid_targets_mean": 7279.5,
|
|
"valid_targets_min": 5739
|
|
},
|
|
{
|
|
"epoch": 0.40476190476190477,
|
|
"grad_norm": 0.5805769184142227,
|
|
"learning_rate": 2.3038548752834472e-05,
|
|
"loss": 0.3408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3581482768058777,
|
|
"step": 255,
|
|
"valid_targets_mean": 5900.3,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"grad_norm": 0.6737649650084032,
|
|
"learning_rate": 2.3492063492063496e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3589943051338196,
|
|
"step": 260,
|
|
"valid_targets_mean": 6782.2,
|
|
"valid_targets_min": 5638
|
|
},
|
|
{
|
|
"epoch": 0.42063492063492064,
|
|
"grad_norm": 0.6138844037818817,
|
|
"learning_rate": 2.394557823129252e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35277605056762695,
|
|
"step": 265,
|
|
"valid_targets_mean": 6988.1,
|
|
"valid_targets_min": 6208
|
|
},
|
|
{
|
|
"epoch": 0.42857142857142855,
|
|
"grad_norm": 0.5754437415575964,
|
|
"learning_rate": 2.439909297052154e-05,
|
|
"loss": 0.3379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34155869483947754,
|
|
"step": 270,
|
|
"valid_targets_mean": 6846.4,
|
|
"valid_targets_min": 5696
|
|
},
|
|
{
|
|
"epoch": 0.4365079365079365,
|
|
"grad_norm": 0.6158385657235128,
|
|
"learning_rate": 2.4852607709750566e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.344745934009552,
|
|
"step": 275,
|
|
"valid_targets_mean": 7821.5,
|
|
"valid_targets_min": 6125
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"grad_norm": 0.5294900289797029,
|
|
"learning_rate": 2.5306122448979597e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30480125546455383,
|
|
"step": 280,
|
|
"valid_targets_mean": 7686.9,
|
|
"valid_targets_min": 5977
|
|
},
|
|
{
|
|
"epoch": 0.4523809523809524,
|
|
"grad_norm": 0.5886195150856317,
|
|
"learning_rate": 2.5759637188208618e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36157119274139404,
|
|
"step": 285,
|
|
"valid_targets_mean": 6278.1,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 0.4603174603174603,
|
|
"grad_norm": 0.5321016812456312,
|
|
"learning_rate": 2.6213151927437642e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.347115159034729,
|
|
"step": 290,
|
|
"valid_targets_mean": 7779.2,
|
|
"valid_targets_min": 6065
|
|
},
|
|
{
|
|
"epoch": 0.46825396825396826,
|
|
"grad_norm": 0.5534350903592119,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.3357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3201014995574951,
|
|
"step": 295,
|
|
"valid_targets_mean": 7190.4,
|
|
"valid_targets_min": 6201
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"grad_norm": 0.5459342582738103,
|
|
"learning_rate": 2.7120181405895694e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31951647996902466,
|
|
"step": 300,
|
|
"valid_targets_mean": 6675.3,
|
|
"valid_targets_min": 5893
|
|
},
|
|
{
|
|
"epoch": 0.48412698412698413,
|
|
"grad_norm": 0.5051976349896103,
|
|
"learning_rate": 2.757369614512472e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3020789623260498,
|
|
"step": 305,
|
|
"valid_targets_mean": 8716.5,
|
|
"valid_targets_min": 6384
|
|
},
|
|
{
|
|
"epoch": 0.49206349206349204,
|
|
"grad_norm": 0.6918623386258306,
|
|
"learning_rate": 2.8027210884353743e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3155471086502075,
|
|
"step": 310,
|
|
"valid_targets_mean": 6445.3,
|
|
"valid_targets_min": 5879
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.6566284951366844,
|
|
"learning_rate": 2.8480725623582767e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30983513593673706,
|
|
"step": 315,
|
|
"valid_targets_mean": 7470.0,
|
|
"valid_targets_min": 5976
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"grad_norm": 0.647334525387998,
|
|
"learning_rate": 2.893424036281179e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3444277048110962,
|
|
"step": 320,
|
|
"valid_targets_mean": 6529.2,
|
|
"valid_targets_min": 5484
|
|
},
|
|
{
|
|
"epoch": 0.5158730158730159,
|
|
"grad_norm": 0.6192553089702874,
|
|
"learning_rate": 2.938775510204082e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34645402431488037,
|
|
"step": 325,
|
|
"valid_targets_mean": 6659.4,
|
|
"valid_targets_min": 4662
|
|
},
|
|
{
|
|
"epoch": 0.5238095238095238,
|
|
"grad_norm": 0.5345610494995471,
|
|
"learning_rate": 2.9841269841269844e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3187600374221802,
|
|
"step": 330,
|
|
"valid_targets_mean": 6320.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 0.5317460317460317,
|
|
"grad_norm": 0.5447330827966401,
|
|
"learning_rate": 3.0294784580498868e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32346779108047485,
|
|
"step": 335,
|
|
"valid_targets_mean": 7514.6,
|
|
"valid_targets_min": 5924
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"grad_norm": 0.6098718891351412,
|
|
"learning_rate": 3.074829931972789e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32788965106010437,
|
|
"step": 340,
|
|
"valid_targets_mean": 6589.2,
|
|
"valid_targets_min": 5259
|
|
},
|
|
{
|
|
"epoch": 0.5476190476190477,
|
|
"grad_norm": 0.5862357636476979,
|
|
"learning_rate": 3.1201814058956924e-05,
|
|
"loss": 0.338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35055094957351685,
|
|
"step": 345,
|
|
"valid_targets_mean": 6409.6,
|
|
"valid_targets_min": 3182
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 0.6283802031937619,
|
|
"learning_rate": 3.1655328798185945e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32779306173324585,
|
|
"step": 350,
|
|
"valid_targets_mean": 7151.4,
|
|
"valid_targets_min": 4870
|
|
},
|
|
{
|
|
"epoch": 0.5634920634920635,
|
|
"grad_norm": 0.6028988076134181,
|
|
"learning_rate": 3.2108843537414965e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32394006848335266,
|
|
"step": 355,
|
|
"valid_targets_mean": 6366.4,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.6365201939029815,
|
|
"learning_rate": 3.256235827664399e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3481184244155884,
|
|
"step": 360,
|
|
"valid_targets_mean": 6369.6,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 0.5793650793650794,
|
|
"grad_norm": 0.7769710469072116,
|
|
"learning_rate": 3.3015873015873014e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.314181923866272,
|
|
"step": 365,
|
|
"valid_targets_mean": 6312.2,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 0.5873015873015873,
|
|
"grad_norm": 0.7484598625845816,
|
|
"learning_rate": 3.346938775510204e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31007227301597595,
|
|
"step": 370,
|
|
"valid_targets_mean": 7087.7,
|
|
"valid_targets_min": 5944
|
|
},
|
|
{
|
|
"epoch": 0.5952380952380952,
|
|
"grad_norm": 0.6275686201489294,
|
|
"learning_rate": 3.392290249433107e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2980511784553528,
|
|
"step": 375,
|
|
"valid_targets_mean": 6679.4,
|
|
"valid_targets_min": 5931
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"grad_norm": 0.5558960275863446,
|
|
"learning_rate": 3.437641723356009e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33760344982147217,
|
|
"step": 380,
|
|
"valid_targets_mean": 6710.0,
|
|
"valid_targets_min": 5605
|
|
},
|
|
{
|
|
"epoch": 0.6111111111111112,
|
|
"grad_norm": 0.5463170601200416,
|
|
"learning_rate": 3.482993197278912e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32083994150161743,
|
|
"step": 385,
|
|
"valid_targets_mean": 6880.1,
|
|
"valid_targets_min": 6131
|
|
},
|
|
{
|
|
"epoch": 0.6190476190476191,
|
|
"grad_norm": 0.5773550114310539,
|
|
"learning_rate": 3.5283446712018146e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3288286328315735,
|
|
"step": 390,
|
|
"valid_targets_mean": 6356.9,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 0.626984126984127,
|
|
"grad_norm": 0.6478551755306993,
|
|
"learning_rate": 3.573696145124717e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33664068579673767,
|
|
"step": 395,
|
|
"valid_targets_mean": 6902.1,
|
|
"valid_targets_min": 5819
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"grad_norm": 0.522939080384077,
|
|
"learning_rate": 3.6190476190476195e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3187980055809021,
|
|
"step": 400,
|
|
"valid_targets_mean": 6535.3,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 0.6428571428571429,
|
|
"grad_norm": 0.6272156042362622,
|
|
"learning_rate": 3.6643990929705216e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32333189249038696,
|
|
"step": 405,
|
|
"valid_targets_mean": 6727.0,
|
|
"valid_targets_min": 6330
|
|
},
|
|
{
|
|
"epoch": 0.6507936507936508,
|
|
"grad_norm": 0.7392069826316678,
|
|
"learning_rate": 3.7097505668934243e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3326655626296997,
|
|
"step": 410,
|
|
"valid_targets_mean": 6974.6,
|
|
"valid_targets_min": 6548
|
|
},
|
|
{
|
|
"epoch": 0.6587301587301587,
|
|
"grad_norm": 0.5499901325080686,
|
|
"learning_rate": 3.755102040816327e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3241935074329376,
|
|
"step": 415,
|
|
"valid_targets_mean": 6543.0,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 0.5328684737132082,
|
|
"learning_rate": 3.800453514739229e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3236032724380493,
|
|
"step": 420,
|
|
"valid_targets_mean": 8700.9,
|
|
"valid_targets_min": 5406
|
|
},
|
|
{
|
|
"epoch": 0.6746031746031746,
|
|
"grad_norm": 0.5556186681658927,
|
|
"learning_rate": 3.845804988662132e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32321587204933167,
|
|
"step": 425,
|
|
"valid_targets_mean": 6769.8,
|
|
"valid_targets_min": 5848
|
|
},
|
|
{
|
|
"epoch": 0.6825396825396826,
|
|
"grad_norm": 0.5663944080069968,
|
|
"learning_rate": 3.891156462585034e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3215429186820984,
|
|
"step": 430,
|
|
"valid_targets_mean": 6501.6,
|
|
"valid_targets_min": 4880
|
|
},
|
|
{
|
|
"epoch": 0.6904761904761905,
|
|
"grad_norm": 0.5922960816562407,
|
|
"learning_rate": 3.936507936507937e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32387036085128784,
|
|
"step": 435,
|
|
"valid_targets_mean": 6650.9,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"grad_norm": 0.5320418926756715,
|
|
"learning_rate": 3.9818594104308396e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30306172370910645,
|
|
"step": 440,
|
|
"valid_targets_mean": 6451.9,
|
|
"valid_targets_min": 4038
|
|
},
|
|
{
|
|
"epoch": 0.7063492063492064,
|
|
"grad_norm": 0.5450837160964077,
|
|
"learning_rate": 3.999994361288785e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3237943649291992,
|
|
"step": 445,
|
|
"valid_targets_mean": 6933.6,
|
|
"valid_targets_min": 6236
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.5334404771541242,
|
|
"learning_rate": 3.9999599026131644e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3249209225177765,
|
|
"step": 450,
|
|
"valid_targets_mean": 6758.7,
|
|
"valid_targets_min": 5119
|
|
},
|
|
{
|
|
"epoch": 0.7222222222222222,
|
|
"grad_norm": 0.5733168286055116,
|
|
"learning_rate": 3.999894118418342e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31783658266067505,
|
|
"step": 455,
|
|
"valid_targets_mean": 6747.2,
|
|
"valid_targets_min": 5920
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"grad_norm": 0.5151612640599561,
|
|
"learning_rate": 3.999797009734697e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3375864624977112,
|
|
"step": 460,
|
|
"valid_targets_mean": 6718.4,
|
|
"valid_targets_min": 6134
|
|
},
|
|
{
|
|
"epoch": 0.7380952380952381,
|
|
"grad_norm": 0.6003510803960957,
|
|
"learning_rate": 3.999668578083253e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32764333486557007,
|
|
"step": 465,
|
|
"valid_targets_mean": 6799.9,
|
|
"valid_targets_min": 6264
|
|
},
|
|
{
|
|
"epoch": 0.746031746031746,
|
|
"grad_norm": 0.5749895021987003,
|
|
"learning_rate": 3.9995088254756434e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31097084283828735,
|
|
"step": 470,
|
|
"valid_targets_mean": 6657.8,
|
|
"valid_targets_min": 5178
|
|
},
|
|
{
|
|
"epoch": 0.753968253968254,
|
|
"grad_norm": 0.5163237329849193,
|
|
"learning_rate": 3.999317754414084e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30085378885269165,
|
|
"step": 475,
|
|
"valid_targets_mean": 7313.1,
|
|
"valid_targets_min": 5840
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"grad_norm": 0.6039478471266058,
|
|
"learning_rate": 3.999095367891337e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077031373977661,
|
|
"step": 480,
|
|
"valid_targets_mean": 7527.9,
|
|
"valid_targets_min": 6063
|
|
},
|
|
{
|
|
"epoch": 0.7698412698412699,
|
|
"grad_norm": 0.48779589230412357,
|
|
"learning_rate": 3.9988416693906563e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33376407623291016,
|
|
"step": 485,
|
|
"valid_targets_mean": 6874.5,
|
|
"valid_targets_min": 5983
|
|
},
|
|
{
|
|
"epoch": 0.7777777777777778,
|
|
"grad_norm": 0.5338831370051105,
|
|
"learning_rate": 3.9985566628857425e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33875173330307007,
|
|
"step": 490,
|
|
"valid_targets_mean": 6857.6,
|
|
"valid_targets_min": 5695
|
|
},
|
|
{
|
|
"epoch": 0.7857142857142857,
|
|
"grad_norm": 0.5576598593831579,
|
|
"learning_rate": 3.998240352840672e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33390527963638306,
|
|
"step": 495,
|
|
"valid_targets_mean": 6844.3,
|
|
"valid_targets_min": 6070
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"grad_norm": 0.5249863006509999,
|
|
"learning_rate": 3.997892744209833e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3259086608886719,
|
|
"step": 500,
|
|
"valid_targets_mean": 6892.5,
|
|
"valid_targets_min": 6284
|
|
},
|
|
{
|
|
"epoch": 0.8015873015873016,
|
|
"grad_norm": 0.475204280047559,
|
|
"learning_rate": 3.997513842437845e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3135770559310913,
|
|
"step": 505,
|
|
"valid_targets_mean": 7620.1,
|
|
"valid_targets_min": 5958
|
|
},
|
|
{
|
|
"epoch": 0.8095238095238095,
|
|
"grad_norm": 0.5236555989955272,
|
|
"learning_rate": 3.997103653459475e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3163428008556366,
|
|
"step": 510,
|
|
"valid_targets_mean": 6844.6,
|
|
"valid_targets_min": 5911
|
|
},
|
|
{
|
|
"epoch": 0.8174603174603174,
|
|
"grad_norm": 0.5674856785348726,
|
|
"learning_rate": 3.996662183699541e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32461369037628174,
|
|
"step": 515,
|
|
"valid_targets_mean": 6922.4,
|
|
"valid_targets_min": 3564
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"grad_norm": 0.503083889698272,
|
|
"learning_rate": 3.996189440072818e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31698745489120483,
|
|
"step": 520,
|
|
"valid_targets_mean": 6345.5,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.7220018092228805,
|
|
"learning_rate": 3.9956854299839246e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986370921134949,
|
|
"step": 525,
|
|
"valid_targets_mean": 7551.2,
|
|
"valid_targets_min": 6158
|
|
},
|
|
{
|
|
"epoch": 0.8412698412698413,
|
|
"grad_norm": 0.4802075893947623,
|
|
"learning_rate": 3.9951501613272076e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30686354637145996,
|
|
"step": 530,
|
|
"valid_targets_mean": 6728.2,
|
|
"valid_targets_min": 5486
|
|
},
|
|
{
|
|
"epoch": 0.8492063492063492,
|
|
"grad_norm": 0.5433136920049431,
|
|
"learning_rate": 3.994583642486618e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32426926493644714,
|
|
"step": 535,
|
|
"valid_targets_mean": 6385.2,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.5590613981847573,
|
|
"learning_rate": 3.993985882335584e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080733120441437,
|
|
"step": 540,
|
|
"valid_targets_mean": 7632.5,
|
|
"valid_targets_min": 5798
|
|
},
|
|
{
|
|
"epoch": 0.8650793650793651,
|
|
"grad_norm": 0.5534159549104076,
|
|
"learning_rate": 3.993356890236866e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30980873107910156,
|
|
"step": 545,
|
|
"valid_targets_mean": 6637.8,
|
|
"valid_targets_min": 5894
|
|
},
|
|
{
|
|
"epoch": 0.873015873015873,
|
|
"grad_norm": 0.5259927917171869,
|
|
"learning_rate": 3.992696676042414e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3330468535423279,
|
|
"step": 550,
|
|
"valid_targets_mean": 7623.9,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.8809523809523809,
|
|
"grad_norm": 0.5342650902088095,
|
|
"learning_rate": 3.992005250093211e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.317962110042572,
|
|
"step": 555,
|
|
"valid_targets_mean": 6553.6,
|
|
"valid_targets_min": 5082
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"grad_norm": 0.4913197430616615,
|
|
"learning_rate": 3.991282623219113e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31891268491744995,
|
|
"step": 560,
|
|
"valid_targets_mean": 6801.6,
|
|
"valid_targets_min": 5861
|
|
},
|
|
{
|
|
"epoch": 0.8968253968253969,
|
|
"grad_norm": 0.5156611592538767,
|
|
"learning_rate": 3.9905288067386776e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3033723831176758,
|
|
"step": 565,
|
|
"valid_targets_mean": 6712.3,
|
|
"valid_targets_min": 6014
|
|
},
|
|
{
|
|
"epoch": 0.9047619047619048,
|
|
"grad_norm": 0.4694568499450959,
|
|
"learning_rate": 3.989743812458987e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3271929621696472,
|
|
"step": 570,
|
|
"valid_targets_mean": 6960.5,
|
|
"valid_targets_min": 6232
|
|
},
|
|
{
|
|
"epoch": 0.9126984126984127,
|
|
"grad_norm": 0.4985131640891923,
|
|
"learning_rate": 3.9889276526754664e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3113412857055664,
|
|
"step": 575,
|
|
"valid_targets_mean": 6329.6,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"grad_norm": 0.45175184502099547,
|
|
"learning_rate": 3.988080340171685e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3013080656528473,
|
|
"step": 580,
|
|
"valid_targets_mean": 6748.4,
|
|
"valid_targets_min": 6454
|
|
},
|
|
{
|
|
"epoch": 0.9285714285714286,
|
|
"grad_norm": 0.5168449511597433,
|
|
"learning_rate": 3.987201888219161e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28940606117248535,
|
|
"step": 585,
|
|
"valid_targets_mean": 6390.4,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 0.9365079365079365,
|
|
"grad_norm": 0.44172654247589077,
|
|
"learning_rate": 3.986292310577153e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3153989911079407,
|
|
"step": 590,
|
|
"valid_targets_mean": 7645.3,
|
|
"valid_targets_min": 5558
|
|
},
|
|
{
|
|
"epoch": 0.9444444444444444,
|
|
"grad_norm": 0.5189497788144406,
|
|
"learning_rate": 3.9853516214924416e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32152968645095825,
|
|
"step": 595,
|
|
"valid_targets_mean": 6709.1,
|
|
"valid_targets_min": 5740
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"grad_norm": 0.5364426081378828,
|
|
"learning_rate": 3.9843798356991096e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29480716586112976,
|
|
"step": 600,
|
|
"valid_targets_mean": 6598.4,
|
|
"valid_targets_min": 5633
|
|
},
|
|
{
|
|
"epoch": 0.9603174603174603,
|
|
"grad_norm": 0.48033971085833005,
|
|
"learning_rate": 3.9833769684183104e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30824756622314453,
|
|
"step": 605,
|
|
"valid_targets_mean": 6863.1,
|
|
"valid_targets_min": 5940
|
|
},
|
|
{
|
|
"epoch": 0.9682539682539683,
|
|
"grad_norm": 0.5584022965698194,
|
|
"learning_rate": 3.982343035358026e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037703335285187,
|
|
"step": 610,
|
|
"valid_targets_mean": 6883.4,
|
|
"valid_targets_min": 6242
|
|
},
|
|
{
|
|
"epoch": 0.9761904761904762,
|
|
"grad_norm": 0.5761048712171422,
|
|
"learning_rate": 3.981278052712827e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3253772258758545,
|
|
"step": 615,
|
|
"valid_targets_mean": 6648.9,
|
|
"valid_targets_min": 1809
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"grad_norm": 0.49090557371373506,
|
|
"learning_rate": 3.9801820371636157e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312380850315094,
|
|
"step": 620,
|
|
"valid_targets_mean": 7027.9,
|
|
"valid_targets_min": 5916
|
|
},
|
|
{
|
|
"epoch": 0.9920634920634921,
|
|
"grad_norm": 0.5527634626456803,
|
|
"learning_rate": 3.979055005877364e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31445640325546265,
|
|
"step": 625,
|
|
"valid_targets_mean": 6273.0,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.557418140689673,
|
|
"learning_rate": 3.977896976506845e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31134557723999023,
|
|
"step": 630,
|
|
"valid_targets_mean": 6784.6,
|
|
"valid_targets_min": 6262
|
|
},
|
|
{
|
|
"epoch": 1.007936507936508,
|
|
"grad_norm": 0.5454251574341279,
|
|
"learning_rate": 3.976707967190358e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3254242539405823,
|
|
"step": 635,
|
|
"valid_targets_mean": 7171.1,
|
|
"valid_targets_min": 5918
|
|
},
|
|
{
|
|
"epoch": 1.0158730158730158,
|
|
"grad_norm": 0.5330384171923035,
|
|
"learning_rate": 3.9754879965514456e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948231101036072,
|
|
"step": 640,
|
|
"valid_targets_mean": 6630.1,
|
|
"valid_targets_min": 5982
|
|
},
|
|
{
|
|
"epoch": 1.0238095238095237,
|
|
"grad_norm": 0.44834388218974763,
|
|
"learning_rate": 3.9742370836985956e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976098656654358,
|
|
"step": 645,
|
|
"valid_targets_mean": 6820.8,
|
|
"valid_targets_min": 5693
|
|
},
|
|
{
|
|
"epoch": 1.0317460317460316,
|
|
"grad_norm": 0.4773037559110415,
|
|
"learning_rate": 3.972955248224949e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31667783856391907,
|
|
"step": 650,
|
|
"valid_targets_mean": 6864.6,
|
|
"valid_targets_min": 5861
|
|
},
|
|
{
|
|
"epoch": 1.0396825396825398,
|
|
"grad_norm": 0.513031166317161,
|
|
"learning_rate": 3.971642510207989e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32631784677505493,
|
|
"step": 655,
|
|
"valid_targets_mean": 7269.2,
|
|
"valid_targets_min": 6304
|
|
},
|
|
{
|
|
"epoch": 1.0476190476190477,
|
|
"grad_norm": 0.47608854479645946,
|
|
"learning_rate": 3.9702988902092274e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3122745156288147,
|
|
"step": 660,
|
|
"valid_targets_mean": 7154.9,
|
|
"valid_targets_min": 6077
|
|
},
|
|
{
|
|
"epoch": 1.0555555555555556,
|
|
"grad_norm": 0.4852996182191411,
|
|
"learning_rate": 3.968924409273884e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3148670494556427,
|
|
"step": 665,
|
|
"valid_targets_mean": 6328.9,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 1.0634920634920635,
|
|
"grad_norm": 0.4786564869238812,
|
|
"learning_rate": 3.9675190889305545e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3057408332824707,
|
|
"step": 670,
|
|
"valid_targets_mean": 6476.9,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 1.0714285714285714,
|
|
"grad_norm": 0.5221771220411617,
|
|
"learning_rate": 3.966082951190874e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30424433946609497,
|
|
"step": 675,
|
|
"valid_targets_mean": 6868.7,
|
|
"valid_targets_min": 5980
|
|
},
|
|
{
|
|
"epoch": 1.0793650793650793,
|
|
"grad_norm": 0.5222673380747445,
|
|
"learning_rate": 3.9646160185491756e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3207654356956482,
|
|
"step": 680,
|
|
"valid_targets_mean": 6439.0,
|
|
"valid_targets_min": 2504
|
|
},
|
|
{
|
|
"epoch": 1.0873015873015872,
|
|
"grad_norm": 0.5194205472073951,
|
|
"learning_rate": 3.963118313982131e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3053028881549835,
|
|
"step": 685,
|
|
"valid_targets_mean": 6387.3,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.0952380952380953,
|
|
"grad_norm": 0.5143888923688522,
|
|
"learning_rate": 3.961589860948399e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27290722727775574,
|
|
"step": 690,
|
|
"valid_targets_mean": 6817.6,
|
|
"valid_targets_min": 6297
|
|
},
|
|
{
|
|
"epoch": 1.1031746031746033,
|
|
"grad_norm": 0.45304405436680056,
|
|
"learning_rate": 3.960030683388251e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30412861704826355,
|
|
"step": 695,
|
|
"valid_targets_mean": 6916.8,
|
|
"valid_targets_min": 6083
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.5265037716350427,
|
|
"learning_rate": 3.9584408057232e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147455155849457,
|
|
"step": 700,
|
|
"valid_targets_mean": 6703.2,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 1.119047619047619,
|
|
"grad_norm": 0.5347043290572281,
|
|
"learning_rate": 3.956820252855618e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073728084564209,
|
|
"step": 705,
|
|
"valid_targets_mean": 6475.8,
|
|
"valid_targets_min": 4342
|
|
},
|
|
{
|
|
"epoch": 1.126984126984127,
|
|
"grad_norm": 0.6120501324397735,
|
|
"learning_rate": 3.955169050168343e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29703280329704285,
|
|
"step": 710,
|
|
"valid_targets_mean": 6482.4,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 1.1349206349206349,
|
|
"grad_norm": 0.4897597847490548,
|
|
"learning_rate": 3.953487223524283e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29175034165382385,
|
|
"step": 715,
|
|
"valid_targets_mean": 7650.8,
|
|
"valid_targets_min": 6219
|
|
},
|
|
{
|
|
"epoch": 1.1428571428571428,
|
|
"grad_norm": 0.5512666533290648,
|
|
"learning_rate": 3.951774799266014e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30176472663879395,
|
|
"step": 720,
|
|
"valid_targets_mean": 6544.0,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 1.1507936507936507,
|
|
"grad_norm": 0.4807658265480286,
|
|
"learning_rate": 3.950031804215364e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32568973302841187,
|
|
"step": 725,
|
|
"valid_targets_mean": 6360.0,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.1587301587301586,
|
|
"grad_norm": 0.556699493771938,
|
|
"learning_rate": 3.948258265672991e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3195483982563019,
|
|
"step": 730,
|
|
"valid_targets_mean": 6795.8,
|
|
"valid_targets_min": 5692
|
|
},
|
|
{
|
|
"epoch": 1.1666666666666667,
|
|
"grad_norm": 0.5215383560867509,
|
|
"learning_rate": 3.946454211417961e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986541986465454,
|
|
"step": 735,
|
|
"valid_targets_mean": 6430.9,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.1746031746031746,
|
|
"grad_norm": 0.5629011150609644,
|
|
"learning_rate": 3.944619669707309e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047199845314026,
|
|
"step": 740,
|
|
"valid_targets_mean": 6466.1,
|
|
"valid_targets_min": 5556
|
|
},
|
|
{
|
|
"epoch": 1.1825396825396826,
|
|
"grad_norm": 0.4879215472302195,
|
|
"learning_rate": 3.9427546692755946e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31238433718681335,
|
|
"step": 745,
|
|
"valid_targets_mean": 6326.2,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 1.1904761904761905,
|
|
"grad_norm": 0.5237994912849231,
|
|
"learning_rate": 3.9408592393344596e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30352267622947693,
|
|
"step": 750,
|
|
"valid_targets_mean": 6404.9,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 1.1984126984126984,
|
|
"grad_norm": 0.4475909489301072,
|
|
"learning_rate": 3.9389334095721606e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29092368483543396,
|
|
"step": 755,
|
|
"valid_targets_mean": 6880.7,
|
|
"valid_targets_min": 6093
|
|
},
|
|
{
|
|
"epoch": 1.2063492063492063,
|
|
"grad_norm": 0.47072115683980986,
|
|
"learning_rate": 3.936977210153113e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3095782995223999,
|
|
"step": 760,
|
|
"valid_targets_mean": 7325.2,
|
|
"valid_targets_min": 6245
|
|
},
|
|
{
|
|
"epoch": 1.2142857142857142,
|
|
"grad_norm": 0.4369317964766807,
|
|
"learning_rate": 3.93499067171741e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2895420789718628,
|
|
"step": 765,
|
|
"valid_targets_mean": 6646.6,
|
|
"valid_targets_min": 6088
|
|
},
|
|
{
|
|
"epoch": 1.2222222222222223,
|
|
"grad_norm": 0.45291809340884365,
|
|
"learning_rate": 3.932973825380351e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30057239532470703,
|
|
"step": 770,
|
|
"valid_targets_mean": 6884.7,
|
|
"valid_targets_min": 5910
|
|
},
|
|
{
|
|
"epoch": 1.2301587301587302,
|
|
"grad_norm": 0.47249876005162367,
|
|
"learning_rate": 3.9309267027319485e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32773488759994507,
|
|
"step": 775,
|
|
"valid_targets_mean": 6977.9,
|
|
"valid_targets_min": 6211
|
|
},
|
|
{
|
|
"epoch": 1.2380952380952381,
|
|
"grad_norm": 0.43070335501983487,
|
|
"learning_rate": 3.928849335836435e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30105558037757874,
|
|
"step": 780,
|
|
"valid_targets_mean": 6445.9,
|
|
"valid_targets_min": 1809
|
|
},
|
|
{
|
|
"epoch": 1.246031746031746,
|
|
"grad_norm": 0.49066474173325764,
|
|
"learning_rate": 3.926741757231761e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30781009793281555,
|
|
"step": 785,
|
|
"valid_targets_mean": 6790.4,
|
|
"valid_targets_min": 4728
|
|
},
|
|
{
|
|
"epoch": 1.253968253968254,
|
|
"grad_norm": 0.45418055725402884,
|
|
"learning_rate": 3.924603999929086e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30325183272361755,
|
|
"step": 790,
|
|
"valid_targets_mean": 6731.8,
|
|
"valid_targets_min": 5639
|
|
},
|
|
{
|
|
"epoch": 1.2619047619047619,
|
|
"grad_norm": 0.45980655154671046,
|
|
"learning_rate": 3.9224360974122584e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164779543876648,
|
|
"step": 795,
|
|
"valid_targets_mean": 7248.7,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.2698412698412698,
|
|
"grad_norm": 0.49943727664606435,
|
|
"learning_rate": 3.920238083637297e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32338130474090576,
|
|
"step": 800,
|
|
"valid_targets_mean": 6721.1,
|
|
"valid_targets_min": 5898
|
|
},
|
|
{
|
|
"epoch": 1.2777777777777777,
|
|
"grad_norm": 0.4173264315394333,
|
|
"learning_rate": 3.9180099930318524e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3090307116508484,
|
|
"step": 805,
|
|
"valid_targets_mean": 7629.9,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.4767058905596454,
|
|
"learning_rate": 3.915751860494672e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986421287059784,
|
|
"step": 810,
|
|
"valid_targets_mean": 6871.3,
|
|
"valid_targets_min": 6116
|
|
},
|
|
{
|
|
"epoch": 1.2936507936507937,
|
|
"grad_norm": 0.4488393317634179,
|
|
"learning_rate": 3.913463721395051e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3031417429447174,
|
|
"step": 815,
|
|
"valid_targets_mean": 7650.6,
|
|
"valid_targets_min": 6347
|
|
},
|
|
{
|
|
"epoch": 1.3015873015873016,
|
|
"grad_norm": 0.5420087766446882,
|
|
"learning_rate": 3.911145611572282e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29211655259132385,
|
|
"step": 820,
|
|
"valid_targets_mean": 5853.1,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 1.3095238095238095,
|
|
"grad_norm": 0.4375311050877111,
|
|
"learning_rate": 3.908797567335089e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29216277599334717,
|
|
"step": 825,
|
|
"valid_targets_mean": 6622.8,
|
|
"valid_targets_min": 5594
|
|
},
|
|
{
|
|
"epoch": 1.3174603174603174,
|
|
"grad_norm": 0.4608118285386327,
|
|
"learning_rate": 3.906419625461062e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3151078522205353,
|
|
"step": 830,
|
|
"valid_targets_mean": 6395.9,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 1.3253968253968254,
|
|
"grad_norm": 0.3967981261721286,
|
|
"learning_rate": 3.90401182319608e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27572351694107056,
|
|
"step": 835,
|
|
"valid_targets_mean": 8300.2,
|
|
"valid_targets_min": 5776
|
|
},
|
|
{
|
|
"epoch": 1.3333333333333333,
|
|
"grad_norm": 0.4988361976246386,
|
|
"learning_rate": 3.9015741982537265e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3065297603607178,
|
|
"step": 840,
|
|
"valid_targets_mean": 6576.2,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 1.3412698412698414,
|
|
"grad_norm": 0.4463368441503272,
|
|
"learning_rate": 3.899106788814701e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32060182094573975,
|
|
"step": 845,
|
|
"valid_targets_mean": 6826.9,
|
|
"valid_targets_min": 5577
|
|
},
|
|
{
|
|
"epoch": 1.3492063492063493,
|
|
"grad_norm": 0.44099726590361743,
|
|
"learning_rate": 3.896609633526219e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30365803837776184,
|
|
"step": 850,
|
|
"valid_targets_mean": 7845.2,
|
|
"valid_targets_min": 6223
|
|
},
|
|
{
|
|
"epoch": 1.3571428571428572,
|
|
"grad_norm": 2.3449525631762285,
|
|
"learning_rate": 3.894082771501407e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27957963943481445,
|
|
"step": 855,
|
|
"valid_targets_mean": 6508.6,
|
|
"valid_targets_min": 5708
|
|
},
|
|
{
|
|
"epoch": 1.3650793650793651,
|
|
"grad_norm": 0.41481078784213826,
|
|
"learning_rate": 3.891526242318692e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3069702982902527,
|
|
"step": 860,
|
|
"valid_targets_mean": 6746.2,
|
|
"valid_targets_min": 6042
|
|
},
|
|
{
|
|
"epoch": 1.373015873015873,
|
|
"grad_norm": 0.4417685003149175,
|
|
"learning_rate": 3.8889400860211785e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30127257108688354,
|
|
"step": 865,
|
|
"valid_targets_mean": 6833.2,
|
|
"valid_targets_min": 6162
|
|
},
|
|
{
|
|
"epoch": 1.380952380952381,
|
|
"grad_norm": 0.47296006115544537,
|
|
"learning_rate": 3.886324343116023e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31046074628829956,
|
|
"step": 870,
|
|
"valid_targets_mean": 8439.8,
|
|
"valid_targets_min": 6099
|
|
},
|
|
{
|
|
"epoch": 1.3888888888888888,
|
|
"grad_norm": 0.49668809906560213,
|
|
"learning_rate": 3.883679054573799e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041248023509979,
|
|
"step": 875,
|
|
"valid_targets_mean": 6668.0,
|
|
"valid_targets_min": 5546
|
|
},
|
|
{
|
|
"epoch": 1.3968253968253967,
|
|
"grad_norm": 0.4608125675606662,
|
|
"learning_rate": 3.881004261827856e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3075067400932312,
|
|
"step": 880,
|
|
"valid_targets_mean": 6789.9,
|
|
"valid_targets_min": 6390
|
|
},
|
|
{
|
|
"epoch": 1.4047619047619047,
|
|
"grad_norm": 0.46304007958482496,
|
|
"learning_rate": 3.878300006773669e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31151023507118225,
|
|
"step": 885,
|
|
"valid_targets_mean": 6655.4,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.4126984126984126,
|
|
"grad_norm": 0.43708030773945084,
|
|
"learning_rate": 3.875566331768184e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28159815073013306,
|
|
"step": 890,
|
|
"valid_targets_mean": 6459.8,
|
|
"valid_targets_min": 2403
|
|
},
|
|
{
|
|
"epoch": 1.4206349206349207,
|
|
"grad_norm": 0.4446879830239651,
|
|
"learning_rate": 3.872803279629155e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29320377111434937,
|
|
"step": 895,
|
|
"valid_targets_mean": 6837.2,
|
|
"valid_targets_min": 6325
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.48660917075333116,
|
|
"learning_rate": 3.8700108936344705e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3051351308822632,
|
|
"step": 900,
|
|
"valid_targets_mean": 6682.2,
|
|
"valid_targets_min": 4425
|
|
},
|
|
{
|
|
"epoch": 1.4365079365079365,
|
|
"grad_norm": 0.47740856054893266,
|
|
"learning_rate": 3.867189217521477e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316456139087677,
|
|
"step": 905,
|
|
"valid_targets_mean": 7029.9,
|
|
"valid_targets_min": 5818
|
|
},
|
|
{
|
|
"epoch": 1.4444444444444444,
|
|
"grad_norm": 0.436884800860315,
|
|
"learning_rate": 3.864338295486297e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965593934059143,
|
|
"step": 910,
|
|
"valid_targets_mean": 6509.3,
|
|
"valid_targets_min": 5932
|
|
},
|
|
{
|
|
"epoch": 1.4523809523809523,
|
|
"grad_norm": 0.4309431164963026,
|
|
"learning_rate": 3.8614581721831316e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30759620666503906,
|
|
"step": 915,
|
|
"valid_targets_mean": 6726.7,
|
|
"valid_targets_min": 6088
|
|
},
|
|
{
|
|
"epoch": 1.4603174603174602,
|
|
"grad_norm": 0.44492999746261414,
|
|
"learning_rate": 3.858548892723563e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3066488802433014,
|
|
"step": 920,
|
|
"valid_targets_mean": 7343.9,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 1.4682539682539684,
|
|
"grad_norm": 0.44140029213146215,
|
|
"learning_rate": 3.855610502675851e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889404296875,
|
|
"step": 925,
|
|
"valid_targets_mean": 6481.2,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 1.4761904761904763,
|
|
"grad_norm": 0.4510166378870532,
|
|
"learning_rate": 3.852643048064215e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32076844573020935,
|
|
"step": 930,
|
|
"valid_targets_mean": 6325.4,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 1.4841269841269842,
|
|
"grad_norm": 0.40416783503768516,
|
|
"learning_rate": 3.8496465753681145e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27975571155548096,
|
|
"step": 935,
|
|
"valid_targets_mean": 6718.5,
|
|
"valid_targets_min": 6164
|
|
},
|
|
{
|
|
"epoch": 1.492063492063492,
|
|
"grad_norm": 0.43013679604109406,
|
|
"learning_rate": 3.846621131521522e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29283440113067627,
|
|
"step": 940,
|
|
"valid_targets_mean": 6757.1,
|
|
"valid_targets_min": 6029
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 0.4999106816968774,
|
|
"learning_rate": 3.843566763912187e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2971036732196808,
|
|
"step": 945,
|
|
"valid_targets_mean": 6817.4,
|
|
"valid_targets_min": 5408
|
|
},
|
|
{
|
|
"epoch": 1.507936507936508,
|
|
"grad_norm": 0.4774422652365608,
|
|
"learning_rate": 3.840483520380896e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3161478638648987,
|
|
"step": 950,
|
|
"valid_targets_mean": 5418.4,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.5158730158730158,
|
|
"grad_norm": 0.4329046941202208,
|
|
"learning_rate": 3.837371449220717e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092857897281647,
|
|
"step": 955,
|
|
"valid_targets_mean": 7248.1,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 1.5238095238095237,
|
|
"grad_norm": 0.4257606738409649,
|
|
"learning_rate": 3.834230599176251e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862392067909241,
|
|
"step": 960,
|
|
"valid_targets_mean": 6691.7,
|
|
"valid_targets_min": 5720
|
|
},
|
|
{
|
|
"epoch": 1.5317460317460316,
|
|
"grad_norm": 0.44689521334622434,
|
|
"learning_rate": 3.831061019442864e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31582826375961304,
|
|
"step": 965,
|
|
"valid_targets_mean": 6884.1,
|
|
"valid_targets_min": 5997
|
|
},
|
|
{
|
|
"epoch": 1.5396825396825395,
|
|
"grad_norm": 0.3795887206603885,
|
|
"learning_rate": 3.827862759665916e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3048897981643677,
|
|
"step": 970,
|
|
"valid_targets_mean": 7342.5,
|
|
"valid_targets_min": 6340
|
|
},
|
|
{
|
|
"epoch": 1.5476190476190477,
|
|
"grad_norm": 0.43305970765530266,
|
|
"learning_rate": 3.8246358699399853e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3067367672920227,
|
|
"step": 975,
|
|
"valid_targets_mean": 6731.6,
|
|
"valid_targets_min": 5512
|
|
},
|
|
{
|
|
"epoch": 1.5555555555555556,
|
|
"grad_norm": 0.3584387450246203,
|
|
"learning_rate": 3.8213804008080824e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29355573654174805,
|
|
"step": 980,
|
|
"valid_targets_mean": 6688.0,
|
|
"valid_targets_min": 5967
|
|
},
|
|
{
|
|
"epoch": 1.5634920634920635,
|
|
"grad_norm": 0.42735024482088185,
|
|
"learning_rate": 3.818096403260862e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849254012107849,
|
|
"step": 985,
|
|
"valid_targets_mean": 7338.4,
|
|
"valid_targets_min": 5494
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.4234743399630868,
|
|
"learning_rate": 3.8147839287358185e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023817539215088,
|
|
"step": 990,
|
|
"valid_targets_mean": 6270.8,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 1.5793650793650795,
|
|
"grad_norm": 0.42751188324326367,
|
|
"learning_rate": 3.8114430291164836e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844902575016022,
|
|
"step": 995,
|
|
"valid_targets_mean": 6908.2,
|
|
"valid_targets_min": 6401
|
|
},
|
|
{
|
|
"epoch": 1.5873015873015874,
|
|
"grad_norm": 0.4676118585855047,
|
|
"learning_rate": 3.808073756731615e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2990047335624695,
|
|
"step": 1000,
|
|
"valid_targets_mean": 6851.2,
|
|
"valid_targets_min": 5984
|
|
},
|
|
{
|
|
"epoch": 1.5952380952380953,
|
|
"grad_norm": 0.3668516628717141,
|
|
"learning_rate": 3.8046761643543734e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29771462082862854,
|
|
"step": 1005,
|
|
"valid_targets_mean": 8400.2,
|
|
"valid_targets_min": 6123
|
|
},
|
|
{
|
|
"epoch": 1.6031746031746033,
|
|
"grad_norm": 0.44028657099266527,
|
|
"learning_rate": 3.8012503052014996e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3059346079826355,
|
|
"step": 1010,
|
|
"valid_targets_mean": 6926.6,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 1.6111111111111112,
|
|
"grad_norm": 0.4412678845599613,
|
|
"learning_rate": 3.797796232932476e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31800782680511475,
|
|
"step": 1015,
|
|
"valid_targets_mean": 6845.8,
|
|
"valid_targets_min": 6191
|
|
},
|
|
{
|
|
"epoch": 1.619047619047619,
|
|
"grad_norm": 0.4352380509684428,
|
|
"learning_rate": 3.794314001648692e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986167371273041,
|
|
"step": 1020,
|
|
"valid_targets_mean": 6648.8,
|
|
"valid_targets_min": 5328
|
|
},
|
|
{
|
|
"epoch": 1.626984126984127,
|
|
"grad_norm": 0.3887340428482689,
|
|
"learning_rate": 3.7908036658925926e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29401472210884094,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6813.8,
|
|
"valid_targets_min": 5772
|
|
},
|
|
{
|
|
"epoch": 1.6349206349206349,
|
|
"grad_norm": 0.44714391983027507,
|
|
"learning_rate": 3.787265280646825e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31529200077056885,
|
|
"step": 1030,
|
|
"valid_targets_mean": 6985.2,
|
|
"valid_targets_min": 6333
|
|
},
|
|
{
|
|
"epoch": 1.6428571428571428,
|
|
"grad_norm": 0.45019246922823086,
|
|
"learning_rate": 3.7836989013333776e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996531128883362,
|
|
"step": 1035,
|
|
"valid_targets_mean": 6677.1,
|
|
"valid_targets_min": 5989
|
|
},
|
|
{
|
|
"epoch": 1.6507936507936507,
|
|
"grad_norm": 0.45431953169565015,
|
|
"learning_rate": 3.780104583812712e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3301179111003876,
|
|
"step": 1040,
|
|
"valid_targets_mean": 6234.9,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 1.6587301587301586,
|
|
"grad_norm": 0.4271199039853916,
|
|
"learning_rate": 3.7764823843828883e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752671241760254,
|
|
"step": 1045,
|
|
"valid_targets_mean": 6725.4,
|
|
"valid_targets_min": 5991
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.3877254493765601,
|
|
"learning_rate": 3.7728323597786834e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041803240776062,
|
|
"step": 1050,
|
|
"valid_targets_mean": 6978.6,
|
|
"valid_targets_min": 5979
|
|
},
|
|
{
|
|
"epoch": 1.6746031746031746,
|
|
"grad_norm": 0.39304983048413156,
|
|
"learning_rate": 3.7691545671707007e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3174543082714081,
|
|
"step": 1055,
|
|
"valid_targets_mean": 7201.1,
|
|
"valid_targets_min": 3205
|
|
},
|
|
{
|
|
"epoch": 1.6825396825396826,
|
|
"grad_norm": 0.4491808750061814,
|
|
"learning_rate": 3.765449064164477e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061814308166504,
|
|
"step": 1060,
|
|
"valid_targets_mean": 6889.2,
|
|
"valid_targets_min": 6366
|
|
},
|
|
{
|
|
"epoch": 1.6904761904761905,
|
|
"grad_norm": 0.4196439850429509,
|
|
"learning_rate": 3.7617159087995784e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30295848846435547,
|
|
"step": 1065,
|
|
"valid_targets_mean": 6636.4,
|
|
"valid_targets_min": 3698
|
|
},
|
|
{
|
|
"epoch": 1.6984126984126984,
|
|
"grad_norm": 0.4289822062892049,
|
|
"learning_rate": 3.757955159548693e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26410117745399475,
|
|
"step": 1070,
|
|
"valid_targets_mean": 7768.2,
|
|
"valid_targets_min": 6360
|
|
},
|
|
{
|
|
"epoch": 1.7063492063492065,
|
|
"grad_norm": 0.4146236494066799,
|
|
"learning_rate": 3.754166875316713e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841458320617676,
|
|
"step": 1075,
|
|
"valid_targets_mean": 9076.4,
|
|
"valid_targets_min": 6132
|
|
},
|
|
{
|
|
"epoch": 1.7142857142857144,
|
|
"grad_norm": 0.44408982081529014,
|
|
"learning_rate": 3.750351115439812e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30598580837249756,
|
|
"step": 1080,
|
|
"valid_targets_mean": 6669.6,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 1.7222222222222223,
|
|
"grad_norm": 0.44962885339435893,
|
|
"learning_rate": 3.746507939684519e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932506501674652,
|
|
"step": 1085,
|
|
"valid_targets_mean": 6617.9,
|
|
"valid_targets_min": 5637
|
|
},
|
|
{
|
|
"epoch": 1.7301587301587302,
|
|
"grad_norm": 0.44008698455772527,
|
|
"learning_rate": 3.742637408246779e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935304641723633,
|
|
"step": 1090,
|
|
"valid_targets_mean": 7225.2,
|
|
"valid_targets_min": 4193
|
|
},
|
|
{
|
|
"epoch": 1.7380952380952381,
|
|
"grad_norm": 0.4293500385988482,
|
|
"learning_rate": 3.73873958175101e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945937514305115,
|
|
"step": 1095,
|
|
"valid_targets_mean": 6462.3,
|
|
"valid_targets_min": 3940
|
|
},
|
|
{
|
|
"epoch": 1.746031746031746,
|
|
"grad_norm": 0.4139851604016748,
|
|
"learning_rate": 3.734814521249156e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29244914650917053,
|
|
"step": 1100,
|
|
"valid_targets_mean": 7008.0,
|
|
"valid_targets_min": 6061
|
|
},
|
|
{
|
|
"epoch": 1.753968253968254,
|
|
"grad_norm": 0.45930856076384624,
|
|
"learning_rate": 3.7308622882197294e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29201894998550415,
|
|
"step": 1105,
|
|
"valid_targets_mean": 6873.2,
|
|
"valid_targets_min": 5918
|
|
},
|
|
{
|
|
"epoch": 1.7619047619047619,
|
|
"grad_norm": 0.4621222759770208,
|
|
"learning_rate": 3.7268829445668456e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28178849816322327,
|
|
"step": 1110,
|
|
"valid_targets_mean": 6400.8,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 1.7698412698412698,
|
|
"grad_norm": 0.406054690281881,
|
|
"learning_rate": 3.722876552619257e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2939590811729431,
|
|
"step": 1115,
|
|
"valid_targets_mean": 6632.7,
|
|
"valid_targets_min": 6099
|
|
},
|
|
{
|
|
"epoch": 1.7777777777777777,
|
|
"grad_norm": 0.4064049948637611,
|
|
"learning_rate": 3.718843175129378e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875579595565796,
|
|
"step": 1120,
|
|
"valid_targets_mean": 7503.8,
|
|
"valid_targets_min": 5798
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 0.39758336285867657,
|
|
"learning_rate": 3.7147828752722944e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29160594940185547,
|
|
"step": 1125,
|
|
"valid_targets_mean": 6232.2,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 1.7936507936507935,
|
|
"grad_norm": 0.47121808218717987,
|
|
"learning_rate": 3.7106957166447834e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28951168060302734,
|
|
"step": 1130,
|
|
"valid_targets_mean": 7829.3,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 1.8015873015873016,
|
|
"grad_norm": 0.49153744711334296,
|
|
"learning_rate": 3.7065817632643115e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3233233690261841,
|
|
"step": 1135,
|
|
"valid_targets_mean": 6797.2,
|
|
"valid_targets_min": 5514
|
|
},
|
|
{
|
|
"epoch": 1.8095238095238095,
|
|
"grad_norm": 0.42812792177510794,
|
|
"learning_rate": 3.7024410795680326e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2990078628063202,
|
|
"step": 1140,
|
|
"valid_targets_mean": 7656.3,
|
|
"valid_targets_min": 5949
|
|
},
|
|
{
|
|
"epoch": 1.8174603174603174,
|
|
"grad_norm": 0.40194888644770904,
|
|
"learning_rate": 3.698273730411782e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2937239706516266,
|
|
"step": 1145,
|
|
"valid_targets_mean": 7009.2,
|
|
"valid_targets_min": 6556
|
|
},
|
|
{
|
|
"epoch": 1.8253968253968254,
|
|
"grad_norm": 0.4060156930678874,
|
|
"learning_rate": 3.694079781069053e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28889569640159607,
|
|
"step": 1150,
|
|
"valid_targets_mean": 6785.1,
|
|
"valid_targets_min": 6092
|
|
},
|
|
{
|
|
"epoch": 1.8333333333333335,
|
|
"grad_norm": 0.42279902982597106,
|
|
"learning_rate": 3.6898592972299875e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832255959510803,
|
|
"step": 1155,
|
|
"valid_targets_mean": 7303.2,
|
|
"valid_targets_min": 5956
|
|
},
|
|
{
|
|
"epoch": 1.8412698412698414,
|
|
"grad_norm": 0.40175209591650396,
|
|
"learning_rate": 3.6856123450003306e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172539472579956,
|
|
"step": 1160,
|
|
"valid_targets_mean": 7714.9,
|
|
"valid_targets_min": 6100
|
|
},
|
|
{
|
|
"epoch": 1.8492063492063493,
|
|
"grad_norm": 0.45310557734291707,
|
|
"learning_rate": 3.68133899090041e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3225749731063843,
|
|
"step": 1165,
|
|
"valid_targets_mean": 6842.6,
|
|
"valid_targets_min": 6232
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.4260186157722423,
|
|
"learning_rate": 3.677039301864085e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041403293609619,
|
|
"step": 1170,
|
|
"valid_targets_mean": 6638.5,
|
|
"valid_targets_min": 5477
|
|
},
|
|
{
|
|
"epoch": 1.8650793650793651,
|
|
"grad_norm": 0.4399305449961783,
|
|
"learning_rate": 3.672713345237701e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29220902919769287,
|
|
"step": 1175,
|
|
"valid_targets_mean": 6942.9,
|
|
"valid_targets_min": 5593
|
|
},
|
|
{
|
|
"epoch": 1.873015873015873,
|
|
"grad_norm": 0.43052546762618843,
|
|
"learning_rate": 3.6683611887790356e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996700406074524,
|
|
"step": 1180,
|
|
"valid_targets_mean": 7559.1,
|
|
"valid_targets_min": 6189
|
|
},
|
|
{
|
|
"epoch": 1.880952380952381,
|
|
"grad_norm": 0.42420524006746657,
|
|
"learning_rate": 3.663982900656236e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29127904772758484,
|
|
"step": 1185,
|
|
"valid_targets_mean": 7296.9,
|
|
"valid_targets_min": 5153
|
|
},
|
|
{
|
|
"epoch": 1.8888888888888888,
|
|
"grad_norm": 0.367828092998467,
|
|
"learning_rate": 3.6595785494467516e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.277553528547287,
|
|
"step": 1190,
|
|
"valid_targets_mean": 7712.8,
|
|
"valid_targets_min": 6166
|
|
},
|
|
{
|
|
"epoch": 1.8968253968253967,
|
|
"grad_norm": 0.40196091511810494,
|
|
"learning_rate": 3.655148204136259e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27113741636276245,
|
|
"step": 1195,
|
|
"valid_targets_mean": 6810.2,
|
|
"valid_targets_min": 5940
|
|
},
|
|
{
|
|
"epoch": 1.9047619047619047,
|
|
"grad_norm": 0.42173955305054345,
|
|
"learning_rate": 3.650691934117584e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27716606855392456,
|
|
"step": 1200,
|
|
"valid_targets_mean": 6937.8,
|
|
"valid_targets_min": 5657
|
|
},
|
|
{
|
|
"epoch": 1.9126984126984126,
|
|
"grad_norm": 0.381981679135796,
|
|
"learning_rate": 3.646209809189611e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913668155670166,
|
|
"step": 1205,
|
|
"valid_targets_mean": 6776.3,
|
|
"valid_targets_min": 6290
|
|
},
|
|
{
|
|
"epoch": 1.9206349206349205,
|
|
"grad_norm": 0.35852431635976234,
|
|
"learning_rate": 3.641701899556192e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2936733663082123,
|
|
"step": 1210,
|
|
"valid_targets_mean": 7485.6,
|
|
"valid_targets_min": 5633
|
|
},
|
|
{
|
|
"epoch": 1.9285714285714286,
|
|
"grad_norm": 0.39707350239889333,
|
|
"learning_rate": 3.63716827582505e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30755889415740967,
|
|
"step": 1215,
|
|
"valid_targets_mean": 7097.4,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 1.9365079365079365,
|
|
"grad_norm": 0.4055638044894128,
|
|
"learning_rate": 3.632609009006665e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929340600967407,
|
|
"step": 1220,
|
|
"valid_targets_mean": 6855.9,
|
|
"valid_targets_min": 5696
|
|
},
|
|
{
|
|
"epoch": 1.9444444444444444,
|
|
"grad_norm": 0.41778287157912586,
|
|
"learning_rate": 3.62802417051317e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30424752831459045,
|
|
"step": 1225,
|
|
"valid_targets_mean": 6779.5,
|
|
"valid_targets_min": 5964
|
|
},
|
|
{
|
|
"epoch": 1.9523809523809523,
|
|
"grad_norm": 0.4341554957655379,
|
|
"learning_rate": 3.6234138321572274e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28841882944107056,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5898.2,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 1.9603174603174605,
|
|
"grad_norm": 0.38734466802210227,
|
|
"learning_rate": 3.6187780661509074e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27550429105758667,
|
|
"step": 1235,
|
|
"valid_targets_mean": 6819.4,
|
|
"valid_targets_min": 6113
|
|
},
|
|
{
|
|
"epoch": 1.9682539682539684,
|
|
"grad_norm": 0.44090888736646566,
|
|
"learning_rate": 3.6141169451045526e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30094802379608154,
|
|
"step": 1240,
|
|
"valid_targets_mean": 6872.9,
|
|
"valid_targets_min": 5399
|
|
},
|
|
{
|
|
"epoch": 1.9761904761904763,
|
|
"grad_norm": 0.42481761097099546,
|
|
"learning_rate": 3.609430542025646e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.315696120262146,
|
|
"step": 1245,
|
|
"valid_targets_mean": 6269.5,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 1.9841269841269842,
|
|
"grad_norm": 0.37787890555667913,
|
|
"learning_rate": 3.604718930317664e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.282970130443573,
|
|
"step": 1250,
|
|
"valid_targets_mean": 6769.4,
|
|
"valid_targets_min": 5595
|
|
},
|
|
{
|
|
"epoch": 1.992063492063492,
|
|
"grad_norm": 0.3843143131849191,
|
|
"learning_rate": 3.5999821837789275e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3009134531021118,
|
|
"step": 1255,
|
|
"valid_targets_mean": 7565.6,
|
|
"valid_targets_min": 6184
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.4049771632144877,
|
|
"learning_rate": 3.595220376601447e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28150904178619385,
|
|
"step": 1260,
|
|
"valid_targets_mean": 6884.7,
|
|
"valid_targets_min": 6104
|
|
},
|
|
{
|
|
"epoch": 2.007936507936508,
|
|
"grad_norm": 0.41458387384348394,
|
|
"learning_rate": 3.590433583369758e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.278090238571167,
|
|
"step": 1265,
|
|
"valid_targets_mean": 6780.8,
|
|
"valid_targets_min": 5938
|
|
},
|
|
{
|
|
"epoch": 2.015873015873016,
|
|
"grad_norm": 0.4027230133999186,
|
|
"learning_rate": 3.5856218790597554e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771596908569336,
|
|
"step": 1270,
|
|
"valid_targets_mean": 6822.3,
|
|
"valid_targets_min": 6149
|
|
},
|
|
{
|
|
"epoch": 2.0238095238095237,
|
|
"grad_norm": 0.40126274283690117,
|
|
"learning_rate": 3.580785339037519e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2698061466217041,
|
|
"step": 1275,
|
|
"valid_targets_mean": 6622.5,
|
|
"valid_targets_min": 6090
|
|
},
|
|
{
|
|
"epoch": 2.0317460317460316,
|
|
"grad_norm": 0.39513571925085034,
|
|
"learning_rate": 3.57592403905813e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31205785274505615,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6797.9,
|
|
"valid_targets_min": 6203
|
|
},
|
|
{
|
|
"epoch": 2.0396825396825395,
|
|
"grad_norm": 0.45881757664870404,
|
|
"learning_rate": 3.571038055264489e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30912062525749207,
|
|
"step": 1285,
|
|
"valid_targets_mean": 6641.1,
|
|
"valid_targets_min": 5982
|
|
},
|
|
{
|
|
"epoch": 2.0476190476190474,
|
|
"grad_norm": 0.40813393269166576,
|
|
"learning_rate": 3.566127464186119e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27573108673095703,
|
|
"step": 1290,
|
|
"valid_targets_mean": 6706.6,
|
|
"valid_targets_min": 6001
|
|
},
|
|
{
|
|
"epoch": 2.0555555555555554,
|
|
"grad_norm": 0.4435319124875423,
|
|
"learning_rate": 3.56119234273797e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2895510196685791,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6713.9,
|
|
"valid_targets_min": 5625
|
|
},
|
|
{
|
|
"epoch": 2.0634920634920633,
|
|
"grad_norm": 0.3822635776598885,
|
|
"learning_rate": 3.5562327682192134e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29433852434158325,
|
|
"step": 1300,
|
|
"valid_targets_mean": 6766.6,
|
|
"valid_targets_min": 6047
|
|
},
|
|
{
|
|
"epoch": 2.0714285714285716,
|
|
"grad_norm": 0.3834622119893725,
|
|
"learning_rate": 3.5512488183120286e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743106484413147,
|
|
"step": 1305,
|
|
"valid_targets_mean": 6775.9,
|
|
"valid_targets_min": 5435
|
|
},
|
|
{
|
|
"epoch": 2.0793650793650795,
|
|
"grad_norm": 0.3836850865449953,
|
|
"learning_rate": 3.54624057108039e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3090106248855591,
|
|
"step": 1310,
|
|
"valid_targets_mean": 6782.7,
|
|
"valid_targets_min": 5885
|
|
},
|
|
{
|
|
"epoch": 2.0873015873015874,
|
|
"grad_norm": 0.4673871396006402,
|
|
"learning_rate": 3.5412081049688444e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3018457591533661,
|
|
"step": 1315,
|
|
"valid_targets_mean": 6843.9,
|
|
"valid_targets_min": 6540
|
|
},
|
|
{
|
|
"epoch": 2.0952380952380953,
|
|
"grad_norm": 0.43654320507586303,
|
|
"learning_rate": 3.5361514988012774e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30469587445259094,
|
|
"step": 1320,
|
|
"valid_targets_mean": 6617.5,
|
|
"valid_targets_min": 5675
|
|
},
|
|
{
|
|
"epoch": 2.1031746031746033,
|
|
"grad_norm": 0.42224466203124195,
|
|
"learning_rate": 3.5310708317796844e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29542917013168335,
|
|
"step": 1325,
|
|
"valid_targets_mean": 6851.4,
|
|
"valid_targets_min": 6361
|
|
},
|
|
{
|
|
"epoch": 2.111111111111111,
|
|
"grad_norm": 0.4118718389031393,
|
|
"learning_rate": 3.5259661834829266e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040982782840729,
|
|
"step": 1330,
|
|
"valid_targets_mean": 6767.4,
|
|
"valid_targets_min": 5890
|
|
},
|
|
{
|
|
"epoch": 2.119047619047619,
|
|
"grad_norm": 0.40410684161236726,
|
|
"learning_rate": 3.5208376338654866e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844935655593872,
|
|
"step": 1335,
|
|
"valid_targets_mean": 6994.1,
|
|
"valid_targets_min": 6260
|
|
},
|
|
{
|
|
"epoch": 2.126984126984127,
|
|
"grad_norm": 0.41338733990393434,
|
|
"learning_rate": 3.515685263256214e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860064208507538,
|
|
"step": 1340,
|
|
"valid_targets_mean": 6855.5,
|
|
"valid_targets_min": 6051
|
|
},
|
|
{
|
|
"epoch": 2.134920634920635,
|
|
"grad_norm": 0.41952663648759353,
|
|
"learning_rate": 3.51050915235707e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29027625918388367,
|
|
"step": 1345,
|
|
"valid_targets_mean": 6934.0,
|
|
"valid_targets_min": 6228
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.3873934122751603,
|
|
"learning_rate": 3.5053093822418596e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29339128732681274,
|
|
"step": 1350,
|
|
"valid_targets_mean": 6589.3,
|
|
"valid_targets_min": 5765
|
|
},
|
|
{
|
|
"epoch": 2.1507936507936507,
|
|
"grad_norm": 0.40852661018444314,
|
|
"learning_rate": 3.500086034354966e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29634934663772583,
|
|
"step": 1355,
|
|
"valid_targets_mean": 6810.8,
|
|
"valid_targets_min": 6068
|
|
},
|
|
{
|
|
"epoch": 2.1587301587301586,
|
|
"grad_norm": 0.4412716459704986,
|
|
"learning_rate": 3.494839190510071e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30543291568756104,
|
|
"step": 1360,
|
|
"valid_targets_mean": 6070.8,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.1666666666666665,
|
|
"grad_norm": 0.41811610338899197,
|
|
"learning_rate": 3.489568932888877e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28175196051597595,
|
|
"step": 1365,
|
|
"valid_targets_mean": 6817.6,
|
|
"valid_targets_min": 6092
|
|
},
|
|
{
|
|
"epoch": 2.1746031746031744,
|
|
"grad_norm": 0.3714974969880588,
|
|
"learning_rate": 3.484275344039815e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857710123062134,
|
|
"step": 1370,
|
|
"valid_targets_mean": 6745.2,
|
|
"valid_targets_min": 5932
|
|
},
|
|
{
|
|
"epoch": 2.1825396825396823,
|
|
"grad_norm": 0.4469397678283376,
|
|
"learning_rate": 3.478958506876759e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949032187461853,
|
|
"step": 1375,
|
|
"valid_targets_mean": 7302.9,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.1904761904761907,
|
|
"grad_norm": 0.3841700519044322,
|
|
"learning_rate": 3.47361850467772e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26443248987197876,
|
|
"step": 1380,
|
|
"valid_targets_mean": 6795.6,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 2.1984126984126986,
|
|
"grad_norm": 0.3551166503997323,
|
|
"learning_rate": 3.468255421083546e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264750599861145,
|
|
"step": 1385,
|
|
"valid_targets_mean": 7637.5,
|
|
"valid_targets_min": 5839
|
|
},
|
|
{
|
|
"epoch": 2.2063492063492065,
|
|
"grad_norm": 0.42439836169151884,
|
|
"learning_rate": 3.46286934009661e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2998582720756531,
|
|
"step": 1390,
|
|
"valid_targets_mean": 6984.1,
|
|
"valid_targets_min": 6120
|
|
},
|
|
{
|
|
"epoch": 2.2142857142857144,
|
|
"grad_norm": 0.4234743120093531,
|
|
"learning_rate": 3.457460346079495e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30108708143234253,
|
|
"step": 1395,
|
|
"valid_targets_mean": 7631.6,
|
|
"valid_targets_min": 5909
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.48335045585475966,
|
|
"learning_rate": 3.452028523753673e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021375834941864,
|
|
"step": 1400,
|
|
"valid_targets_mean": 6528.7,
|
|
"valid_targets_min": 2400
|
|
},
|
|
{
|
|
"epoch": 2.2301587301587302,
|
|
"grad_norm": 0.4088636664821796,
|
|
"learning_rate": 3.446573958198176e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289085328578949,
|
|
"step": 1405,
|
|
"valid_targets_mean": 6894.1,
|
|
"valid_targets_min": 5787
|
|
},
|
|
{
|
|
"epoch": 2.238095238095238,
|
|
"grad_norm": 0.4116226530104535,
|
|
"learning_rate": 3.4410967348482666e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28990620374679565,
|
|
"step": 1410,
|
|
"valid_targets_mean": 6970.7,
|
|
"valid_targets_min": 5788
|
|
},
|
|
{
|
|
"epoch": 2.246031746031746,
|
|
"grad_norm": 0.4069488167183454,
|
|
"learning_rate": 3.435596939494098e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29124724864959717,
|
|
"step": 1415,
|
|
"valid_targets_mean": 6617.6,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.253968253968254,
|
|
"grad_norm": 0.3560709022447811,
|
|
"learning_rate": 3.430074658279369e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29637622833251953,
|
|
"step": 1420,
|
|
"valid_targets_mean": 6724.6,
|
|
"valid_targets_min": 5850
|
|
},
|
|
{
|
|
"epoch": 2.261904761904762,
|
|
"grad_norm": 0.4135014879354426,
|
|
"learning_rate": 3.424529977699977e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30234697461128235,
|
|
"step": 1425,
|
|
"valid_targets_mean": 6724.6,
|
|
"valid_targets_min": 5842
|
|
},
|
|
{
|
|
"epoch": 2.2698412698412698,
|
|
"grad_norm": 0.4403525087920092,
|
|
"learning_rate": 3.418962984602661e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28053051233291626,
|
|
"step": 1430,
|
|
"valid_targets_mean": 6779.9,
|
|
"valid_targets_min": 5873
|
|
},
|
|
{
|
|
"epoch": 2.2777777777777777,
|
|
"grad_norm": 0.3962909112655349,
|
|
"learning_rate": 3.413373766183646e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30405670404434204,
|
|
"step": 1435,
|
|
"valid_targets_mean": 6611.4,
|
|
"valid_targets_min": 5947
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.3974144090556034,
|
|
"learning_rate": 3.40776240998727e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28689199686050415,
|
|
"step": 1440,
|
|
"valid_targets_mean": 6634.6,
|
|
"valid_targets_min": 5673
|
|
},
|
|
{
|
|
"epoch": 2.2936507936507935,
|
|
"grad_norm": 0.3495188043700731,
|
|
"learning_rate": 3.4021290039046184e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28794148564338684,
|
|
"step": 1445,
|
|
"valid_targets_mean": 7053.5,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 2.3015873015873014,
|
|
"grad_norm": 0.4116809572303336,
|
|
"learning_rate": 3.396473636172146e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945330739021301,
|
|
"step": 1450,
|
|
"valid_targets_mean": 7081.8,
|
|
"valid_targets_min": 6300
|
|
},
|
|
{
|
|
"epoch": 2.3095238095238093,
|
|
"grad_norm": 0.3885978746200263,
|
|
"learning_rate": 3.390796395370294e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28223198652267456,
|
|
"step": 1455,
|
|
"valid_targets_mean": 7341.6,
|
|
"valid_targets_min": 2955
|
|
},
|
|
{
|
|
"epoch": 2.317460317460317,
|
|
"grad_norm": 0.4130177332214122,
|
|
"learning_rate": 3.385097370422102e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821195423603058,
|
|
"step": 1460,
|
|
"valid_targets_mean": 6485.4,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 2.3253968253968256,
|
|
"grad_norm": 0.4027605627056538,
|
|
"learning_rate": 3.3793766505918185e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2775035500526428,
|
|
"step": 1465,
|
|
"valid_targets_mean": 6813.4,
|
|
"valid_targets_min": 6073
|
|
},
|
|
{
|
|
"epoch": 2.3333333333333335,
|
|
"grad_norm": 0.38360897152659973,
|
|
"learning_rate": 3.3736343254834994e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837437093257904,
|
|
"step": 1470,
|
|
"valid_targets_mean": 6567.1,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 2.3412698412698414,
|
|
"grad_norm": 0.42858742065003597,
|
|
"learning_rate": 3.3678704850396045e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28429657220840454,
|
|
"step": 1475,
|
|
"valid_targets_mean": 6401.7,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 2.3492063492063493,
|
|
"grad_norm": 0.4111198040518721,
|
|
"learning_rate": 3.362085219539592e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914206087589264,
|
|
"step": 1480,
|
|
"valid_targets_mean": 6742.4,
|
|
"valid_targets_min": 6107
|
|
},
|
|
{
|
|
"epoch": 2.357142857142857,
|
|
"grad_norm": 0.37023505228463544,
|
|
"learning_rate": 3.3562786195985025e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27950817346572876,
|
|
"step": 1485,
|
|
"valid_targets_mean": 8283.9,
|
|
"valid_targets_min": 6193
|
|
},
|
|
{
|
|
"epoch": 2.365079365079365,
|
|
"grad_norm": 0.3790657997152897,
|
|
"learning_rate": 3.350450776165535e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28008103370666504,
|
|
"step": 1490,
|
|
"valid_targets_mean": 6317.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 2.373015873015873,
|
|
"grad_norm": 0.41416146123450653,
|
|
"learning_rate": 3.344601780522634e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817733883857727,
|
|
"step": 1495,
|
|
"valid_targets_mean": 7408.6,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 2.380952380952381,
|
|
"grad_norm": 0.39128676052616257,
|
|
"learning_rate": 3.3387317242830466e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30462613701820374,
|
|
"step": 1500,
|
|
"valid_targets_mean": 6589.5,
|
|
"valid_targets_min": 5127
|
|
},
|
|
{
|
|
"epoch": 2.388888888888889,
|
|
"grad_norm": 0.4018891628537268,
|
|
"learning_rate": 3.332840699389897e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3084038496017456,
|
|
"step": 1505,
|
|
"valid_targets_mean": 6922.8,
|
|
"valid_targets_min": 5416
|
|
},
|
|
{
|
|
"epoch": 2.3968253968253967,
|
|
"grad_norm": 0.39563235534796676,
|
|
"learning_rate": 3.32692879811474e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29888126254081726,
|
|
"step": 1510,
|
|
"valid_targets_mean": 6973.6,
|
|
"valid_targets_min": 6328
|
|
},
|
|
{
|
|
"epoch": 2.4047619047619047,
|
|
"grad_norm": 0.55388835292231,
|
|
"learning_rate": 3.320996113056123e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29595446586608887,
|
|
"step": 1515,
|
|
"valid_targets_mean": 6829.3,
|
|
"valid_targets_min": 6411
|
|
},
|
|
{
|
|
"epoch": 2.4126984126984126,
|
|
"grad_norm": 0.3911596095319325,
|
|
"learning_rate": 3.315042737138128e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29475414752960205,
|
|
"step": 1520,
|
|
"valid_targets_mean": 6826.1,
|
|
"valid_targets_min": 6091
|
|
},
|
|
{
|
|
"epoch": 2.4206349206349205,
|
|
"grad_norm": 0.3656041823947901,
|
|
"learning_rate": 3.309068763608919e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821248769760132,
|
|
"step": 1525,
|
|
"valid_targets_mean": 6499.0,
|
|
"valid_targets_min": 5448
|
|
},
|
|
{
|
|
"epoch": 2.4285714285714284,
|
|
"grad_norm": 0.38915934706824185,
|
|
"learning_rate": 3.303074286039285e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920178174972534,
|
|
"step": 1530,
|
|
"valid_targets_mean": 6358.6,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 2.4365079365079367,
|
|
"grad_norm": 0.4215201538129641,
|
|
"learning_rate": 3.2970593983211694e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28373783826828003,
|
|
"step": 1535,
|
|
"valid_targets_mean": 6874.3,
|
|
"valid_targets_min": 5740
|
|
},
|
|
{
|
|
"epoch": 2.4444444444444446,
|
|
"grad_norm": 0.40871684334829567,
|
|
"learning_rate": 3.2910241946661993e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29211747646331787,
|
|
"step": 1540,
|
|
"valid_targets_mean": 6505.5,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 2.4523809523809526,
|
|
"grad_norm": 0.3660873234198171,
|
|
"learning_rate": 3.2849687696042165e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2934216260910034,
|
|
"step": 1545,
|
|
"valid_targets_mean": 6834.9,
|
|
"valid_targets_min": 6044
|
|
},
|
|
{
|
|
"epoch": 2.4603174603174605,
|
|
"grad_norm": 0.37251071086709947,
|
|
"learning_rate": 3.2788932179817886e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28161340951919556,
|
|
"step": 1550,
|
|
"valid_targets_mean": 6783.0,
|
|
"valid_targets_min": 6199
|
|
},
|
|
{
|
|
"epoch": 2.4682539682539684,
|
|
"grad_norm": 0.39003617385151174,
|
|
"learning_rate": 3.2727976349607276e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996060252189636,
|
|
"step": 1555,
|
|
"valid_targets_mean": 6688.8,
|
|
"valid_targets_min": 5330
|
|
},
|
|
{
|
|
"epoch": 2.4761904761904763,
|
|
"grad_norm": 0.3748700894643654,
|
|
"learning_rate": 3.266682116016599e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848844528198242,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6834.1,
|
|
"valid_targets_min": 5835
|
|
},
|
|
{
|
|
"epoch": 2.484126984126984,
|
|
"grad_norm": 0.4230176635811382,
|
|
"learning_rate": 3.260546756937227e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752341628074646,
|
|
"step": 1565,
|
|
"valid_targets_mean": 7295.9,
|
|
"valid_targets_min": 5930
|
|
},
|
|
{
|
|
"epoch": 2.492063492063492,
|
|
"grad_norm": 0.39767858589383903,
|
|
"learning_rate": 3.254391653821192e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2926604747772217,
|
|
"step": 1570,
|
|
"valid_targets_mean": 6913.4,
|
|
"valid_targets_min": 6079
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.38345401669568974,
|
|
"learning_rate": 3.248216903076328e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28351399302482605,
|
|
"step": 1575,
|
|
"valid_targets_mean": 6789.1,
|
|
"valid_targets_min": 6116
|
|
},
|
|
{
|
|
"epoch": 2.507936507936508,
|
|
"grad_norm": 0.4384244714196099,
|
|
"learning_rate": 3.24202260141821e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890722453594208,
|
|
"step": 1580,
|
|
"valid_targets_mean": 7525.6,
|
|
"valid_targets_min": 5895
|
|
},
|
|
{
|
|
"epoch": 2.515873015873016,
|
|
"grad_norm": 0.40571336871339503,
|
|
"learning_rate": 3.235808845868641e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081614375114441,
|
|
"step": 1585,
|
|
"valid_targets_mean": 6304.8,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 2.5238095238095237,
|
|
"grad_norm": 0.41912977732445267,
|
|
"learning_rate": 3.229575733754132e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29208606481552124,
|
|
"step": 1590,
|
|
"valid_targets_mean": 7367.9,
|
|
"valid_targets_min": 5770
|
|
},
|
|
{
|
|
"epoch": 2.5317460317460316,
|
|
"grad_norm": 0.42350614637245365,
|
|
"learning_rate": 3.2233233627043765e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29857608675956726,
|
|
"step": 1595,
|
|
"valid_targets_mean": 6537.7,
|
|
"valid_targets_min": 4843
|
|
},
|
|
{
|
|
"epoch": 2.5396825396825395,
|
|
"grad_norm": 0.3786667488365313,
|
|
"learning_rate": 3.217051830650722e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285835862159729,
|
|
"step": 1600,
|
|
"valid_targets_mean": 7751.4,
|
|
"valid_targets_min": 6305
|
|
},
|
|
{
|
|
"epoch": 2.5476190476190474,
|
|
"grad_norm": 0.37828126739134227,
|
|
"learning_rate": 3.210761235824639e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30394405126571655,
|
|
"step": 1605,
|
|
"valid_targets_mean": 6939.7,
|
|
"valid_targets_min": 5943
|
|
},
|
|
{
|
|
"epoch": 2.5555555555555554,
|
|
"grad_norm": 0.36521262150794326,
|
|
"learning_rate": 3.204451676756175e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950948178768158,
|
|
"step": 1610,
|
|
"valid_targets_mean": 7115.0,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 2.5634920634920633,
|
|
"grad_norm": 0.40117138689298487,
|
|
"learning_rate": 3.198123252272419e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824857831001282,
|
|
"step": 1615,
|
|
"valid_targets_mean": 6726.3,
|
|
"valid_targets_min": 5931
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.33324766465700256,
|
|
"learning_rate": 3.1917760614959505e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27597469091415405,
|
|
"step": 1620,
|
|
"valid_targets_mean": 7657.0,
|
|
"valid_targets_min": 6064
|
|
},
|
|
{
|
|
"epoch": 2.5793650793650795,
|
|
"grad_norm": 0.3967187863734631,
|
|
"learning_rate": 3.1854102038432856e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31039729714393616,
|
|
"step": 1625,
|
|
"valid_targets_mean": 6762.0,
|
|
"valid_targets_min": 5668
|
|
},
|
|
{
|
|
"epoch": 2.5873015873015874,
|
|
"grad_norm": 0.38728964263108345,
|
|
"learning_rate": 3.17902577902332e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28956013917922974,
|
|
"step": 1630,
|
|
"valid_targets_mean": 6684.9,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 2.5952380952380953,
|
|
"grad_norm": 0.35455311179010796,
|
|
"learning_rate": 3.172622887035771e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2792101502418518,
|
|
"step": 1635,
|
|
"valid_targets_mean": 6681.2,
|
|
"valid_targets_min": 5966
|
|
},
|
|
{
|
|
"epoch": 2.6031746031746033,
|
|
"grad_norm": 0.3989972053554573,
|
|
"learning_rate": 3.1662016281696073e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765553593635559,
|
|
"step": 1640,
|
|
"valid_targets_mean": 6736.6,
|
|
"valid_targets_min": 5637
|
|
},
|
|
{
|
|
"epoch": 2.611111111111111,
|
|
"grad_norm": 0.3835453897701476,
|
|
"learning_rate": 3.15976210300148e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31377026438713074,
|
|
"step": 1645,
|
|
"valid_targets_mean": 7112.0,
|
|
"valid_targets_min": 5968
|
|
},
|
|
{
|
|
"epoch": 2.619047619047619,
|
|
"grad_norm": 0.37880026067389333,
|
|
"learning_rate": 3.153304412394143e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28595834970474243,
|
|
"step": 1650,
|
|
"valid_targets_mean": 8387.4,
|
|
"valid_targets_min": 6129
|
|
},
|
|
{
|
|
"epoch": 2.626984126984127,
|
|
"grad_norm": 0.3842733860836269,
|
|
"learning_rate": 3.146828657494883e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3034130334854126,
|
|
"step": 1655,
|
|
"valid_targets_mean": 6826.3,
|
|
"valid_targets_min": 5874
|
|
},
|
|
{
|
|
"epoch": 2.634920634920635,
|
|
"grad_norm": 0.3472933791863606,
|
|
"learning_rate": 3.140334939733924e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2961106300354004,
|
|
"step": 1660,
|
|
"valid_targets_mean": 8073.8,
|
|
"valid_targets_min": 6065
|
|
},
|
|
{
|
|
"epoch": 2.642857142857143,
|
|
"grad_norm": 0.3515246872710847,
|
|
"learning_rate": 3.1338233608228455e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26518186926841736,
|
|
"step": 1665,
|
|
"valid_targets_mean": 6246.1,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 2.6507936507936507,
|
|
"grad_norm": 0.33982641059627083,
|
|
"learning_rate": 3.127294022752988e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756003737449646,
|
|
"step": 1670,
|
|
"valid_targets_mean": 6955.6,
|
|
"valid_targets_min": 6077
|
|
},
|
|
{
|
|
"epoch": 2.6587301587301586,
|
|
"grad_norm": 0.3460873254508217,
|
|
"learning_rate": 3.120747027793854e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28218597173690796,
|
|
"step": 1675,
|
|
"valid_targets_mean": 7367.9,
|
|
"valid_targets_min": 5882
|
|
},
|
|
{
|
|
"epoch": 2.6666666666666665,
|
|
"grad_norm": 0.36333803139127835,
|
|
"learning_rate": 3.114182478491509e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28637784719467163,
|
|
"step": 1680,
|
|
"valid_targets_mean": 6725.8,
|
|
"valid_targets_min": 5808
|
|
},
|
|
{
|
|
"epoch": 2.674603174603175,
|
|
"grad_norm": 0.3154829482603397,
|
|
"learning_rate": 3.107600477666969e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28293800354003906,
|
|
"step": 1685,
|
|
"valid_targets_mean": 7815.8,
|
|
"valid_targets_min": 6165
|
|
},
|
|
{
|
|
"epoch": 2.682539682539683,
|
|
"grad_norm": 0.36520030515988083,
|
|
"learning_rate": 3.1010011284146004e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905452847480774,
|
|
"step": 1690,
|
|
"valid_targets_mean": 6822.0,
|
|
"valid_targets_min": 6071
|
|
},
|
|
{
|
|
"epoch": 2.6904761904761907,
|
|
"grad_norm": 0.3510778385496118,
|
|
"learning_rate": 3.0943845341004944e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731110751628876,
|
|
"step": 1695,
|
|
"valid_targets_mean": 7571.4,
|
|
"valid_targets_min": 6103
|
|
},
|
|
{
|
|
"epoch": 2.6984126984126986,
|
|
"grad_norm": 0.33547955431400794,
|
|
"learning_rate": 3.087750798360856e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829752564430237,
|
|
"step": 1700,
|
|
"valid_targets_mean": 7339.9,
|
|
"valid_targets_min": 5328
|
|
},
|
|
{
|
|
"epoch": 2.7063492063492065,
|
|
"grad_norm": 0.3850519828320886,
|
|
"learning_rate": 3.0811000251003774e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2893431782722473,
|
|
"step": 1705,
|
|
"valid_targets_mean": 6819.7,
|
|
"valid_targets_min": 6090
|
|
},
|
|
{
|
|
"epoch": 2.7142857142857144,
|
|
"grad_norm": 0.41876620872040465,
|
|
"learning_rate": 3.074432318490608e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054829239845276,
|
|
"step": 1710,
|
|
"valid_targets_mean": 6754.8,
|
|
"valid_targets_min": 5725
|
|
},
|
|
{
|
|
"epoch": 2.7222222222222223,
|
|
"grad_norm": 0.39394549491657893,
|
|
"learning_rate": 3.067747782968328e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28851303458213806,
|
|
"step": 1715,
|
|
"valid_targets_mean": 6575.1,
|
|
"valid_targets_min": 5250
|
|
},
|
|
{
|
|
"epoch": 2.7301587301587302,
|
|
"grad_norm": 0.4206741839968584,
|
|
"learning_rate": 3.0610465232339096e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29346880316734314,
|
|
"step": 1720,
|
|
"valid_targets_mean": 6597.2,
|
|
"valid_targets_min": 5691
|
|
},
|
|
{
|
|
"epoch": 2.738095238095238,
|
|
"grad_norm": 0.3903752642684434,
|
|
"learning_rate": 3.054328644249677e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958707809448242,
|
|
"step": 1725,
|
|
"valid_targets_mean": 6690.1,
|
|
"valid_targets_min": 5478
|
|
},
|
|
{
|
|
"epoch": 2.746031746031746,
|
|
"grad_norm": 0.37161048953973397,
|
|
"learning_rate": 3.047594251238265e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29604822397232056,
|
|
"step": 1730,
|
|
"valid_targets_mean": 6922.5,
|
|
"valid_targets_min": 6157
|
|
},
|
|
{
|
|
"epoch": 2.753968253968254,
|
|
"grad_norm": 0.36719314710442263,
|
|
"learning_rate": 3.0408434496809643e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268909215927124,
|
|
"step": 1735,
|
|
"valid_targets_mean": 7726.9,
|
|
"valid_targets_min": 6082
|
|
},
|
|
{
|
|
"epoch": 2.761904761904762,
|
|
"grad_norm": 0.37441061452885754,
|
|
"learning_rate": 3.034076345316079e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27883416414260864,
|
|
"step": 1740,
|
|
"valid_targets_mean": 7001.8,
|
|
"valid_targets_min": 6190
|
|
},
|
|
{
|
|
"epoch": 2.7698412698412698,
|
|
"grad_norm": 0.4064884491358845,
|
|
"learning_rate": 3.0272930441372628e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286080002784729,
|
|
"step": 1745,
|
|
"valid_targets_mean": 6598.4,
|
|
"valid_targets_min": 5789
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.3666110599526622,
|
|
"learning_rate": 3.02049365239186e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28211796283721924,
|
|
"step": 1750,
|
|
"valid_targets_mean": 7507.9,
|
|
"valid_targets_min": 3564
|
|
},
|
|
{
|
|
"epoch": 2.7857142857142856,
|
|
"grad_norm": 0.3283374653533845,
|
|
"learning_rate": 3.0136782765792455e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764661908149719,
|
|
"step": 1755,
|
|
"valid_targets_mean": 8525.0,
|
|
"valid_targets_min": 6053
|
|
},
|
|
{
|
|
"epoch": 2.7936507936507935,
|
|
"grad_norm": 0.38314628254829464,
|
|
"learning_rate": 3.0068470234491517e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987751364707947,
|
|
"step": 1760,
|
|
"valid_targets_mean": 6862.9,
|
|
"valid_targets_min": 5858
|
|
},
|
|
{
|
|
"epoch": 2.8015873015873014,
|
|
"grad_norm": 0.393633819267281,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30098673701286316,
|
|
"step": 1765,
|
|
"valid_targets_mean": 6826.6,
|
|
"valid_targets_min": 5031
|
|
},
|
|
{
|
|
"epoch": 2.8095238095238093,
|
|
"grad_norm": 0.3654697626416845,
|
|
"learning_rate": 2.993137313477223e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27368563413619995,
|
|
"step": 1770,
|
|
"valid_targets_mean": 6417.4,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 2.817460317460317,
|
|
"grad_norm": 0.35757587316560946,
|
|
"learning_rate": 2.9862590713715837e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902432382106781,
|
|
"step": 1775,
|
|
"valid_targets_mean": 6679.5,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 2.825396825396825,
|
|
"grad_norm": 0.3552703974563508,
|
|
"learning_rate": 2.9793653814174957e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28072988986968994,
|
|
"step": 1780,
|
|
"valid_targets_mean": 6820.1,
|
|
"valid_targets_min": 5743
|
|
},
|
|
{
|
|
"epoch": 2.8333333333333335,
|
|
"grad_norm": 0.376415552629196,
|
|
"learning_rate": 2.9724563515913317e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2895870506763458,
|
|
"step": 1785,
|
|
"valid_targets_mean": 7452.0,
|
|
"valid_targets_min": 4506
|
|
},
|
|
{
|
|
"epoch": 2.8412698412698414,
|
|
"grad_norm": 0.3956701515903422,
|
|
"learning_rate": 2.9655320901097348e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047757148742676,
|
|
"step": 1790,
|
|
"valid_targets_mean": 6517.6,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 2.8492063492063493,
|
|
"grad_norm": 0.3708198711014017,
|
|
"learning_rate": 2.9585927054279224e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791001796722412,
|
|
"step": 1795,
|
|
"valid_targets_mean": 6731.2,
|
|
"valid_targets_min": 5791
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.34206979961502887,
|
|
"learning_rate": 2.951638306237988e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761074900627136,
|
|
"step": 1800,
|
|
"valid_targets_mean": 7333.2,
|
|
"valid_targets_min": 5611
|
|
},
|
|
{
|
|
"epoch": 2.865079365079365,
|
|
"grad_norm": 0.4198518472750547,
|
|
"learning_rate": 2.9446690014671976e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103276491165161,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5881.8,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 2.873015873015873,
|
|
"grad_norm": 0.4240695642912641,
|
|
"learning_rate": 2.937684900276285e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29687345027923584,
|
|
"step": 1810,
|
|
"valid_targets_mean": 6422.2,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 2.880952380952381,
|
|
"grad_norm": 0.3741878178350792,
|
|
"learning_rate": 2.9306861120577416e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2825281023979187,
|
|
"step": 1815,
|
|
"valid_targets_mean": 6646.6,
|
|
"valid_targets_min": 5869
|
|
},
|
|
{
|
|
"epoch": 2.888888888888889,
|
|
"grad_norm": 0.39119503225508795,
|
|
"learning_rate": 2.923672746434103e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27314549684524536,
|
|
"step": 1820,
|
|
"valid_targets_mean": 6810.4,
|
|
"valid_targets_min": 5936
|
|
},
|
|
{
|
|
"epoch": 2.8968253968253967,
|
|
"grad_norm": 0.3402118120365203,
|
|
"learning_rate": 2.9166449132562303e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27875739336013794,
|
|
"step": 1825,
|
|
"valid_targets_mean": 6840.9,
|
|
"valid_targets_min": 6283
|
|
},
|
|
{
|
|
"epoch": 2.9047619047619047,
|
|
"grad_norm": 0.3865469844692214,
|
|
"learning_rate": 2.9096027226015927e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27800145745277405,
|
|
"step": 1830,
|
|
"valid_targets_mean": 6773.8,
|
|
"valid_targets_min": 5776
|
|
},
|
|
{
|
|
"epoch": 2.9126984126984126,
|
|
"grad_norm": 0.36325215659829335,
|
|
"learning_rate": 2.9025462847725405e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948734164237976,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6317.8,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 2.9206349206349205,
|
|
"grad_norm": 0.4029220464719995,
|
|
"learning_rate": 2.8954757102945798e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31057000160217285,
|
|
"step": 1840,
|
|
"valid_targets_mean": 6314.5,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 2.928571428571429,
|
|
"grad_norm": 0.3349971132317653,
|
|
"learning_rate": 2.888391109914638e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796922028064728,
|
|
"step": 1845,
|
|
"valid_targets_mean": 7710.9,
|
|
"valid_targets_min": 6130
|
|
},
|
|
{
|
|
"epoch": 2.9365079365079367,
|
|
"grad_norm": 0.3577202223853133,
|
|
"learning_rate": 2.8812925945993333e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931162714958191,
|
|
"step": 1850,
|
|
"valid_targets_mean": 7599.0,
|
|
"valid_targets_min": 5973
|
|
},
|
|
{
|
|
"epoch": 2.9444444444444446,
|
|
"grad_norm": 0.3983269919687857,
|
|
"learning_rate": 2.8741802755332332e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838042080402374,
|
|
"step": 1855,
|
|
"valid_targets_mean": 6693.6,
|
|
"valid_targets_min": 6207
|
|
},
|
|
{
|
|
"epoch": 2.9523809523809526,
|
|
"grad_norm": 0.35809876541730784,
|
|
"learning_rate": 2.8670542641171155e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2927975356578827,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6999.4,
|
|
"valid_targets_min": 5817
|
|
},
|
|
{
|
|
"epoch": 2.9603174603174605,
|
|
"grad_norm": 0.3457430610240399,
|
|
"learning_rate": 2.859914671966221e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731056809425354,
|
|
"step": 1865,
|
|
"valid_targets_mean": 6787.8,
|
|
"valid_targets_min": 5443
|
|
},
|
|
{
|
|
"epoch": 2.9682539682539684,
|
|
"grad_norm": 0.3898764606629703,
|
|
"learning_rate": 2.8527616109085082e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848525047302246,
|
|
"step": 1870,
|
|
"valid_targets_mean": 6623.8,
|
|
"valid_targets_min": 4327
|
|
},
|
|
{
|
|
"epoch": 2.9761904761904763,
|
|
"grad_norm": 0.3821237115723077,
|
|
"learning_rate": 2.8455951929828977e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27994582056999207,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6600.4,
|
|
"valid_targets_min": 5821
|
|
},
|
|
{
|
|
"epoch": 2.984126984126984,
|
|
"grad_norm": 0.37543721777857775,
|
|
"learning_rate": 2.8384155304375223e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2692505419254303,
|
|
"step": 1880,
|
|
"valid_targets_mean": 6679.9,
|
|
"valid_targets_min": 5837
|
|
},
|
|
{
|
|
"epoch": 2.992063492063492,
|
|
"grad_norm": 0.37548805290374604,
|
|
"learning_rate": 2.8312227357279646e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894020676612854,
|
|
"step": 1885,
|
|
"valid_targets_mean": 6774.2,
|
|
"valid_targets_min": 5890
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.37957359227005694,
|
|
"learning_rate": 2.8240169215154977e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2896161675453186,
|
|
"step": 1890,
|
|
"valid_targets_mean": 6109.5,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 3.007936507936508,
|
|
"grad_norm": 0.37347730985574484,
|
|
"learning_rate": 2.8167982006653196e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28203874826431274,
|
|
"step": 1895,
|
|
"valid_targets_mean": 7407.4,
|
|
"valid_targets_min": 6170
|
|
},
|
|
{
|
|
"epoch": 3.015873015873016,
|
|
"grad_norm": 0.37288932615599985,
|
|
"learning_rate": 2.8095666862447876e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918568253517151,
|
|
"step": 1900,
|
|
"valid_targets_mean": 6829.6,
|
|
"valid_targets_min": 6175
|
|
},
|
|
{
|
|
"epoch": 3.0238095238095237,
|
|
"grad_norm": 0.3634541058744199,
|
|
"learning_rate": 2.8023224915216442e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790878415107727,
|
|
"step": 1905,
|
|
"valid_targets_mean": 6763.9,
|
|
"valid_targets_min": 5911
|
|
},
|
|
{
|
|
"epoch": 3.0317460317460316,
|
|
"grad_norm": 0.38247323719289694,
|
|
"learning_rate": 2.795065729962244e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002347946166992,
|
|
"step": 1910,
|
|
"valid_targets_mean": 7062.6,
|
|
"valid_targets_min": 5878
|
|
},
|
|
{
|
|
"epoch": 3.0396825396825395,
|
|
"grad_norm": 0.31995861751136917,
|
|
"learning_rate": 2.7877965152297785e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790948748588562,
|
|
"step": 1915,
|
|
"valid_targets_mean": 6886.1,
|
|
"valid_targets_min": 6170
|
|
},
|
|
{
|
|
"epoch": 3.0476190476190474,
|
|
"grad_norm": 0.45818952324099754,
|
|
"learning_rate": 2.780514961182492e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2839384377002716,
|
|
"step": 1920,
|
|
"valid_targets_mean": 6752.6,
|
|
"valid_targets_min": 5360
|
|
},
|
|
{
|
|
"epoch": 3.0555555555555554,
|
|
"grad_norm": 0.38273784132165795,
|
|
"learning_rate": 2.773221181871903e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969732880592346,
|
|
"step": 1925,
|
|
"valid_targets_mean": 6913.3,
|
|
"valid_targets_min": 6282
|
|
},
|
|
{
|
|
"epoch": 3.0634920634920633,
|
|
"grad_norm": 0.3512936229814997,
|
|
"learning_rate": 2.765915291541013e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28148239850997925,
|
|
"step": 1930,
|
|
"valid_targets_mean": 6870.8,
|
|
"valid_targets_min": 6131
|
|
},
|
|
{
|
|
"epoch": 3.0714285714285716,
|
|
"grad_norm": 0.37435482553118465,
|
|
"learning_rate": 2.7585974046225206e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29011020064353943,
|
|
"step": 1935,
|
|
"valid_targets_mean": 6781.9,
|
|
"valid_targets_min": 6146
|
|
},
|
|
{
|
|
"epoch": 3.0793650793650795,
|
|
"grad_norm": 0.3808522296139392,
|
|
"learning_rate": 2.751267635737027e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857987880706787,
|
|
"step": 1940,
|
|
"valid_targets_mean": 6793.4,
|
|
"valid_targets_min": 5874
|
|
},
|
|
{
|
|
"epoch": 3.0873015873015874,
|
|
"grad_norm": 0.3324002650537653,
|
|
"learning_rate": 2.7439260996912423e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28080257773399353,
|
|
"step": 1945,
|
|
"valid_targets_mean": 6756.6,
|
|
"valid_targets_min": 5804
|
|
},
|
|
{
|
|
"epoch": 3.0952380952380953,
|
|
"grad_norm": 0.37489268278152565,
|
|
"learning_rate": 2.7365729114761862e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773934602737427,
|
|
"step": 1950,
|
|
"valid_targets_mean": 6304.2,
|
|
"valid_targets_min": 2403
|
|
},
|
|
{
|
|
"epoch": 3.1031746031746033,
|
|
"grad_norm": 0.348034974790927,
|
|
"learning_rate": 2.729208186265386e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270722359418869,
|
|
"step": 1955,
|
|
"valid_targets_mean": 6503.1,
|
|
"valid_targets_min": 5931
|
|
},
|
|
{
|
|
"epoch": 3.111111111111111,
|
|
"grad_norm": 0.4059264226619686,
|
|
"learning_rate": 2.721832039413077e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901855707168579,
|
|
"step": 1960,
|
|
"valid_targets_mean": 6657.8,
|
|
"valid_targets_min": 6156
|
|
},
|
|
{
|
|
"epoch": 3.119047619047619,
|
|
"grad_norm": 0.3735784224802722,
|
|
"learning_rate": 2.7144445864523887e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28734350204467773,
|
|
"step": 1965,
|
|
"valid_targets_mean": 6721.4,
|
|
"valid_targets_min": 6090
|
|
},
|
|
{
|
|
"epoch": 3.126984126984127,
|
|
"grad_norm": 0.37415300465223245,
|
|
"learning_rate": 2.7070459430935407e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852846682071686,
|
|
"step": 1970,
|
|
"valid_targets_mean": 6830.6,
|
|
"valid_targets_min": 5618
|
|
},
|
|
{
|
|
"epoch": 3.134920634920635,
|
|
"grad_norm": 0.335382760189247,
|
|
"learning_rate": 2.69963622522203e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29504501819610596,
|
|
"step": 1975,
|
|
"valid_targets_mean": 7605.6,
|
|
"valid_targets_min": 6294
|
|
},
|
|
{
|
|
"epoch": 3.142857142857143,
|
|
"grad_norm": 0.3133889001778798,
|
|
"learning_rate": 2.6922155488968117e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559858560562134,
|
|
"step": 1980,
|
|
"valid_targets_mean": 8026.4,
|
|
"valid_targets_min": 5443
|
|
},
|
|
{
|
|
"epoch": 3.1507936507936507,
|
|
"grad_norm": 0.35402149437906727,
|
|
"learning_rate": 2.684784030348486e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828506529331207,
|
|
"step": 1985,
|
|
"valid_targets_mean": 6726.2,
|
|
"valid_targets_min": 6116
|
|
},
|
|
{
|
|
"epoch": 3.1587301587301586,
|
|
"grad_norm": 0.3280939257487129,
|
|
"learning_rate": 2.6773417859774755e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736167907714844,
|
|
"step": 1990,
|
|
"valid_targets_mean": 7509.6,
|
|
"valid_targets_min": 6135
|
|
},
|
|
{
|
|
"epoch": 3.1666666666666665,
|
|
"grad_norm": 0.347959379880439,
|
|
"learning_rate": 2.669888932352201e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27805382013320923,
|
|
"step": 1995,
|
|
"valid_targets_mean": 6624.8,
|
|
"valid_targets_min": 5912
|
|
},
|
|
{
|
|
"epoch": 3.1746031746031744,
|
|
"grad_norm": 0.41231747556776555,
|
|
"learning_rate": 2.662425586207259e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949248254299164,
|
|
"step": 2000,
|
|
"valid_targets_mean": 7160.1,
|
|
"valid_targets_min": 6075
|
|
},
|
|
{
|
|
"epoch": 3.1825396825396823,
|
|
"grad_norm": 0.34214428915335116,
|
|
"learning_rate": 2.6549518644415876e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760777771472931,
|
|
"step": 2005,
|
|
"valid_targets_mean": 6714.4,
|
|
"valid_targets_min": 5546
|
|
},
|
|
{
|
|
"epoch": 3.1904761904761907,
|
|
"grad_norm": 0.37211383549452837,
|
|
"learning_rate": 2.6474678841166426e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28807225823402405,
|
|
"step": 2010,
|
|
"valid_targets_mean": 7737.1,
|
|
"valid_targets_min": 6204
|
|
},
|
|
{
|
|
"epoch": 3.1984126984126986,
|
|
"grad_norm": 0.3666687405957571,
|
|
"learning_rate": 2.639973762454558e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877764105796814,
|
|
"step": 2015,
|
|
"valid_targets_mean": 6573.6,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 3.2063492063492065,
|
|
"grad_norm": 0.35017992590464436,
|
|
"learning_rate": 2.6324696168363134e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717195749282837,
|
|
"step": 2020,
|
|
"valid_targets_mean": 6673.2,
|
|
"valid_targets_min": 5982
|
|
},
|
|
{
|
|
"epoch": 3.2142857142857144,
|
|
"grad_norm": 0.3942073296979924,
|
|
"learning_rate": 2.624955564799894e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3100702166557312,
|
|
"step": 2025,
|
|
"valid_targets_mean": 6716.9,
|
|
"valid_targets_min": 6326
|
|
},
|
|
{
|
|
"epoch": 3.2222222222222223,
|
|
"grad_norm": 0.3405113854561003,
|
|
"learning_rate": 2.617431724038451e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264583945274353,
|
|
"step": 2030,
|
|
"valid_targets_mean": 7571.8,
|
|
"valid_targets_min": 5898
|
|
},
|
|
{
|
|
"epoch": 3.2301587301587302,
|
|
"grad_norm": 0.37029638557827677,
|
|
"learning_rate": 2.609898212398455e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870217561721802,
|
|
"step": 2035,
|
|
"valid_targets_mean": 6666.4,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 3.238095238095238,
|
|
"grad_norm": 0.3762347793845241,
|
|
"learning_rate": 2.6023551478778535e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929326891899109,
|
|
"step": 2040,
|
|
"valid_targets_mean": 6554.2,
|
|
"valid_targets_min": 6042
|
|
},
|
|
{
|
|
"epoch": 3.246031746031746,
|
|
"grad_norm": 0.34888356096862644,
|
|
"learning_rate": 2.5948026486242225e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534189820289612,
|
|
"step": 2045,
|
|
"valid_targets_mean": 6505.1,
|
|
"valid_targets_min": 5924
|
|
},
|
|
{
|
|
"epoch": 3.253968253968254,
|
|
"grad_norm": 0.3480621064566772,
|
|
"learning_rate": 2.5872408329329136e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30314233899116516,
|
|
"step": 2050,
|
|
"valid_targets_mean": 7153.9,
|
|
"valid_targets_min": 6188
|
|
},
|
|
{
|
|
"epoch": 3.261904761904762,
|
|
"grad_norm": 0.3960453445841098,
|
|
"learning_rate": 2.5796698192452016e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28814220428466797,
|
|
"step": 2055,
|
|
"valid_targets_mean": 6657.0,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 3.2698412698412698,
|
|
"grad_norm": 0.38166146754830227,
|
|
"learning_rate": 2.572089726146432e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28875115513801575,
|
|
"step": 2060,
|
|
"valid_targets_mean": 6781.1,
|
|
"valid_targets_min": 6182
|
|
},
|
|
{
|
|
"epoch": 3.2777777777777777,
|
|
"grad_norm": 0.3794650140043809,
|
|
"learning_rate": 2.564500672364162e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778005003929138,
|
|
"step": 2065,
|
|
"valid_targets_mean": 6349.6,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.3639708485117279,
|
|
"learning_rate": 2.556902776766298e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275177538394928,
|
|
"step": 2070,
|
|
"valid_targets_mean": 6920.8,
|
|
"valid_targets_min": 6026
|
|
},
|
|
{
|
|
"epoch": 3.2936507936507935,
|
|
"grad_norm": 0.34498695816871244,
|
|
"learning_rate": 2.5492961583592397e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27932828664779663,
|
|
"step": 2075,
|
|
"valid_targets_mean": 7004.9,
|
|
"valid_targets_min": 6182
|
|
},
|
|
{
|
|
"epoch": 3.3015873015873014,
|
|
"grad_norm": 0.3871177338043176,
|
|
"learning_rate": 2.5416809362860107e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28635919094085693,
|
|
"step": 2080,
|
|
"valid_targets_mean": 6756.2,
|
|
"valid_targets_min": 4221
|
|
},
|
|
{
|
|
"epoch": 3.3095238095238093,
|
|
"grad_norm": 0.3562018741326075,
|
|
"learning_rate": 2.5340572298243946e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2888091206550598,
|
|
"step": 2085,
|
|
"valid_targets_mean": 7328.2,
|
|
"valid_targets_min": 5707
|
|
},
|
|
{
|
|
"epoch": 3.317460317460317,
|
|
"grad_norm": 0.35313052249450044,
|
|
"learning_rate": 2.5264251583850677e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2722732424736023,
|
|
"step": 2090,
|
|
"valid_targets_mean": 6994.0,
|
|
"valid_targets_min": 6259
|
|
},
|
|
{
|
|
"epoch": 3.3253968253968256,
|
|
"grad_norm": 0.3573272443406306,
|
|
"learning_rate": 2.518784841509726e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718071937561035,
|
|
"step": 2095,
|
|
"valid_targets_mean": 6627.9,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.35170145243472783,
|
|
"learning_rate": 2.511136398869216e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28485479950904846,
|
|
"step": 2100,
|
|
"valid_targets_mean": 6852.4,
|
|
"valid_targets_min": 6040
|
|
},
|
|
{
|
|
"epoch": 3.3412698412698414,
|
|
"grad_norm": 0.3816291714532512,
|
|
"learning_rate": 2.503479950261658e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29539233446121216,
|
|
"step": 2105,
|
|
"valid_targets_mean": 6786.9,
|
|
"valid_targets_min": 6217
|
|
},
|
|
{
|
|
"epoch": 3.3492063492063493,
|
|
"grad_norm": 0.3587072172313252,
|
|
"learning_rate": 2.4958156156105693e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2846233546733856,
|
|
"step": 2110,
|
|
"valid_targets_mean": 7258.4,
|
|
"valid_targets_min": 6003
|
|
},
|
|
{
|
|
"epoch": 3.357142857142857,
|
|
"grad_norm": 0.34692452387129363,
|
|
"learning_rate": 2.4881435149629892e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2807486653327942,
|
|
"step": 2115,
|
|
"valid_targets_mean": 6866.6,
|
|
"valid_targets_min": 6200
|
|
},
|
|
{
|
|
"epoch": 3.365079365079365,
|
|
"grad_norm": 0.3555473319741021,
|
|
"learning_rate": 2.4804637684875937e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28913670778274536,
|
|
"step": 2120,
|
|
"valid_targets_mean": 8062.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.373015873015873,
|
|
"grad_norm": 0.36992191166056776,
|
|
"learning_rate": 2.4727764964728177e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28340238332748413,
|
|
"step": 2125,
|
|
"valid_targets_mean": 6532.8,
|
|
"valid_targets_min": 5743
|
|
},
|
|
{
|
|
"epoch": 3.380952380952381,
|
|
"grad_norm": 0.3329885382463814,
|
|
"learning_rate": 2.4650818193249693e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28385528922080994,
|
|
"step": 2130,
|
|
"valid_targets_mean": 6692.3,
|
|
"valid_targets_min": 6085
|
|
},
|
|
{
|
|
"epoch": 3.388888888888889,
|
|
"grad_norm": 0.38106665128766376,
|
|
"learning_rate": 2.4573798575663425e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30388984084129333,
|
|
"step": 2135,
|
|
"valid_targets_mean": 7005.9,
|
|
"valid_targets_min": 6075
|
|
},
|
|
{
|
|
"epoch": 3.3968253968253967,
|
|
"grad_norm": 0.3736371675156148,
|
|
"learning_rate": 2.4496707318333323e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2951207756996155,
|
|
"step": 2140,
|
|
"valid_targets_mean": 6685.4,
|
|
"valid_targets_min": 6099
|
|
},
|
|
{
|
|
"epoch": 3.4047619047619047,
|
|
"grad_norm": 0.34618708711260715,
|
|
"learning_rate": 2.441954562874541e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27241796255111694,
|
|
"step": 2145,
|
|
"valid_targets_mean": 6815.9,
|
|
"valid_targets_min": 5995
|
|
},
|
|
{
|
|
"epoch": 3.4126984126984126,
|
|
"grad_norm": 0.3837828189522095,
|
|
"learning_rate": 2.434231471548893e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29301247000694275,
|
|
"step": 2150,
|
|
"valid_targets_mean": 6731.9,
|
|
"valid_targets_min": 5758
|
|
},
|
|
{
|
|
"epoch": 3.4206349206349205,
|
|
"grad_norm": 0.35133961339436703,
|
|
"learning_rate": 2.4265015788237348e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28866082429885864,
|
|
"step": 2155,
|
|
"valid_targets_mean": 7092.3,
|
|
"valid_targets_min": 5414
|
|
},
|
|
{
|
|
"epoch": 3.4285714285714284,
|
|
"grad_norm": 0.3485890173176309,
|
|
"learning_rate": 2.4187650057729465e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28252872824668884,
|
|
"step": 2160,
|
|
"valid_targets_mean": 6886.7,
|
|
"valid_targets_min": 6092
|
|
},
|
|
{
|
|
"epoch": 3.4365079365079367,
|
|
"grad_norm": 0.35155460652198606,
|
|
"learning_rate": 2.4110218735750403e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635493576526642,
|
|
"step": 2165,
|
|
"valid_targets_mean": 6702.6,
|
|
"valid_targets_min": 5749
|
|
},
|
|
{
|
|
"epoch": 3.4444444444444446,
|
|
"grad_norm": 0.37470347430894935,
|
|
"learning_rate": 2.4032723035112667e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656484842300415,
|
|
"step": 2170,
|
|
"valid_targets_mean": 7243.4,
|
|
"valid_targets_min": 5817
|
|
},
|
|
{
|
|
"epoch": 3.4523809523809526,
|
|
"grad_norm": 0.35090945346741265,
|
|
"learning_rate": 2.3955164169637124e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822057008743286,
|
|
"step": 2175,
|
|
"valid_targets_mean": 7581.6,
|
|
"valid_targets_min": 6258
|
|
},
|
|
{
|
|
"epoch": 3.4603174603174605,
|
|
"grad_norm": 0.32864630925059224,
|
|
"learning_rate": 2.387754335413398e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26262542605400085,
|
|
"step": 2180,
|
|
"valid_targets_mean": 6846.4,
|
|
"valid_targets_min": 5840
|
|
},
|
|
{
|
|
"epoch": 3.4682539682539684,
|
|
"grad_norm": 0.40512946128262867,
|
|
"learning_rate": 2.3799861804383807e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999958395957947,
|
|
"step": 2185,
|
|
"valid_targets_mean": 6792.1,
|
|
"valid_targets_min": 5772
|
|
},
|
|
{
|
|
"epoch": 3.4761904761904763,
|
|
"grad_norm": 0.38202100754688195,
|
|
"learning_rate": 2.3722120737118414e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28452610969543457,
|
|
"step": 2190,
|
|
"valid_targets_mean": 6901.5,
|
|
"valid_targets_min": 5957
|
|
},
|
|
{
|
|
"epoch": 3.484126984126984,
|
|
"grad_norm": 0.384618525687635,
|
|
"learning_rate": 2.3644321370001868e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763345539569855,
|
|
"step": 2195,
|
|
"valid_targets_mean": 6588.9,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 3.492063492063492,
|
|
"grad_norm": 0.3633457576115875,
|
|
"learning_rate": 2.3566464921611393e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28692176938056946,
|
|
"step": 2200,
|
|
"valid_targets_mean": 6605.9,
|
|
"valid_targets_min": 5823
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"grad_norm": 0.35988044176414596,
|
|
"learning_rate": 2.348855261141827e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832300364971161,
|
|
"step": 2205,
|
|
"valid_targets_mean": 6470.9,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 3.507936507936508,
|
|
"grad_norm": 0.3686127518442853,
|
|
"learning_rate": 2.341058565976874e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28337612748146057,
|
|
"step": 2210,
|
|
"valid_targets_mean": 7286.7,
|
|
"valid_targets_min": 5503
|
|
},
|
|
{
|
|
"epoch": 3.515873015873016,
|
|
"grad_norm": 0.3440920599675757,
|
|
"learning_rate": 2.3332565287864918e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28375059366226196,
|
|
"step": 2215,
|
|
"valid_targets_mean": 7024.9,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 3.5238095238095237,
|
|
"grad_norm": 0.34282732443518743,
|
|
"learning_rate": 2.325449271774563e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828438878059387,
|
|
"step": 2220,
|
|
"valid_targets_mean": 7116.8,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 3.5317460317460316,
|
|
"grad_norm": 0.3730318078127401,
|
|
"learning_rate": 2.3176369172267286e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2872731387615204,
|
|
"step": 2225,
|
|
"valid_targets_mean": 6684.4,
|
|
"valid_targets_min": 5880
|
|
},
|
|
{
|
|
"epoch": 3.5396825396825395,
|
|
"grad_norm": 0.35464799689214427,
|
|
"learning_rate": 2.3098195875084732e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781447768211365,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6509.8,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 3.5476190476190474,
|
|
"grad_norm": 0.3478071750109381,
|
|
"learning_rate": 2.301997405063208e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922397255897522,
|
|
"step": 2235,
|
|
"valid_targets_mean": 6697.1,
|
|
"valid_targets_min": 5549
|
|
},
|
|
{
|
|
"epoch": 3.5555555555555554,
|
|
"grad_norm": 0.34243127069737794,
|
|
"learning_rate": 2.2941704924103535e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854847311973572,
|
|
"step": 2240,
|
|
"valid_targets_mean": 7093.7,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 3.5634920634920633,
|
|
"grad_norm": 0.367328594532514,
|
|
"learning_rate": 2.2863389721434165e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855761647224426,
|
|
"step": 2245,
|
|
"valid_targets_mean": 6917.6,
|
|
"valid_targets_min": 5876
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.332478796975553,
|
|
"learning_rate": 2.2785029669280775e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829280495643616,
|
|
"step": 2250,
|
|
"valid_targets_mean": 6898.4,
|
|
"valid_targets_min": 6329
|
|
},
|
|
{
|
|
"epoch": 3.5793650793650795,
|
|
"grad_norm": 0.3306561855331795,
|
|
"learning_rate": 2.2706625995002626e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608898878097534,
|
|
"step": 2255,
|
|
"valid_targets_mean": 6625.5,
|
|
"valid_targets_min": 5983
|
|
},
|
|
{
|
|
"epoch": 3.5873015873015874,
|
|
"grad_norm": 0.3659790636353604,
|
|
"learning_rate": 2.262817992664224e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29709434509277344,
|
|
"step": 2260,
|
|
"valid_targets_mean": 6755.5,
|
|
"valid_targets_min": 5920
|
|
},
|
|
{
|
|
"epoch": 3.5952380952380953,
|
|
"grad_norm": 0.365822342934927,
|
|
"learning_rate": 2.2549692692906158e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861059308052063,
|
|
"step": 2265,
|
|
"valid_targets_mean": 7465.0,
|
|
"valid_targets_min": 6014
|
|
},
|
|
{
|
|
"epoch": 3.6031746031746033,
|
|
"grad_norm": 0.3643563444120454,
|
|
"learning_rate": 2.24711655231457e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979148030281067,
|
|
"step": 2270,
|
|
"valid_targets_mean": 6973.7,
|
|
"valid_targets_min": 6506
|
|
},
|
|
{
|
|
"epoch": 3.611111111111111,
|
|
"grad_norm": 0.3830337046694783,
|
|
"learning_rate": 2.2392599647337724e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763550877571106,
|
|
"step": 2275,
|
|
"valid_targets_mean": 6902.3,
|
|
"valid_targets_min": 6148
|
|
},
|
|
{
|
|
"epoch": 3.619047619047619,
|
|
"grad_norm": 0.3252024609045062,
|
|
"learning_rate": 2.23139962960653e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27614250779151917,
|
|
"step": 2280,
|
|
"valid_targets_mean": 6736.5,
|
|
"valid_targets_min": 5817
|
|
},
|
|
{
|
|
"epoch": 3.626984126984127,
|
|
"grad_norm": 0.3347547366109016,
|
|
"learning_rate": 2.2235356700498528e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779342830181122,
|
|
"step": 2285,
|
|
"valid_targets_mean": 6842.9,
|
|
"valid_targets_min": 5867
|
|
},
|
|
{
|
|
"epoch": 3.634920634920635,
|
|
"grad_norm": 0.34489775514827664,
|
|
"learning_rate": 2.2156682092375175e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283725380897522,
|
|
"step": 2290,
|
|
"valid_targets_mean": 7056.3,
|
|
"valid_targets_min": 6218
|
|
},
|
|
{
|
|
"epoch": 3.642857142857143,
|
|
"grad_norm": 0.3278043051816163,
|
|
"learning_rate": 2.2077973703981423e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28248530626296997,
|
|
"step": 2295,
|
|
"valid_targets_mean": 7983.6,
|
|
"valid_targets_min": 4425
|
|
},
|
|
{
|
|
"epoch": 3.6507936507936507,
|
|
"grad_norm": 0.3340356603579259,
|
|
"learning_rate": 2.1999232768132552e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24684977531433105,
|
|
"step": 2300,
|
|
"valid_targets_mean": 6793.1,
|
|
"valid_targets_min": 6041
|
|
},
|
|
{
|
|
"epoch": 3.6587301587301586,
|
|
"grad_norm": 0.3227324294645668,
|
|
"learning_rate": 2.1920460518153637e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731075584888458,
|
|
"step": 2305,
|
|
"valid_targets_mean": 7713.9,
|
|
"valid_targets_min": 5963
|
|
},
|
|
{
|
|
"epoch": 3.6666666666666665,
|
|
"grad_norm": 0.34390254491244865,
|
|
"learning_rate": 2.1841658187860232e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28489845991134644,
|
|
"step": 2310,
|
|
"valid_targets_mean": 6619.1,
|
|
"valid_targets_min": 6056
|
|
},
|
|
{
|
|
"epoch": 3.674603174603175,
|
|
"grad_norm": 0.33406457219009744,
|
|
"learning_rate": 2.176282701153904e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2567208409309387,
|
|
"step": 2315,
|
|
"valid_targets_mean": 7337.4,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.682539682539683,
|
|
"grad_norm": 0.33385204495015813,
|
|
"learning_rate": 2.1683968223928572e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2807605266571045,
|
|
"step": 2320,
|
|
"valid_targets_mean": 7474.2,
|
|
"valid_targets_min": 6407
|
|
},
|
|
{
|
|
"epoch": 3.6904761904761907,
|
|
"grad_norm": 0.317875448843412,
|
|
"learning_rate": 2.1605083060199835e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887420654296875,
|
|
"step": 2325,
|
|
"valid_targets_mean": 8784.4,
|
|
"valid_targets_min": 5712
|
|
},
|
|
{
|
|
"epoch": 3.6984126984126986,
|
|
"grad_norm": 0.3518556233821721,
|
|
"learning_rate": 2.152617275593694e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28030043840408325,
|
|
"step": 2330,
|
|
"valid_targets_mean": 6661.7,
|
|
"valid_targets_min": 4880
|
|
},
|
|
{
|
|
"epoch": 3.7063492063492065,
|
|
"grad_norm": 0.3591873409770481,
|
|
"learning_rate": 2.144723854711781e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766907811164856,
|
|
"step": 2335,
|
|
"valid_targets_mean": 7218.1,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 3.7142857142857144,
|
|
"grad_norm": 0.3602849985923925,
|
|
"learning_rate": 2.1368281670094766e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28866147994995117,
|
|
"step": 2340,
|
|
"valid_targets_mean": 6869.4,
|
|
"valid_targets_min": 6248
|
|
},
|
|
{
|
|
"epoch": 3.7222222222222223,
|
|
"grad_norm": 0.3247227783443309,
|
|
"learning_rate": 2.1289303361575175e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27216652035713196,
|
|
"step": 2345,
|
|
"valid_targets_mean": 6846.8,
|
|
"valid_targets_min": 6018
|
|
},
|
|
{
|
|
"epoch": 3.7301587301587302,
|
|
"grad_norm": 0.3860651238766827,
|
|
"learning_rate": 2.121030485860211e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885698080062866,
|
|
"step": 2350,
|
|
"valid_targets_mean": 6753.2,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 3.738095238095238,
|
|
"grad_norm": 0.4126504864485422,
|
|
"learning_rate": 2.113128739853493e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835997939109802,
|
|
"step": 2355,
|
|
"valid_targets_mean": 6113.6,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 3.746031746031746,
|
|
"grad_norm": 0.3295290399154437,
|
|
"learning_rate": 2.1052252219029944e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27279001474380493,
|
|
"step": 2360,
|
|
"valid_targets_mean": 7253.2,
|
|
"valid_targets_min": 5990
|
|
},
|
|
{
|
|
"epoch": 3.753968253968254,
|
|
"grad_norm": 0.36732520701040944,
|
|
"learning_rate": 2.0973200558020967e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802339792251587,
|
|
"step": 2365,
|
|
"valid_targets_mean": 6430.8,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 3.761904761904762,
|
|
"grad_norm": 0.35952707802923317,
|
|
"learning_rate": 2.0894133653700005e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30390632152557373,
|
|
"step": 2370,
|
|
"valid_targets_mean": 6670.8,
|
|
"valid_targets_min": 6102
|
|
},
|
|
{
|
|
"epoch": 3.7698412698412698,
|
|
"grad_norm": 0.3741116029699968,
|
|
"learning_rate": 2.0815052744497795e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2858293056488037,
|
|
"step": 2375,
|
|
"valid_targets_mean": 6279.6,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 3.7777777777777777,
|
|
"grad_norm": 0.349300916133532,
|
|
"learning_rate": 2.0735959069064434e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2881176769733429,
|
|
"step": 2380,
|
|
"valid_targets_mean": 6930.4,
|
|
"valid_targets_min": 5950
|
|
},
|
|
{
|
|
"epoch": 3.7857142857142856,
|
|
"grad_norm": 0.3517560722551662,
|
|
"learning_rate": 2.065685386624999e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871021330356598,
|
|
"step": 2385,
|
|
"valid_targets_mean": 6658.8,
|
|
"valid_targets_min": 5731
|
|
},
|
|
{
|
|
"epoch": 3.7936507936507935,
|
|
"grad_norm": 0.345275635964642,
|
|
"learning_rate": 2.0577738375085076e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764952778816223,
|
|
"step": 2390,
|
|
"valid_targets_mean": 6704.2,
|
|
"valid_targets_min": 5903
|
|
},
|
|
{
|
|
"epoch": 3.8015873015873014,
|
|
"grad_norm": 0.34239348316950696,
|
|
"learning_rate": 2.0498613834761462e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28023093938827515,
|
|
"step": 2395,
|
|
"valid_targets_mean": 6671.5,
|
|
"valid_targets_min": 6123
|
|
},
|
|
{
|
|
"epoch": 3.8095238095238093,
|
|
"grad_norm": 0.36714710877765583,
|
|
"learning_rate": 2.041948148461264e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713680863380432,
|
|
"step": 2400,
|
|
"valid_targets_mean": 6425.7,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 3.817460317460317,
|
|
"grad_norm": 0.3561378419115524,
|
|
"learning_rate": 2.0340342564094436e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285433292388916,
|
|
"step": 2405,
|
|
"valid_targets_mean": 6739.6,
|
|
"valid_targets_min": 5816
|
|
},
|
|
{
|
|
"epoch": 3.825396825396825,
|
|
"grad_norm": 0.3813540375189877,
|
|
"learning_rate": 2.0261198312765597e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2823823094367981,
|
|
"step": 2410,
|
|
"valid_targets_mean": 6281.9,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.8333333333333335,
|
|
"grad_norm": 0.36039930559401123,
|
|
"learning_rate": 2.0182049970268355e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26361119747161865,
|
|
"step": 2415,
|
|
"valid_targets_mean": 6812.4,
|
|
"valid_targets_min": 6131
|
|
},
|
|
{
|
|
"epoch": 3.8412698412698414,
|
|
"grad_norm": 0.3503641000252341,
|
|
"learning_rate": 2.010289877630902e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733581066131592,
|
|
"step": 2420,
|
|
"valid_targets_mean": 6887.2,
|
|
"valid_targets_min": 6196
|
|
},
|
|
{
|
|
"epoch": 3.8492063492063493,
|
|
"grad_norm": 0.3480620899867291,
|
|
"learning_rate": 2.002374597063858e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843567728996277,
|
|
"step": 2425,
|
|
"valid_targets_mean": 7281.8,
|
|
"valid_targets_min": 4211
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.36897759312388007,
|
|
"learning_rate": 1.9944592793033255e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29287245869636536,
|
|
"step": 2430,
|
|
"valid_targets_mean": 7238.8,
|
|
"valid_targets_min": 5714
|
|
},
|
|
{
|
|
"epoch": 3.865079365079365,
|
|
"grad_norm": 0.37654617644272675,
|
|
"learning_rate": 1.9865440483275086e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29796725511550903,
|
|
"step": 2435,
|
|
"valid_targets_mean": 7846.7,
|
|
"valid_targets_min": 5522
|
|
},
|
|
{
|
|
"epoch": 3.873015873015873,
|
|
"grad_norm": 0.382192979376078,
|
|
"learning_rate": 1.978629028113254e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928523123264313,
|
|
"step": 2440,
|
|
"valid_targets_mean": 6893.8,
|
|
"valid_targets_min": 5711
|
|
},
|
|
{
|
|
"epoch": 3.880952380952381,
|
|
"grad_norm": 0.38377710648822355,
|
|
"learning_rate": 1.9707143426341058e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841948866844177,
|
|
"step": 2445,
|
|
"valid_targets_mean": 6243.9,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 0.35782959126168784,
|
|
"learning_rate": 1.962800115858364e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744080126285553,
|
|
"step": 2450,
|
|
"valid_targets_mean": 7208.4,
|
|
"valid_targets_min": 5399
|
|
},
|
|
{
|
|
"epoch": 3.8968253968253967,
|
|
"grad_norm": 0.33192666328526915,
|
|
"learning_rate": 1.9548864717471472e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663179039955139,
|
|
"step": 2455,
|
|
"valid_targets_mean": 6767.6,
|
|
"valid_targets_min": 5512
|
|
},
|
|
{
|
|
"epoch": 3.9047619047619047,
|
|
"grad_norm": 0.35071999846542384,
|
|
"learning_rate": 1.9469735342524454e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909437417984009,
|
|
"step": 2460,
|
|
"valid_targets_mean": 6758.3,
|
|
"valid_targets_min": 5890
|
|
},
|
|
{
|
|
"epoch": 3.9126984126984126,
|
|
"grad_norm": 0.36560989375429104,
|
|
"learning_rate": 1.939061427315179e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28445857763290405,
|
|
"step": 2465,
|
|
"valid_targets_mean": 6673.5,
|
|
"valid_targets_min": 6010
|
|
},
|
|
{
|
|
"epoch": 3.9206349206349205,
|
|
"grad_norm": 0.35531489620879153,
|
|
"learning_rate": 1.931150274863265e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744136452674866,
|
|
"step": 2470,
|
|
"valid_targets_mean": 6795.2,
|
|
"valid_targets_min": 5786
|
|
},
|
|
{
|
|
"epoch": 3.928571428571429,
|
|
"grad_norm": 0.3387369739289266,
|
|
"learning_rate": 1.9232402008096643e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2611912190914154,
|
|
"step": 2475,
|
|
"valid_targets_mean": 7429.9,
|
|
"valid_targets_min": 5788
|
|
},
|
|
{
|
|
"epoch": 3.9365079365079367,
|
|
"grad_norm": 0.34690391077590477,
|
|
"learning_rate": 1.9153313290504495e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782368063926697,
|
|
"step": 2480,
|
|
"valid_targets_mean": 6753.2,
|
|
"valid_targets_min": 5924
|
|
},
|
|
{
|
|
"epoch": 3.9444444444444446,
|
|
"grad_norm": 0.3485953105124751,
|
|
"learning_rate": 1.9074237834628623e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28297412395477295,
|
|
"step": 2485,
|
|
"valid_targets_mean": 6714.7,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 3.9523809523809526,
|
|
"grad_norm": 0.31003252703812095,
|
|
"learning_rate": 1.8995176879033698e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699219584465027,
|
|
"step": 2490,
|
|
"valid_targets_mean": 7464.6,
|
|
"valid_targets_min": 6101
|
|
},
|
|
{
|
|
"epoch": 3.9603174603174605,
|
|
"grad_norm": 0.33323718352747117,
|
|
"learning_rate": 1.89161316620573e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29058215022087097,
|
|
"step": 2495,
|
|
"valid_targets_mean": 7132.1,
|
|
"valid_targets_min": 6163
|
|
},
|
|
{
|
|
"epoch": 3.9682539682539684,
|
|
"grad_norm": 0.3650101325307968,
|
|
"learning_rate": 1.8837103421790486e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2873576283454895,
|
|
"step": 2500,
|
|
"valid_targets_mean": 6706.6,
|
|
"valid_targets_min": 6038
|
|
},
|
|
{
|
|
"epoch": 3.9761904761904763,
|
|
"grad_norm": 0.351374962157887,
|
|
"learning_rate": 1.8758093396058386e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26560264825820923,
|
|
"step": 2505,
|
|
"valid_targets_mean": 6826.4,
|
|
"valid_targets_min": 4870
|
|
},
|
|
{
|
|
"epoch": 3.984126984126984,
|
|
"grad_norm": 0.3480996416133145,
|
|
"learning_rate": 1.8679102822400874e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28964635729789734,
|
|
"step": 2510,
|
|
"valid_targets_mean": 6804.9,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 3.992063492063492,
|
|
"grad_norm": 0.3649332629474215,
|
|
"learning_rate": 1.8600132938053098e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27446383237838745,
|
|
"step": 2515,
|
|
"valid_targets_mean": 6677.0,
|
|
"valid_targets_min": 6039
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.3602129959940527,
|
|
"learning_rate": 1.8521184979926177e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28203096985816956,
|
|
"step": 2520,
|
|
"valid_targets_mean": 6337.8,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 4.007936507936508,
|
|
"grad_norm": 0.3738818996135606,
|
|
"learning_rate": 1.8442260184587804e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29567456245422363,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6790.1,
|
|
"valid_targets_min": 5786
|
|
},
|
|
{
|
|
"epoch": 4.015873015873016,
|
|
"grad_norm": 0.33659630477583885,
|
|
"learning_rate": 1.8363359788242842e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26767486333847046,
|
|
"step": 2530,
|
|
"valid_targets_mean": 6689.0,
|
|
"valid_targets_min": 6100
|
|
},
|
|
{
|
|
"epoch": 4.023809523809524,
|
|
"grad_norm": 0.2884562305666483,
|
|
"learning_rate": 1.8284485026714013e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2555672824382782,
|
|
"step": 2535,
|
|
"valid_targets_mean": 8353.6,
|
|
"valid_targets_min": 6102
|
|
},
|
|
{
|
|
"epoch": 4.031746031746032,
|
|
"grad_norm": 0.32552321781125193,
|
|
"learning_rate": 1.8205637135422525e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688191831111908,
|
|
"step": 2540,
|
|
"valid_targets_mean": 7086.2,
|
|
"valid_targets_min": 6315
|
|
},
|
|
{
|
|
"epoch": 4.0396825396825395,
|
|
"grad_norm": 0.3432887887198242,
|
|
"learning_rate": 1.8126817349368697e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276351660490036,
|
|
"step": 2545,
|
|
"valid_targets_mean": 6623.9,
|
|
"valid_targets_min": 5638
|
|
},
|
|
{
|
|
"epoch": 4.0476190476190474,
|
|
"grad_norm": 0.3334802241819227,
|
|
"learning_rate": 1.8048026903112632e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845083177089691,
|
|
"step": 2550,
|
|
"valid_targets_mean": 6838.1,
|
|
"valid_targets_min": 6137
|
|
},
|
|
{
|
|
"epoch": 4.055555555555555,
|
|
"grad_norm": 0.3454670124430183,
|
|
"learning_rate": 1.7969267030754903e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2850227952003479,
|
|
"step": 2555,
|
|
"valid_targets_mean": 6856.4,
|
|
"valid_targets_min": 6002
|
|
},
|
|
{
|
|
"epoch": 4.063492063492063,
|
|
"grad_norm": 0.351674356528408,
|
|
"learning_rate": 1.7890538965917184e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673512101173401,
|
|
"step": 2560,
|
|
"valid_targets_mean": 7477.4,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 4.071428571428571,
|
|
"grad_norm": 0.36708610941538633,
|
|
"learning_rate": 1.7811843941722952e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781935930252075,
|
|
"step": 2565,
|
|
"valid_targets_mean": 6754.1,
|
|
"valid_targets_min": 5968
|
|
},
|
|
{
|
|
"epoch": 4.079365079365079,
|
|
"grad_norm": 0.3561075868003765,
|
|
"learning_rate": 1.7733183190778174e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27610060572624207,
|
|
"step": 2570,
|
|
"valid_targets_mean": 6701.4,
|
|
"valid_targets_min": 5804
|
|
},
|
|
{
|
|
"epoch": 4.087301587301587,
|
|
"grad_norm": 0.3680403124727013,
|
|
"learning_rate": 1.7654557945151968e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28913071751594543,
|
|
"step": 2575,
|
|
"valid_targets_mean": 7357.1,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 4.095238095238095,
|
|
"grad_norm": 0.36620827937989026,
|
|
"learning_rate": 1.7575969436357352e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28242844343185425,
|
|
"step": 2580,
|
|
"valid_targets_mean": 6871.2,
|
|
"valid_targets_min": 5898
|
|
},
|
|
{
|
|
"epoch": 4.103174603174603,
|
|
"grad_norm": 0.3291598013705393,
|
|
"learning_rate": 1.7497418895331934e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2662534713745117,
|
|
"step": 2585,
|
|
"valid_targets_mean": 7544.7,
|
|
"valid_targets_min": 5977
|
|
},
|
|
{
|
|
"epoch": 4.111111111111111,
|
|
"grad_norm": 0.3846255730626335,
|
|
"learning_rate": 1.7418907552418597e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932029962539673,
|
|
"step": 2590,
|
|
"valid_targets_mean": 6805.8,
|
|
"valid_targets_min": 6135
|
|
},
|
|
{
|
|
"epoch": 4.119047619047619,
|
|
"grad_norm": 0.3570777351842093,
|
|
"learning_rate": 1.7340436637346315e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3036203384399414,
|
|
"step": 2595,
|
|
"valid_targets_mean": 6784.1,
|
|
"valid_targets_min": 6071
|
|
},
|
|
{
|
|
"epoch": 4.1269841269841265,
|
|
"grad_norm": 0.3504236292263725,
|
|
"learning_rate": 1.726200737921079e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28338032960891724,
|
|
"step": 2600,
|
|
"valid_targets_mean": 6774.2,
|
|
"valid_targets_min": 6185
|
|
},
|
|
{
|
|
"epoch": 4.134920634920635,
|
|
"grad_norm": 0.3697739074731332,
|
|
"learning_rate": 1.718362100645527e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28275614976882935,
|
|
"step": 2605,
|
|
"valid_targets_mean": 6163.8,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 4.142857142857143,
|
|
"grad_norm": 0.35767149069118115,
|
|
"learning_rate": 1.710527874685129e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27221107482910156,
|
|
"step": 2610,
|
|
"valid_targets_mean": 6815.8,
|
|
"valid_targets_min": 6283
|
|
},
|
|
{
|
|
"epoch": 4.150793650793651,
|
|
"grad_norm": 0.381630153537236,
|
|
"learning_rate": 1.702698182747942e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813687324523926,
|
|
"step": 2615,
|
|
"valid_targets_mean": 6500.3,
|
|
"valid_targets_min": 3205
|
|
},
|
|
{
|
|
"epoch": 4.158730158730159,
|
|
"grad_norm": 0.33027967841404343,
|
|
"learning_rate": 1.6948731474710075e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25854647159576416,
|
|
"step": 2620,
|
|
"valid_targets_mean": 6868.4,
|
|
"valid_targets_min": 3817
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.3441008767346702,
|
|
"learning_rate": 1.68705289141843e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27836495637893677,
|
|
"step": 2625,
|
|
"valid_targets_mean": 6605.9,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 4.174603174603175,
|
|
"grad_norm": 0.3807436704842077,
|
|
"learning_rate": 1.679237537079454e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3011740744113922,
|
|
"step": 2630,
|
|
"valid_targets_mean": 6904.2,
|
|
"valid_targets_min": 6261
|
|
},
|
|
{
|
|
"epoch": 4.182539682539683,
|
|
"grad_norm": 0.35685398372800026,
|
|
"learning_rate": 1.6714272068665526e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26164132356643677,
|
|
"step": 2635,
|
|
"valid_targets_mean": 6742.5,
|
|
"valid_targets_min": 6215
|
|
},
|
|
{
|
|
"epoch": 4.190476190476191,
|
|
"grad_norm": 0.3577189142400484,
|
|
"learning_rate": 1.663622023113501e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275162935256958,
|
|
"step": 2640,
|
|
"valid_targets_mean": 6857.2,
|
|
"valid_targets_min": 6164
|
|
},
|
|
{
|
|
"epoch": 4.198412698412699,
|
|
"grad_norm": 0.36584609766366044,
|
|
"learning_rate": 1.655822108073467e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753656208515167,
|
|
"step": 2645,
|
|
"valid_targets_mean": 7389.6,
|
|
"valid_targets_min": 6111
|
|
},
|
|
{
|
|
"epoch": 4.2063492063492065,
|
|
"grad_norm": 0.34716793539879914,
|
|
"learning_rate": 1.648027583917095e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808184027671814,
|
|
"step": 2650,
|
|
"valid_targets_mean": 6578.2,
|
|
"valid_targets_min": 5740
|
|
},
|
|
{
|
|
"epoch": 4.214285714285714,
|
|
"grad_norm": 0.3777588177175829,
|
|
"learning_rate": 1.640238572730591e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077867031097412,
|
|
"step": 2655,
|
|
"valid_targets_mean": 6850.8,
|
|
"valid_targets_min": 5696
|
|
},
|
|
{
|
|
"epoch": 4.222222222222222,
|
|
"grad_norm": 0.3383331525631053,
|
|
"learning_rate": 1.632455196513809e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28449374437332153,
|
|
"step": 2660,
|
|
"valid_targets_mean": 6602.4,
|
|
"valid_targets_min": 5840
|
|
},
|
|
{
|
|
"epoch": 4.23015873015873,
|
|
"grad_norm": 0.3568537950686811,
|
|
"learning_rate": 1.624677577178345e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26558178663253784,
|
|
"step": 2665,
|
|
"valid_targets_mean": 6927.5,
|
|
"valid_targets_min": 6178
|
|
},
|
|
{
|
|
"epoch": 4.238095238095238,
|
|
"grad_norm": 0.33723371140443054,
|
|
"learning_rate": 1.616905836545624e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28151506185531616,
|
|
"step": 2670,
|
|
"valid_targets_mean": 6251.4,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 4.246031746031746,
|
|
"grad_norm": 0.34401470170525944,
|
|
"learning_rate": 1.6091400963449894e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27869611978530884,
|
|
"step": 2675,
|
|
"valid_targets_mean": 6795.4,
|
|
"valid_targets_min": 5556
|
|
},
|
|
{
|
|
"epoch": 4.253968253968254,
|
|
"grad_norm": 0.32962006545392486,
|
|
"learning_rate": 1.6013804782118043e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26756420731544495,
|
|
"step": 2680,
|
|
"valid_targets_mean": 6725.4,
|
|
"valid_targets_min": 6145
|
|
},
|
|
{
|
|
"epoch": 4.261904761904762,
|
|
"grad_norm": 0.37490151654538306,
|
|
"learning_rate": 1.5936271036855372e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836240828037262,
|
|
"step": 2685,
|
|
"valid_targets_mean": 6745.5,
|
|
"valid_targets_min": 5770
|
|
},
|
|
{
|
|
"epoch": 4.26984126984127,
|
|
"grad_norm": 0.36868351161463264,
|
|
"learning_rate": 1.585880094207864e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26008307933807373,
|
|
"step": 2690,
|
|
"valid_targets_mean": 6658.9,
|
|
"valid_targets_min": 5575
|
|
},
|
|
{
|
|
"epoch": 4.277777777777778,
|
|
"grad_norm": 0.3343647049075332,
|
|
"learning_rate": 1.5781395711207664e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2714908719062805,
|
|
"step": 2695,
|
|
"valid_targets_mean": 6678.2,
|
|
"valid_targets_min": 5758
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.32852862635696184,
|
|
"learning_rate": 1.5704056556646255e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28126946091651917,
|
|
"step": 2700,
|
|
"valid_targets_mean": 6714.4,
|
|
"valid_targets_min": 5839
|
|
},
|
|
{
|
|
"epoch": 4.2936507936507935,
|
|
"grad_norm": 0.3614462057130427,
|
|
"learning_rate": 1.562678468976329e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28286993503570557,
|
|
"step": 2705,
|
|
"valid_targets_mean": 6417.3,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 4.301587301587301,
|
|
"grad_norm": 0.36689896042453257,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28190481662750244,
|
|
"step": 2710,
|
|
"valid_targets_mean": 6498.0,
|
|
"valid_targets_min": 4754
|
|
},
|
|
{
|
|
"epoch": 4.309523809523809,
|
|
"grad_norm": 0.34316275362279547,
|
|
"learning_rate": 1.5472447659219573e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28224173188209534,
|
|
"step": 2715,
|
|
"valid_targets_mean": 6613.8,
|
|
"valid_targets_min": 6184
|
|
},
|
|
{
|
|
"epoch": 4.317460317460317,
|
|
"grad_norm": 0.34383830510905494,
|
|
"learning_rate": 1.5395384912951096e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27297505736351013,
|
|
"step": 2720,
|
|
"valid_targets_mean": 6719.1,
|
|
"valid_targets_min": 5675
|
|
},
|
|
{
|
|
"epoch": 4.325396825396825,
|
|
"grad_norm": 0.344056145734644,
|
|
"learning_rate": 1.531839428910774e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27190646529197693,
|
|
"step": 2725,
|
|
"valid_targets_mean": 6744.2,
|
|
"valid_targets_min": 6102
|
|
},
|
|
{
|
|
"epoch": 4.333333333333333,
|
|
"grad_norm": 0.3592322986076419,
|
|
"learning_rate": 1.5241476993599318e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834925651550293,
|
|
"step": 2730,
|
|
"valid_targets_mean": 6188.3,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 4.341269841269841,
|
|
"grad_norm": 0.39622692071049276,
|
|
"learning_rate": 1.5164634231187106e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778765559196472,
|
|
"step": 2735,
|
|
"valid_targets_mean": 6733.4,
|
|
"valid_targets_min": 5983
|
|
},
|
|
{
|
|
"epoch": 4.349206349206349,
|
|
"grad_norm": 0.35308295018924957,
|
|
"learning_rate": 1.5087867205464933e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.287126362323761,
|
|
"step": 2740,
|
|
"valid_targets_mean": 6595.4,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 4.357142857142857,
|
|
"grad_norm": 0.3527027704458163,
|
|
"learning_rate": 1.5011177118840376e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564517855644226,
|
|
"step": 2745,
|
|
"valid_targets_mean": 6963.6,
|
|
"valid_targets_min": 5691
|
|
},
|
|
{
|
|
"epoch": 4.365079365079365,
|
|
"grad_norm": 0.3214116614993063,
|
|
"learning_rate": 1.4934565172515917e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741885781288147,
|
|
"step": 2750,
|
|
"valid_targets_mean": 7264.9,
|
|
"valid_targets_min": 5892
|
|
},
|
|
{
|
|
"epoch": 4.3730158730158735,
|
|
"grad_norm": 0.3341322654229478,
|
|
"learning_rate": 1.4858032566470107e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765420079231262,
|
|
"step": 2755,
|
|
"valid_targets_mean": 6824.6,
|
|
"valid_targets_min": 5999
|
|
},
|
|
{
|
|
"epoch": 4.380952380952381,
|
|
"grad_norm": 0.3415197785628832,
|
|
"learning_rate": 1.4781580499438794e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803388833999634,
|
|
"step": 2760,
|
|
"valid_targets_mean": 6031.8,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 4.388888888888889,
|
|
"grad_norm": 0.34144641879957593,
|
|
"learning_rate": 1.4705210168896327e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2839279770851135,
|
|
"step": 2765,
|
|
"valid_targets_mean": 6826.1,
|
|
"valid_targets_min": 6370
|
|
},
|
|
{
|
|
"epoch": 4.396825396825397,
|
|
"grad_norm": 0.34887848623366835,
|
|
"learning_rate": 1.462892277103681e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289436399936676,
|
|
"step": 2770,
|
|
"valid_targets_mean": 7288.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 4.404761904761905,
|
|
"grad_norm": 0.3319744880056703,
|
|
"learning_rate": 1.455271950075539e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2769818603992462,
|
|
"step": 2775,
|
|
"valid_targets_mean": 7413.7,
|
|
"valid_targets_min": 6318
|
|
},
|
|
{
|
|
"epoch": 4.412698412698413,
|
|
"grad_norm": 0.34784372116820667,
|
|
"learning_rate": 1.4476601551629493e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28152358531951904,
|
|
"step": 2780,
|
|
"valid_targets_mean": 6921.0,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 4.420634920634921,
|
|
"grad_norm": 0.35972325398655464,
|
|
"learning_rate": 1.4400570115900147e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27402442693710327,
|
|
"step": 2785,
|
|
"valid_targets_mean": 6010.1,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 4.428571428571429,
|
|
"grad_norm": 0.3706021372706739,
|
|
"learning_rate": 1.4324626384453345e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261771559715271,
|
|
"step": 2790,
|
|
"valid_targets_mean": 6700.4,
|
|
"valid_targets_min": 5627
|
|
},
|
|
{
|
|
"epoch": 4.436507936507937,
|
|
"grad_norm": 0.32865584138339055,
|
|
"learning_rate": 1.4248771546801339e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805875241756439,
|
|
"step": 2795,
|
|
"valid_targets_mean": 7139.8,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 0.32566805608129223,
|
|
"learning_rate": 1.4173006791064023e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694135904312134,
|
|
"step": 2800,
|
|
"valid_targets_mean": 6892.6,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.4523809523809526,
|
|
"grad_norm": 0.3442728897834653,
|
|
"learning_rate": 1.4097333303950368e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591538727283478,
|
|
"step": 2805,
|
|
"valid_targets_mean": 6612.6,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 4.4603174603174605,
|
|
"grad_norm": 0.3555360546309791,
|
|
"learning_rate": 1.4021752270739759e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295727014541626,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6742.2,
|
|
"valid_targets_min": 5761
|
|
},
|
|
{
|
|
"epoch": 4.468253968253968,
|
|
"grad_norm": 0.3255566264469316,
|
|
"learning_rate": 1.3946264875263485e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272371381521225,
|
|
"step": 2815,
|
|
"valid_targets_mean": 6372.4,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 4.476190476190476,
|
|
"grad_norm": 0.3551072332135445,
|
|
"learning_rate": 1.3870872299886184e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27101507782936096,
|
|
"step": 2820,
|
|
"valid_targets_mean": 6295.2,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 4.484126984126984,
|
|
"grad_norm": 0.3750262477871447,
|
|
"learning_rate": 1.3795575725487303e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.282275527715683,
|
|
"step": 2825,
|
|
"valid_targets_mean": 6870.9,
|
|
"valid_targets_min": 6180
|
|
},
|
|
{
|
|
"epoch": 4.492063492063492,
|
|
"grad_norm": 0.3519411178520911,
|
|
"learning_rate": 1.3720376331442652e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26568925380706787,
|
|
"step": 2830,
|
|
"valid_targets_mean": 6193.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"grad_norm": 0.3534333075377457,
|
|
"learning_rate": 1.364527529560586e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830936312675476,
|
|
"step": 2835,
|
|
"valid_targets_mean": 6957.4,
|
|
"valid_targets_min": 6087
|
|
},
|
|
{
|
|
"epoch": 4.507936507936508,
|
|
"grad_norm": 0.2765381717847923,
|
|
"learning_rate": 1.3570273794289978e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551390528678894,
|
|
"step": 2840,
|
|
"valid_targets_mean": 8466.3,
|
|
"valid_targets_min": 5962
|
|
},
|
|
{
|
|
"epoch": 4.515873015873016,
|
|
"grad_norm": 0.35532903607896826,
|
|
"learning_rate": 1.3495373002249061e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869076728820801,
|
|
"step": 2845,
|
|
"valid_targets_mean": 6751.3,
|
|
"valid_targets_min": 6090
|
|
},
|
|
{
|
|
"epoch": 4.523809523809524,
|
|
"grad_norm": 0.3117542148215137,
|
|
"learning_rate": 1.3420574092659713e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599877119064331,
|
|
"step": 2850,
|
|
"valid_targets_mean": 7623.4,
|
|
"valid_targets_min": 5932
|
|
},
|
|
{
|
|
"epoch": 4.531746031746032,
|
|
"grad_norm": 0.36239618036830007,
|
|
"learning_rate": 1.3345878237102766e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802869379520416,
|
|
"step": 2855,
|
|
"valid_targets_mean": 6769.9,
|
|
"valid_targets_min": 4327
|
|
},
|
|
{
|
|
"epoch": 4.5396825396825395,
|
|
"grad_norm": 0.3346747541679857,
|
|
"learning_rate": 1.3271286605544906e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2864949405193329,
|
|
"step": 2860,
|
|
"valid_targets_mean": 6749.3,
|
|
"valid_targets_min": 6108
|
|
},
|
|
{
|
|
"epoch": 4.5476190476190474,
|
|
"grad_norm": 0.3344770754739249,
|
|
"learning_rate": 1.3196800366320357e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27994322776794434,
|
|
"step": 2865,
|
|
"valid_targets_mean": 7336.7,
|
|
"valid_targets_min": 2123
|
|
},
|
|
{
|
|
"epoch": 4.555555555555555,
|
|
"grad_norm": 0.3172586087503977,
|
|
"learning_rate": 1.3122420686112554e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791077196598053,
|
|
"step": 2870,
|
|
"valid_targets_mean": 7057.9,
|
|
"valid_targets_min": 5740
|
|
},
|
|
{
|
|
"epoch": 4.563492063492063,
|
|
"grad_norm": 0.33733405662972454,
|
|
"learning_rate": 1.3048148729935917e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.278312087059021,
|
|
"step": 2875,
|
|
"valid_targets_mean": 6818.2,
|
|
"valid_targets_min": 6147
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.32590351514422466,
|
|
"learning_rate": 1.297398566111756e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26736336946487427,
|
|
"step": 2880,
|
|
"valid_targets_mean": 7638.9,
|
|
"valid_targets_min": 5945
|
|
},
|
|
{
|
|
"epoch": 4.579365079365079,
|
|
"grad_norm": 0.32359648237313043,
|
|
"learning_rate": 1.2899932641279082e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2800748944282532,
|
|
"step": 2885,
|
|
"valid_targets_mean": 7671.9,
|
|
"valid_targets_min": 5818
|
|
},
|
|
{
|
|
"epoch": 4.587301587301587,
|
|
"grad_norm": 0.3484154427606534,
|
|
"learning_rate": 1.2825990830318395e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26860713958740234,
|
|
"step": 2890,
|
|
"valid_targets_mean": 6941.8,
|
|
"valid_targets_min": 6005
|
|
},
|
|
{
|
|
"epoch": 4.595238095238095,
|
|
"grad_norm": 0.32333409822653625,
|
|
"learning_rate": 1.2752161386391526e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27400338649749756,
|
|
"step": 2895,
|
|
"valid_targets_mean": 6556.2,
|
|
"valid_targets_min": 5260
|
|
},
|
|
{
|
|
"epoch": 4.603174603174603,
|
|
"grad_norm": 0.3464888747034742,
|
|
"learning_rate": 1.2678445465894491e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608845829963684,
|
|
"step": 2900,
|
|
"valid_targets_mean": 7558.2,
|
|
"valid_targets_min": 5051
|
|
},
|
|
{
|
|
"epoch": 4.611111111111111,
|
|
"grad_norm": 0.35743764648178616,
|
|
"learning_rate": 1.2604844223445181e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28071358799934387,
|
|
"step": 2905,
|
|
"valid_targets_mean": 6494.7,
|
|
"valid_targets_min": 5843
|
|
},
|
|
{
|
|
"epoch": 4.619047619047619,
|
|
"grad_norm": 0.3306474715643804,
|
|
"learning_rate": 1.2531358811865268e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26603472232818604,
|
|
"step": 2910,
|
|
"valid_targets_mean": 7092.6,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.6269841269841265,
|
|
"grad_norm": 0.34390090020566166,
|
|
"learning_rate": 1.2457990382162173e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2714117169380188,
|
|
"step": 2915,
|
|
"valid_targets_mean": 6658.8,
|
|
"valid_targets_min": 5786
|
|
},
|
|
{
|
|
"epoch": 4.634920634920634,
|
|
"grad_norm": 0.3388719192671874,
|
|
"learning_rate": 1.238474008351101e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836211323738098,
|
|
"step": 2920,
|
|
"valid_targets_mean": 7085.6,
|
|
"valid_targets_min": 6087
|
|
},
|
|
{
|
|
"epoch": 4.642857142857143,
|
|
"grad_norm": 0.3487528631837358,
|
|
"learning_rate": 1.2311609063236594e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811795473098755,
|
|
"step": 2925,
|
|
"valid_targets_mean": 7380.8,
|
|
"valid_targets_min": 6014
|
|
},
|
|
{
|
|
"epoch": 4.650793650793651,
|
|
"grad_norm": 0.34357010686646894,
|
|
"learning_rate": 1.2238598466795493e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2876550257205963,
|
|
"step": 2930,
|
|
"valid_targets_mean": 7258.0,
|
|
"valid_targets_min": 5637
|
|
},
|
|
{
|
|
"epoch": 4.658730158730159,
|
|
"grad_norm": 0.3306902044308892,
|
|
"learning_rate": 1.2165709437758042e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28965097665786743,
|
|
"step": 2935,
|
|
"valid_targets_mean": 7837.8,
|
|
"valid_targets_min": 6338
|
|
},
|
|
{
|
|
"epoch": 4.666666666666667,
|
|
"grad_norm": 0.354529497091284,
|
|
"learning_rate": 1.209294311779047e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819896936416626,
|
|
"step": 2940,
|
|
"valid_targets_mean": 6963.2,
|
|
"valid_targets_min": 6274
|
|
},
|
|
{
|
|
"epoch": 4.674603174603175,
|
|
"grad_norm": 0.3386684093808595,
|
|
"learning_rate": 1.2020300646637018e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27036818861961365,
|
|
"step": 2945,
|
|
"valid_targets_mean": 7055.8,
|
|
"valid_targets_min": 6158
|
|
},
|
|
{
|
|
"epoch": 4.682539682539683,
|
|
"grad_norm": 0.35130142409798576,
|
|
"learning_rate": 1.1947783162102043e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2777625024318695,
|
|
"step": 2950,
|
|
"valid_targets_mean": 6507.8,
|
|
"valid_targets_min": 5314
|
|
},
|
|
{
|
|
"epoch": 4.690476190476191,
|
|
"grad_norm": 0.3364816316266462,
|
|
"learning_rate": 1.1875391800032248e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26677021384239197,
|
|
"step": 2955,
|
|
"valid_targets_mean": 6097.0,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 4.698412698412699,
|
|
"grad_norm": 0.3196634113483736,
|
|
"learning_rate": 1.1803127694298873e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254010945558548,
|
|
"step": 2960,
|
|
"valid_targets_mean": 6682.3,
|
|
"valid_targets_min": 6221
|
|
},
|
|
{
|
|
"epoch": 4.7063492063492065,
|
|
"grad_norm": 0.362375559790429,
|
|
"learning_rate": 1.173099197677992e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2769344449043274,
|
|
"step": 2965,
|
|
"valid_targets_mean": 6716.1,
|
|
"valid_targets_min": 6284
|
|
},
|
|
{
|
|
"epoch": 4.714285714285714,
|
|
"grad_norm": 0.338512711064312,
|
|
"learning_rate": 1.1658985777342458e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758563756942749,
|
|
"step": 2970,
|
|
"valid_targets_mean": 6844.2,
|
|
"valid_targets_min": 6080
|
|
},
|
|
{
|
|
"epoch": 4.722222222222222,
|
|
"grad_norm": 0.3266578045462592,
|
|
"learning_rate": 1.1587110223824874e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26141127943992615,
|
|
"step": 2975,
|
|
"valid_targets_mean": 7169.9,
|
|
"valid_targets_min": 5606
|
|
},
|
|
{
|
|
"epoch": 4.73015873015873,
|
|
"grad_norm": 0.37814257530040063,
|
|
"learning_rate": 1.151536644201925e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667842507362366,
|
|
"step": 2980,
|
|
"valid_targets_mean": 6421.6,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 4.738095238095238,
|
|
"grad_norm": 0.32679827182407334,
|
|
"learning_rate": 1.1443755555653751e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27391499280929565,
|
|
"step": 2985,
|
|
"valid_targets_mean": 8592.7,
|
|
"valid_targets_min": 5729
|
|
},
|
|
{
|
|
"epoch": 4.746031746031746,
|
|
"grad_norm": 0.2902846437599138,
|
|
"learning_rate": 1.1372278686374935e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25189000368118286,
|
|
"step": 2990,
|
|
"valid_targets_mean": 7825.6,
|
|
"valid_targets_min": 6329
|
|
},
|
|
{
|
|
"epoch": 4.753968253968254,
|
|
"grad_norm": 0.3024133392778004,
|
|
"learning_rate": 1.1300936953730273e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2436165064573288,
|
|
"step": 2995,
|
|
"valid_targets_mean": 6845.9,
|
|
"valid_targets_min": 5972
|
|
},
|
|
{
|
|
"epoch": 4.761904761904762,
|
|
"grad_norm": 0.30186997555906786,
|
|
"learning_rate": 1.1229731475150594e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27404844760894775,
|
|
"step": 3000,
|
|
"valid_targets_mean": 7831.8,
|
|
"valid_targets_min": 6060
|
|
},
|
|
{
|
|
"epoch": 4.76984126984127,
|
|
"grad_norm": 0.3044608918901399,
|
|
"learning_rate": 1.1158663365932529e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2712925672531128,
|
|
"step": 3005,
|
|
"valid_targets_mean": 7839.4,
|
|
"valid_targets_min": 6171
|
|
},
|
|
{
|
|
"epoch": 4.777777777777778,
|
|
"grad_norm": 0.3466720069414081,
|
|
"learning_rate": 1.1087733739221109e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28900763392448425,
|
|
"step": 3010,
|
|
"valid_targets_mean": 6677.2,
|
|
"valid_targets_min": 4647
|
|
},
|
|
{
|
|
"epoch": 4.785714285714286,
|
|
"grad_norm": 0.33682463082958813,
|
|
"learning_rate": 1.1016943705992311e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28141486644744873,
|
|
"step": 3015,
|
|
"valid_targets_mean": 6453.2,
|
|
"valid_targets_min": 3182
|
|
},
|
|
{
|
|
"epoch": 4.7936507936507935,
|
|
"grad_norm": 0.3499774906439442,
|
|
"learning_rate": 1.0946294375035639e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29249894618988037,
|
|
"step": 3020,
|
|
"valid_targets_mean": 6304.6,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 4.801587301587301,
|
|
"grad_norm": 0.3430075539691763,
|
|
"learning_rate": 1.087578685293674e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803530693054199,
|
|
"step": 3025,
|
|
"valid_targets_mean": 6753.1,
|
|
"valid_targets_min": 6306
|
|
},
|
|
{
|
|
"epoch": 4.809523809523809,
|
|
"grad_norm": 0.34572404470773627,
|
|
"learning_rate": 1.080542224406015e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27616608142852783,
|
|
"step": 3030,
|
|
"valid_targets_mean": 6765.9,
|
|
"valid_targets_min": 6010
|
|
},
|
|
{
|
|
"epoch": 4.817460317460317,
|
|
"grad_norm": 0.34415956960344674,
|
|
"learning_rate": 1.0735201650531915e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2876840829849243,
|
|
"step": 3035,
|
|
"valid_targets_mean": 6845.6,
|
|
"valid_targets_min": 6214
|
|
},
|
|
{
|
|
"epoch": 4.825396825396825,
|
|
"grad_norm": 0.3190235826366181,
|
|
"learning_rate": 1.066512617222235e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785532474517822,
|
|
"step": 3040,
|
|
"valid_targets_mean": 7328.1,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 4.833333333333333,
|
|
"grad_norm": 0.35229042488932955,
|
|
"learning_rate": 1.059519690672884e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29433807730674744,
|
|
"step": 3045,
|
|
"valid_targets_mean": 6873.1,
|
|
"valid_targets_min": 6062
|
|
},
|
|
{
|
|
"epoch": 4.841269841269841,
|
|
"grad_norm": 0.35937847180469645,
|
|
"learning_rate": 1.0525414949358614e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781003415584564,
|
|
"step": 3050,
|
|
"valid_targets_mean": 6887.3,
|
|
"valid_targets_min": 6136
|
|
},
|
|
{
|
|
"epoch": 4.849206349206349,
|
|
"grad_norm": 0.34167399996074727,
|
|
"learning_rate": 1.0455781393111613e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28492891788482666,
|
|
"step": 3055,
|
|
"valid_targets_mean": 6619.8,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.34729897600094706,
|
|
"learning_rate": 1.0386297328663353e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784905731678009,
|
|
"step": 3060,
|
|
"valid_targets_mean": 6423.9,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 4.865079365079366,
|
|
"grad_norm": 0.30813565074420535,
|
|
"learning_rate": 1.0316963844347843e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26624542474746704,
|
|
"step": 3065,
|
|
"valid_targets_mean": 7444.2,
|
|
"valid_targets_min": 5765
|
|
},
|
|
{
|
|
"epoch": 4.8730158730158735,
|
|
"grad_norm": 0.30357810200868,
|
|
"learning_rate": 1.0247782026140576e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707095742225647,
|
|
"step": 3070,
|
|
"valid_targets_mean": 7447.4,
|
|
"valid_targets_min": 5964
|
|
},
|
|
{
|
|
"epoch": 4.880952380952381,
|
|
"grad_norm": 0.33243923957301247,
|
|
"learning_rate": 1.017875295764144e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28286096453666687,
|
|
"step": 3075,
|
|
"valid_targets_mean": 6845.8,
|
|
"valid_targets_min": 6247
|
|
},
|
|
{
|
|
"epoch": 4.888888888888889,
|
|
"grad_norm": 0.32827569117455146,
|
|
"learning_rate": 1.0109877720057818e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2795287072658539,
|
|
"step": 3080,
|
|
"valid_targets_mean": 6924.8,
|
|
"valid_targets_min": 6236
|
|
},
|
|
{
|
|
"epoch": 4.896825396825397,
|
|
"grad_norm": 0.3511641000921196,
|
|
"learning_rate": 1.0041157392187651e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28283190727233887,
|
|
"step": 3085,
|
|
"valid_targets_mean": 6995.4,
|
|
"valid_targets_min": 5909
|
|
},
|
|
{
|
|
"epoch": 4.904761904761905,
|
|
"grad_norm": 0.3208380885233844,
|
|
"learning_rate": 9.972593050402471e-06,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2625943422317505,
|
|
"step": 3090,
|
|
"valid_targets_mean": 6923.7,
|
|
"valid_targets_min": 6457
|
|
},
|
|
{
|
|
"epoch": 4.912698412698413,
|
|
"grad_norm": 0.33246093359701406,
|
|
"learning_rate": 9.904185768630612e-06,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28158771991729736,
|
|
"step": 3095,
|
|
"valid_targets_mean": 6922.7,
|
|
"valid_targets_min": 5997
|
|
},
|
|
{
|
|
"epoch": 4.920634920634921,
|
|
"grad_norm": 0.3591496687972443,
|
|
"learning_rate": 9.835936618340377e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596362233161926,
|
|
"step": 3100,
|
|
"valid_targets_mean": 6590.8,
|
|
"valid_targets_min": 5947
|
|
},
|
|
{
|
|
"epoch": 4.928571428571429,
|
|
"grad_norm": 0.32086976008136986,
|
|
"learning_rate": 9.76784666852323e-06,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2809128165245056,
|
|
"step": 3105,
|
|
"valid_targets_mean": 7477.1,
|
|
"valid_targets_min": 5716
|
|
},
|
|
{
|
|
"epoch": 4.936507936507937,
|
|
"grad_norm": 0.35768966240923133,
|
|
"learning_rate": 9.699916985677062e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672625780105591,
|
|
"step": 3110,
|
|
"valid_targets_mean": 6806.3,
|
|
"valid_targets_min": 6073
|
|
},
|
|
{
|
|
"epoch": 4.944444444444445,
|
|
"grad_norm": 0.3724907458982945,
|
|
"learning_rate": 9.6321486337895e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930816411972046,
|
|
"step": 3115,
|
|
"valid_targets_mean": 6803.8,
|
|
"valid_targets_min": 6202
|
|
},
|
|
{
|
|
"epoch": 4.9523809523809526,
|
|
"grad_norm": 0.3562489440863743,
|
|
"learning_rate": 9.564542674321228e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3059656023979187,
|
|
"step": 3120,
|
|
"valid_targets_mean": 6785.9,
|
|
"valid_targets_min": 5315
|
|
},
|
|
{
|
|
"epoch": 4.9603174603174605,
|
|
"grad_norm": 0.36953628248348713,
|
|
"learning_rate": 9.49710016618937e-06,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913885712623596,
|
|
"step": 3125,
|
|
"valid_targets_mean": 6332.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 4.968253968253968,
|
|
"grad_norm": 0.31710657950918913,
|
|
"learning_rate": 9.429822165750893e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706819474697113,
|
|
"step": 3130,
|
|
"valid_targets_mean": 6685.6,
|
|
"valid_targets_min": 5940
|
|
},
|
|
{
|
|
"epoch": 4.976190476190476,
|
|
"grad_norm": 0.3677274778813916,
|
|
"learning_rate": 9.36270972678607e-06,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2962263822555542,
|
|
"step": 3135,
|
|
"valid_targets_mean": 6674.2,
|
|
"valid_targets_min": 5477
|
|
},
|
|
{
|
|
"epoch": 4.984126984126984,
|
|
"grad_norm": 0.3403043371040174,
|
|
"learning_rate": 9.295763900481977e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778708040714264,
|
|
"step": 3140,
|
|
"valid_targets_mean": 6537.1,
|
|
"valid_targets_min": 4025
|
|
},
|
|
{
|
|
"epoch": 4.992063492063492,
|
|
"grad_norm": 0.32579843994696506,
|
|
"learning_rate": 9.22898573541602e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751721143722534,
|
|
"step": 3145,
|
|
"valid_targets_mean": 7167.9,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.2991268936257293,
|
|
"learning_rate": 9.162376277539513e-06,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25648993253707886,
|
|
"step": 3150,
|
|
"valid_targets_mean": 6810.5,
|
|
"valid_targets_min": 5873
|
|
},
|
|
{
|
|
"epoch": 5.007936507936508,
|
|
"grad_norm": 0.32618012589136997,
|
|
"learning_rate": 9.095936570161301e-06,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686976194381714,
|
|
"step": 3155,
|
|
"valid_targets_mean": 6987.2,
|
|
"valid_targets_min": 6169
|
|
},
|
|
{
|
|
"epoch": 5.015873015873016,
|
|
"grad_norm": 0.34625756465882623,
|
|
"learning_rate": 9.029667653931411e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28517788648605347,
|
|
"step": 3160,
|
|
"valid_targets_mean": 7056.9,
|
|
"valid_targets_min": 6204
|
|
},
|
|
{
|
|
"epoch": 5.023809523809524,
|
|
"grad_norm": 0.3102075158981908,
|
|
"learning_rate": 8.96357056682475e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2710297703742981,
|
|
"step": 3165,
|
|
"valid_targets_mean": 6770.2,
|
|
"valid_targets_min": 5976
|
|
},
|
|
{
|
|
"epoch": 5.031746031746032,
|
|
"grad_norm": 0.32672413313312154,
|
|
"learning_rate": 8.897646344124882e-06,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837831974029541,
|
|
"step": 3170,
|
|
"valid_targets_mean": 7284.8,
|
|
"valid_targets_min": 6070
|
|
},
|
|
{
|
|
"epoch": 5.0396825396825395,
|
|
"grad_norm": 0.3533541184557666,
|
|
"learning_rate": 8.83189601840773e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856595516204834,
|
|
"step": 3175,
|
|
"valid_targets_mean": 6891.7,
|
|
"valid_targets_min": 6365
|
|
},
|
|
{
|
|
"epoch": 5.0476190476190474,
|
|
"grad_norm": 0.3171025768784316,
|
|
"learning_rate": 8.766320619525511e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626307010650635,
|
|
"step": 3180,
|
|
"valid_targets_mean": 6770.3,
|
|
"valid_targets_min": 5787
|
|
},
|
|
{
|
|
"epoch": 5.055555555555555,
|
|
"grad_norm": 0.36270341910002296,
|
|
"learning_rate": 8.700921174590525e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28715789318084717,
|
|
"step": 3185,
|
|
"valid_targets_mean": 6728.1,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 5.063492063492063,
|
|
"grad_norm": 0.3020006669007532,
|
|
"learning_rate": 8.63569870795907e-06,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2524801194667816,
|
|
"step": 3190,
|
|
"valid_targets_mean": 7463.0,
|
|
"valid_targets_min": 6073
|
|
},
|
|
{
|
|
"epoch": 5.071428571428571,
|
|
"grad_norm": 0.32035654319745765,
|
|
"learning_rate": 8.570654241215466e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28044942021369934,
|
|
"step": 3195,
|
|
"valid_targets_mean": 6753.2,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 5.079365079365079,
|
|
"grad_norm": 0.5095353612245247,
|
|
"learning_rate": 8.505788793155978e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26298630237579346,
|
|
"step": 3200,
|
|
"valid_targets_mean": 6660.0,
|
|
"valid_targets_min": 5782
|
|
},
|
|
{
|
|
"epoch": 5.087301587301587,
|
|
"grad_norm": 0.3074348419891196,
|
|
"learning_rate": 8.441103379772893e-06,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547451853752136,
|
|
"step": 3205,
|
|
"valid_targets_mean": 7573.4,
|
|
"valid_targets_min": 4954
|
|
},
|
|
{
|
|
"epoch": 5.095238095238095,
|
|
"grad_norm": 0.3261794260763178,
|
|
"learning_rate": 8.376599014238605e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672325074672699,
|
|
"step": 3210,
|
|
"valid_targets_mean": 6780.6,
|
|
"valid_targets_min": 6118
|
|
},
|
|
{
|
|
"epoch": 5.103174603174603,
|
|
"grad_norm": 0.3182456167229273,
|
|
"learning_rate": 8.312276706889738e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27260977029800415,
|
|
"step": 3215,
|
|
"valid_targets_mean": 6757.8,
|
|
"valid_targets_min": 5919
|
|
},
|
|
{
|
|
"epoch": 5.111111111111111,
|
|
"grad_norm": 0.32310335604644363,
|
|
"learning_rate": 8.24813746521133e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903539538383484,
|
|
"step": 3220,
|
|
"valid_targets_mean": 6787.9,
|
|
"valid_targets_min": 5975
|
|
},
|
|
{
|
|
"epoch": 5.119047619047619,
|
|
"grad_norm": 0.36185187801685575,
|
|
"learning_rate": 8.184182293821046e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983723282814026,
|
|
"step": 3225,
|
|
"valid_targets_mean": 6667.3,
|
|
"valid_targets_min": 5843
|
|
},
|
|
{
|
|
"epoch": 5.1269841269841265,
|
|
"grad_norm": 0.3314479799185527,
|
|
"learning_rate": 8.120412194453442e-06,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569195628166199,
|
|
"step": 3230,
|
|
"valid_targets_mean": 6418.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 5.134920634920635,
|
|
"grad_norm": 0.3208750679800644,
|
|
"learning_rate": 8.056828165944282e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2710353136062622,
|
|
"step": 3235,
|
|
"valid_targets_mean": 7673.3,
|
|
"valid_targets_min": 6229
|
|
},
|
|
{
|
|
"epoch": 5.142857142857143,
|
|
"grad_norm": 0.3512332898100303,
|
|
"learning_rate": 7.993431204214883e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763490676879883,
|
|
"step": 3240,
|
|
"valid_targets_mean": 6348.1,
|
|
"valid_targets_min": 4579
|
|
},
|
|
{
|
|
"epoch": 5.150793650793651,
|
|
"grad_norm": 0.3258543036973358,
|
|
"learning_rate": 7.93022230225652e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2693859040737152,
|
|
"step": 3245,
|
|
"valid_targets_mean": 6979.1,
|
|
"valid_targets_min": 5866
|
|
},
|
|
{
|
|
"epoch": 5.158730158730159,
|
|
"grad_norm": 0.3318873435791997,
|
|
"learning_rate": 7.867202450114892e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817775011062622,
|
|
"step": 3250,
|
|
"valid_targets_mean": 7260.8,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 5.166666666666667,
|
|
"grad_norm": 0.3314666993697846,
|
|
"learning_rate": 7.804372634874582e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27555152773857117,
|
|
"step": 3255,
|
|
"valid_targets_mean": 6649.2,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 5.174603174603175,
|
|
"grad_norm": 0.34064278958376853,
|
|
"learning_rate": 7.74173384064359e-06,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2745921015739441,
|
|
"step": 3260,
|
|
"valid_targets_mean": 6383.1,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 5.182539682539683,
|
|
"grad_norm": 0.3425787138578214,
|
|
"learning_rate": 7.679287048537987e-06,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29056185483932495,
|
|
"step": 3265,
|
|
"valid_targets_mean": 6630.2,
|
|
"valid_targets_min": 5330
|
|
},
|
|
{
|
|
"epoch": 5.190476190476191,
|
|
"grad_norm": 0.33069181744399917,
|
|
"learning_rate": 7.617033236666469e-06,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918166220188141,
|
|
"step": 3270,
|
|
"valid_targets_mean": 7712.5,
|
|
"valid_targets_min": 5611
|
|
},
|
|
{
|
|
"epoch": 5.198412698412699,
|
|
"grad_norm": 0.350216524012091,
|
|
"learning_rate": 7.55497338011506e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26443636417388916,
|
|
"step": 3275,
|
|
"valid_targets_mean": 6579.7,
|
|
"valid_targets_min": 6041
|
|
},
|
|
{
|
|
"epoch": 5.2063492063492065,
|
|
"grad_norm": 0.33952417757871717,
|
|
"learning_rate": 7.493108450931879e-06,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767696976661682,
|
|
"step": 3280,
|
|
"valid_targets_mean": 6806.3,
|
|
"valid_targets_min": 6005
|
|
},
|
|
{
|
|
"epoch": 5.214285714285714,
|
|
"grad_norm": 0.3473348615830595,
|
|
"learning_rate": 7.4314394181118636e-06,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688555419445038,
|
|
"step": 3285,
|
|
"valid_targets_mean": 6557.7,
|
|
"valid_targets_min": 4342
|
|
},
|
|
{
|
|
"epoch": 5.222222222222222,
|
|
"grad_norm": 0.2868033944259028,
|
|
"learning_rate": 7.369967247581611e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600844204425812,
|
|
"step": 3290,
|
|
"valid_targets_mean": 7589.8,
|
|
"valid_targets_min": 5870
|
|
},
|
|
{
|
|
"epoch": 5.23015873015873,
|
|
"grad_norm": 0.34000892804612176,
|
|
"learning_rate": 7.3086929021842575e-06,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754635214805603,
|
|
"step": 3295,
|
|
"valid_targets_mean": 5973.0,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 5.238095238095238,
|
|
"grad_norm": 0.3202463323636644,
|
|
"learning_rate": 7.247617341664384e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675333023071289,
|
|
"step": 3300,
|
|
"valid_targets_mean": 6686.8,
|
|
"valid_targets_min": 4880
|
|
},
|
|
{
|
|
"epoch": 5.246031746031746,
|
|
"grad_norm": 0.3420541077884274,
|
|
"learning_rate": 7.186741522652994e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780552804470062,
|
|
"step": 3305,
|
|
"valid_targets_mean": 6707.5,
|
|
"valid_targets_min": 6140
|
|
},
|
|
{
|
|
"epoch": 5.253968253968254,
|
|
"grad_norm": 0.343144199657932,
|
|
"learning_rate": 7.12606639865252e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28020888566970825,
|
|
"step": 3310,
|
|
"valid_targets_mean": 6741.2,
|
|
"valid_targets_min": 5712
|
|
},
|
|
{
|
|
"epoch": 5.261904761904762,
|
|
"grad_norm": 0.3117472122413256,
|
|
"learning_rate": 7.065592920021893e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856062948703766,
|
|
"step": 3315,
|
|
"valid_targets_mean": 7755.0,
|
|
"valid_targets_min": 6208
|
|
},
|
|
{
|
|
"epoch": 5.26984126984127,
|
|
"grad_norm": 0.3234547605361187,
|
|
"learning_rate": 7.005322033961679e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821074426174164,
|
|
"step": 3320,
|
|
"valid_targets_mean": 6971.4,
|
|
"valid_targets_min": 5819
|
|
},
|
|
{
|
|
"epoch": 5.277777777777778,
|
|
"grad_norm": 0.327176947756782,
|
|
"learning_rate": 6.945254684499185e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27074986696243286,
|
|
"step": 3325,
|
|
"valid_targets_mean": 6474.9,
|
|
"valid_targets_min": 5673
|
|
},
|
|
{
|
|
"epoch": 5.285714285714286,
|
|
"grad_norm": 0.30996143578841656,
|
|
"learning_rate": 6.8853918124737274e-06,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601577043533325,
|
|
"step": 3330,
|
|
"valid_targets_mean": 6882.0,
|
|
"valid_targets_min": 5918
|
|
},
|
|
{
|
|
"epoch": 5.2936507936507935,
|
|
"grad_norm": 0.3319748226653216,
|
|
"learning_rate": 6.825734355521898e-06,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832726240158081,
|
|
"step": 3335,
|
|
"valid_targets_mean": 7313.1,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 5.301587301587301,
|
|
"grad_norm": 0.3222600562338098,
|
|
"learning_rate": 6.766283248062817e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27643370628356934,
|
|
"step": 3340,
|
|
"valid_targets_mean": 6668.4,
|
|
"valid_targets_min": 5912
|
|
},
|
|
{
|
|
"epoch": 5.309523809523809,
|
|
"grad_norm": 0.30746116146876695,
|
|
"learning_rate": 6.707039421283559e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28225177526474,
|
|
"step": 3345,
|
|
"valid_targets_mean": 7756.9,
|
|
"valid_targets_min": 6432
|
|
},
|
|
{
|
|
"epoch": 5.317460317460317,
|
|
"grad_norm": 0.32116291158862903,
|
|
"learning_rate": 6.648003803124559e-06,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28007763624191284,
|
|
"step": 3350,
|
|
"valid_targets_mean": 6982.1,
|
|
"valid_targets_min": 5406
|
|
},
|
|
{
|
|
"epoch": 5.325396825396825,
|
|
"grad_norm": 0.3258508401290243,
|
|
"learning_rate": 6.589177318265047e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772740125656128,
|
|
"step": 3355,
|
|
"valid_targets_mean": 6621.7,
|
|
"valid_targets_min": 5512
|
|
},
|
|
{
|
|
"epoch": 5.333333333333333,
|
|
"grad_norm": 0.3269023228632337,
|
|
"learning_rate": 6.53056088810857e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27484679222106934,
|
|
"step": 3360,
|
|
"valid_targets_mean": 6600.2,
|
|
"valid_targets_min": 5714
|
|
},
|
|
{
|
|
"epoch": 5.341269841269841,
|
|
"grad_norm": 0.3458132571411164,
|
|
"learning_rate": 6.472155430768608e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28881144523620605,
|
|
"step": 3365,
|
|
"valid_targets_mean": 6912.2,
|
|
"valid_targets_min": 4728
|
|
},
|
|
{
|
|
"epoch": 5.349206349206349,
|
|
"grad_norm": 0.31597950270680997,
|
|
"learning_rate": 6.413961861054132e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780212163925171,
|
|
"step": 3370,
|
|
"valid_targets_mean": 7413.4,
|
|
"valid_targets_min": 5618
|
|
},
|
|
{
|
|
"epoch": 5.357142857142857,
|
|
"grad_norm": 0.3317942107117175,
|
|
"learning_rate": 6.3559810904553095e-06,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855583131313324,
|
|
"step": 3375,
|
|
"valid_targets_mean": 6804.4,
|
|
"valid_targets_min": 5478
|
|
},
|
|
{
|
|
"epoch": 5.365079365079365,
|
|
"grad_norm": 0.3475934479256111,
|
|
"learning_rate": 6.298214027129219e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768478989601135,
|
|
"step": 3380,
|
|
"valid_targets_mean": 6516.1,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 5.3730158730158735,
|
|
"grad_norm": 0.31741972255341716,
|
|
"learning_rate": 6.240661575885629e-06,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26733312010765076,
|
|
"step": 3385,
|
|
"valid_targets_mean": 7267.4,
|
|
"valid_targets_min": 4532
|
|
},
|
|
{
|
|
"epoch": 5.380952380952381,
|
|
"grad_norm": 0.3396324087218616,
|
|
"learning_rate": 6.183324638172819e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629362940788269,
|
|
"step": 3390,
|
|
"valid_targets_mean": 6623.4,
|
|
"valid_targets_min": 5804
|
|
},
|
|
{
|
|
"epoch": 5.388888888888889,
|
|
"grad_norm": 0.3276380360141541,
|
|
"learning_rate": 6.126204112063463e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2880324125289917,
|
|
"step": 3395,
|
|
"valid_targets_mean": 7351.4,
|
|
"valid_targets_min": 6095
|
|
},
|
|
{
|
|
"epoch": 5.396825396825397,
|
|
"grad_norm": 0.3249745238105346,
|
|
"learning_rate": 6.069300892240564e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27040308713912964,
|
|
"step": 3400,
|
|
"valid_targets_mean": 6939.4,
|
|
"valid_targets_min": 6066
|
|
},
|
|
{
|
|
"epoch": 5.404761904761905,
|
|
"grad_norm": 0.32480101962079744,
|
|
"learning_rate": 6.0126158699834625e-06,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835528254508972,
|
|
"step": 3405,
|
|
"valid_targets_mean": 6301.2,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 5.412698412698413,
|
|
"grad_norm": 0.3363470804506051,
|
|
"learning_rate": 5.956149933153816e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694054841995239,
|
|
"step": 3410,
|
|
"valid_targets_mean": 6672.0,
|
|
"valid_targets_min": 6074
|
|
},
|
|
{
|
|
"epoch": 5.420634920634921,
|
|
"grad_norm": 0.3397236250036521,
|
|
"learning_rate": 5.899903966181751e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741575241088867,
|
|
"step": 3415,
|
|
"valid_targets_mean": 6780.4,
|
|
"valid_targets_min": 5314
|
|
},
|
|
{
|
|
"epoch": 5.428571428571429,
|
|
"grad_norm": 0.31191590118123375,
|
|
"learning_rate": 5.843878850052007e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26607757806777954,
|
|
"step": 3420,
|
|
"valid_targets_mean": 7062.4,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 5.436507936507937,
|
|
"grad_norm": 0.3252501529662636,
|
|
"learning_rate": 5.788075462290084e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26874810457229614,
|
|
"step": 3425,
|
|
"valid_targets_mean": 6743.6,
|
|
"valid_targets_min": 6086
|
|
},
|
|
{
|
|
"epoch": 5.444444444444445,
|
|
"grad_norm": 0.35109568270298624,
|
|
"learning_rate": 5.732494676948554e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715177536010742,
|
|
"step": 3430,
|
|
"valid_targets_mean": 6797.1,
|
|
"valid_targets_min": 6060
|
|
},
|
|
{
|
|
"epoch": 5.4523809523809526,
|
|
"grad_norm": 0.3275962204275203,
|
|
"learning_rate": 5.677137364593363e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27148008346557617,
|
|
"step": 3435,
|
|
"valid_targets_mean": 6695.6,
|
|
"valid_targets_min": 5947
|
|
},
|
|
{
|
|
"epoch": 5.4603174603174605,
|
|
"grad_norm": 0.3475352410278873,
|
|
"learning_rate": 5.622004392290163e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2929421663284302,
|
|
"step": 3440,
|
|
"valid_targets_mean": 7916.4,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 5.468253968253968,
|
|
"grad_norm": 0.2651951136670226,
|
|
"learning_rate": 5.567096623590758e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26388949155807495,
|
|
"step": 3445,
|
|
"valid_targets_mean": 8719.8,
|
|
"valid_targets_min": 6369
|
|
},
|
|
{
|
|
"epoch": 5.476190476190476,
|
|
"grad_norm": 0.3302007005495651,
|
|
"learning_rate": 5.512414918519573e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27515876293182373,
|
|
"step": 3450,
|
|
"valid_targets_mean": 6711.4,
|
|
"valid_targets_min": 6016
|
|
},
|
|
{
|
|
"epoch": 5.484126984126984,
|
|
"grad_norm": 0.5892082716485184,
|
|
"learning_rate": 5.457960133560179e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2712671756744385,
|
|
"step": 3455,
|
|
"valid_targets_mean": 7533.0,
|
|
"valid_targets_min": 5980
|
|
},
|
|
{
|
|
"epoch": 5.492063492063492,
|
|
"grad_norm": 0.3373344264639116,
|
|
"learning_rate": 5.403733121641883e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29250556230545044,
|
|
"step": 3460,
|
|
"valid_targets_mean": 6773.4,
|
|
"valid_targets_min": 6163
|
|
},
|
|
{
|
|
"epoch": 5.5,
|
|
"grad_norm": 0.31756134058847474,
|
|
"learning_rate": 5.349734732126366e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24138540029525757,
|
|
"step": 3465,
|
|
"valid_targets_mean": 6439.4,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 5.507936507936508,
|
|
"grad_norm": 0.3446035487131935,
|
|
"learning_rate": 5.295965810794376e-06,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2750970423221588,
|
|
"step": 3470,
|
|
"valid_targets_mean": 6620.4,
|
|
"valid_targets_min": 5260
|
|
},
|
|
{
|
|
"epoch": 5.515873015873016,
|
|
"grad_norm": 0.2921612255872633,
|
|
"learning_rate": 5.2424271998324895e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627141773700714,
|
|
"step": 3475,
|
|
"valid_targets_mean": 7443.3,
|
|
"valid_targets_min": 4754
|
|
},
|
|
{
|
|
"epoch": 5.523809523809524,
|
|
"grad_norm": 0.32675650885718277,
|
|
"learning_rate": 5.189119737819912e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764326333999634,
|
|
"step": 3480,
|
|
"valid_targets_mean": 6723.6,
|
|
"valid_targets_min": 4870
|
|
},
|
|
{
|
|
"epoch": 5.531746031746032,
|
|
"grad_norm": 0.31622548712765236,
|
|
"learning_rate": 5.136044259715342e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25748011469841003,
|
|
"step": 3485,
|
|
"valid_targets_mean": 6843.2,
|
|
"valid_targets_min": 6245
|
|
},
|
|
{
|
|
"epoch": 5.5396825396825395,
|
|
"grad_norm": 0.2961764094436337,
|
|
"learning_rate": 5.083201596843905e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673254907131195,
|
|
"step": 3490,
|
|
"valid_targets_mean": 7790.4,
|
|
"valid_targets_min": 5905
|
|
},
|
|
{
|
|
"epoch": 5.5476190476190474,
|
|
"grad_norm": 0.34376308478729944,
|
|
"learning_rate": 5.030592576884117e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671489715576172,
|
|
"step": 3495,
|
|
"valid_targets_mean": 6419.8,
|
|
"valid_targets_min": 3817
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 0.32208174340532353,
|
|
"learning_rate": 4.978218023854928e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2740962505340576,
|
|
"step": 3500,
|
|
"valid_targets_mean": 6647.6,
|
|
"valid_targets_min": 5633
|
|
},
|
|
{
|
|
"epoch": 5.563492063492063,
|
|
"grad_norm": 0.33797278337457787,
|
|
"learning_rate": 4.926078758102834e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754783630371094,
|
|
"step": 3505,
|
|
"valid_targets_mean": 6905.6,
|
|
"valid_targets_min": 5546
|
|
},
|
|
{
|
|
"epoch": 5.571428571428571,
|
|
"grad_norm": 0.32850999087580296,
|
|
"learning_rate": 4.87417559628897e-06,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26777344942092896,
|
|
"step": 3510,
|
|
"valid_targets_mean": 6636.1,
|
|
"valid_targets_min": 4425
|
|
},
|
|
{
|
|
"epoch": 5.579365079365079,
|
|
"grad_norm": 0.34331645111249554,
|
|
"learning_rate": 4.822509351376399e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819187343120575,
|
|
"step": 3515,
|
|
"valid_targets_mean": 6751.4,
|
|
"valid_targets_min": 6129
|
|
},
|
|
{
|
|
"epoch": 5.587301587301587,
|
|
"grad_norm": 0.33901911599040974,
|
|
"learning_rate": 4.7710808326173115e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764272093772888,
|
|
"step": 3520,
|
|
"valid_targets_mean": 6458.9,
|
|
"valid_targets_min": 4920
|
|
},
|
|
{
|
|
"epoch": 5.595238095238095,
|
|
"grad_norm": 0.3273229116490547,
|
|
"learning_rate": 4.719890845540385e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2814077138900757,
|
|
"step": 3525,
|
|
"valid_targets_mean": 6639.6,
|
|
"valid_targets_min": 5965
|
|
},
|
|
{
|
|
"epoch": 5.603174603174603,
|
|
"grad_norm": 0.3078395771782246,
|
|
"learning_rate": 4.668940191938156e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28143301606178284,
|
|
"step": 3530,
|
|
"valid_targets_mean": 7290.2,
|
|
"valid_targets_min": 5725
|
|
},
|
|
{
|
|
"epoch": 5.611111111111111,
|
|
"grad_norm": 0.31751026797209314,
|
|
"learning_rate": 4.618229669854464e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573384940624237,
|
|
"step": 3535,
|
|
"valid_targets_mean": 6747.6,
|
|
"valid_targets_min": 5958
|
|
},
|
|
{
|
|
"epoch": 5.619047619047619,
|
|
"grad_norm": 0.3575567934301571,
|
|
"learning_rate": 4.567760073571947e-06,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2709672152996063,
|
|
"step": 3540,
|
|
"valid_targets_mean": 6199.5,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.6269841269841265,
|
|
"grad_norm": 0.33551783322109435,
|
|
"learning_rate": 4.51753219359961e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27431222796440125,
|
|
"step": 3545,
|
|
"valid_targets_mean": 7084.7,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 5.634920634920634,
|
|
"grad_norm": 0.3005818407644003,
|
|
"learning_rate": 4.467546816660433e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276604026556015,
|
|
"step": 3550,
|
|
"valid_targets_mean": 7451.4,
|
|
"valid_targets_min": 5813
|
|
},
|
|
{
|
|
"epoch": 5.642857142857143,
|
|
"grad_norm": 0.32490136776297324,
|
|
"learning_rate": 4.417804725679058e-06,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2872847318649292,
|
|
"step": 3555,
|
|
"valid_targets_mean": 6906.1,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 5.650793650793651,
|
|
"grad_norm": 0.34750641668788784,
|
|
"learning_rate": 4.368306699769518e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2745434045791626,
|
|
"step": 3560,
|
|
"valid_targets_mean": 6530.2,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 5.658730158730159,
|
|
"grad_norm": 0.28906900685621884,
|
|
"learning_rate": 4.319053514223033e-06,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25728583335876465,
|
|
"step": 3565,
|
|
"valid_targets_mean": 7745.2,
|
|
"valid_targets_min": 5758
|
|
},
|
|
{
|
|
"epoch": 5.666666666666667,
|
|
"grad_norm": 0.3492790821836433,
|
|
"learning_rate": 4.270045940495879e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620035409927368,
|
|
"step": 3570,
|
|
"valid_targets_mean": 6363.4,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 5.674603174603175,
|
|
"grad_norm": 0.2957053573230713,
|
|
"learning_rate": 4.221284746197292e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26280391216278076,
|
|
"step": 3575,
|
|
"valid_targets_mean": 7388.6,
|
|
"valid_targets_min": 5963
|
|
},
|
|
{
|
|
"epoch": 5.682539682539683,
|
|
"grad_norm": 0.31250710257135633,
|
|
"learning_rate": 4.172770695077437e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2550213932991028,
|
|
"step": 3580,
|
|
"valid_targets_mean": 6801.1,
|
|
"valid_targets_min": 4458
|
|
},
|
|
{
|
|
"epoch": 5.690476190476191,
|
|
"grad_norm": 0.310884958141401,
|
|
"learning_rate": 4.124504547015487e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27220970392227173,
|
|
"step": 3585,
|
|
"valid_targets_mean": 7577.3,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 5.698412698412699,
|
|
"grad_norm": 0.33304816892764655,
|
|
"learning_rate": 4.0764870580076675e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743382751941681,
|
|
"step": 3590,
|
|
"valid_targets_mean": 6691.9,
|
|
"valid_targets_min": 2123
|
|
},
|
|
{
|
|
"epoch": 5.7063492063492065,
|
|
"grad_norm": 0.3190861598058769,
|
|
"learning_rate": 4.0287189801554304e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29642146825790405,
|
|
"step": 3595,
|
|
"valid_targets_mean": 7444.1,
|
|
"valid_targets_min": 5711
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 0.32688327364150577,
|
|
"learning_rate": 3.98120106165371e-06,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26632988452911377,
|
|
"step": 3600,
|
|
"valid_targets_mean": 6773.8,
|
|
"valid_targets_min": 5940
|
|
},
|
|
{
|
|
"epoch": 5.722222222222222,
|
|
"grad_norm": 0.37778181292886254,
|
|
"learning_rate": 3.933934046779164e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966225743293762,
|
|
"step": 3605,
|
|
"valid_targets_mean": 6502.3,
|
|
"valid_targets_min": 2403
|
|
},
|
|
{
|
|
"epoch": 5.73015873015873,
|
|
"grad_norm": 0.3146598569490981,
|
|
"learning_rate": 3.886918675878513e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273377001285553,
|
|
"step": 3610,
|
|
"valid_targets_mean": 7423.8,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 5.738095238095238,
|
|
"grad_norm": 0.31547969838549006,
|
|
"learning_rate": 3.840155685356983e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26998457312583923,
|
|
"step": 3615,
|
|
"valid_targets_mean": 7526.8,
|
|
"valid_targets_min": 5824
|
|
},
|
|
{
|
|
"epoch": 5.746031746031746,
|
|
"grad_norm": 0.31752759287827714,
|
|
"learning_rate": 3.793645807666735e-06,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26018378138542175,
|
|
"step": 3620,
|
|
"valid_targets_mean": 6726.8,
|
|
"valid_targets_min": 6189
|
|
},
|
|
{
|
|
"epoch": 5.753968253968254,
|
|
"grad_norm": 0.33368050653092485,
|
|
"learning_rate": 3.747389771295411e-06,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702258229255676,
|
|
"step": 3625,
|
|
"valid_targets_mean": 6822.9,
|
|
"valid_targets_min": 5912
|
|
},
|
|
{
|
|
"epoch": 5.761904761904762,
|
|
"grad_norm": 0.342925172377563,
|
|
"learning_rate": 3.701388300754709e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28437238931655884,
|
|
"step": 3630,
|
|
"valid_targets_mean": 6858.0,
|
|
"valid_targets_min": 5963
|
|
},
|
|
{
|
|
"epoch": 5.76984126984127,
|
|
"grad_norm": 0.3211947707956985,
|
|
"learning_rate": 3.6556421165690516e-06,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575856149196625,
|
|
"step": 3635,
|
|
"valid_targets_mean": 7234.1,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 5.777777777777778,
|
|
"grad_norm": 0.35969123187887564,
|
|
"learning_rate": 3.610151935264288e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28753212094306946,
|
|
"step": 3640,
|
|
"valid_targets_mean": 6567.1,
|
|
"valid_targets_min": 5770
|
|
},
|
|
{
|
|
"epoch": 5.785714285714286,
|
|
"grad_norm": 0.3357421225225441,
|
|
"learning_rate": 3.5649184693564797e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27521878480911255,
|
|
"step": 3645,
|
|
"valid_targets_mean": 6436.6,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 5.7936507936507935,
|
|
"grad_norm": 0.2867339403404284,
|
|
"learning_rate": 3.5199424273407277e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27534574270248413,
|
|
"step": 3650,
|
|
"valid_targets_mean": 8527.3,
|
|
"valid_targets_min": 6150
|
|
},
|
|
{
|
|
"epoch": 5.801587301587301,
|
|
"grad_norm": 0.346093358505726,
|
|
"learning_rate": 3.4752245136801065e-06,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28039658069610596,
|
|
"step": 3655,
|
|
"valid_targets_mean": 6321.6,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 5.809523809523809,
|
|
"grad_norm": 0.32607029109201546,
|
|
"learning_rate": 3.430765428794569e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27508544921875,
|
|
"step": 3660,
|
|
"valid_targets_mean": 6587.0,
|
|
"valid_targets_min": 5595
|
|
},
|
|
{
|
|
"epoch": 5.817460317460317,
|
|
"grad_norm": 0.32618884062993364,
|
|
"learning_rate": 3.3865658690500424e-06,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2774638533592224,
|
|
"step": 3665,
|
|
"valid_targets_mean": 6764.8,
|
|
"valid_targets_min": 6214
|
|
},
|
|
{
|
|
"epoch": 5.825396825396825,
|
|
"grad_norm": 0.337105332438837,
|
|
"learning_rate": 3.34262652674749e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27832186222076416,
|
|
"step": 3670,
|
|
"valid_targets_mean": 7079.5,
|
|
"valid_targets_min": 4111
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.3304340829033622,
|
|
"learning_rate": 3.2989480901120684e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27656280994415283,
|
|
"step": 3675,
|
|
"valid_targets_mean": 6716.5,
|
|
"valid_targets_min": 6268
|
|
},
|
|
{
|
|
"epoch": 5.841269841269841,
|
|
"grad_norm": 0.2890486345408232,
|
|
"learning_rate": 3.2555312432823283e-06,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546257972717285,
|
|
"step": 3680,
|
|
"valid_targets_mean": 7575.0,
|
|
"valid_targets_min": 5920
|
|
},
|
|
{
|
|
"epoch": 5.849206349206349,
|
|
"grad_norm": 0.34049752818730195,
|
|
"learning_rate": 3.2123766662995572e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27448463439941406,
|
|
"step": 3685,
|
|
"valid_targets_mean": 6815.9,
|
|
"valid_targets_min": 6069
|
|
},
|
|
{
|
|
"epoch": 5.857142857142857,
|
|
"grad_norm": 0.3072516926148437,
|
|
"learning_rate": 3.1694850350970686e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656967043876648,
|
|
"step": 3690,
|
|
"valid_targets_mean": 7377.9,
|
|
"valid_targets_min": 6627
|
|
},
|
|
{
|
|
"epoch": 5.865079365079366,
|
|
"grad_norm": 0.4659461283228239,
|
|
"learning_rate": 3.1268570214896265e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678910791873932,
|
|
"step": 3695,
|
|
"valid_targets_mean": 6565.1,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 5.8730158730158735,
|
|
"grad_norm": 0.3322326574265224,
|
|
"learning_rate": 3.0844932931629602e-06,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529580891132355,
|
|
"step": 3700,
|
|
"valid_targets_mean": 6934.9,
|
|
"valid_targets_min": 5977
|
|
},
|
|
{
|
|
"epoch": 5.880952380952381,
|
|
"grad_norm": 0.34639457717184735,
|
|
"learning_rate": 3.0423945136632626e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28279852867126465,
|
|
"step": 3705,
|
|
"valid_targets_mean": 6634.4,
|
|
"valid_targets_min": 4696
|
|
},
|
|
{
|
|
"epoch": 5.888888888888889,
|
|
"grad_norm": 0.39625616539309455,
|
|
"learning_rate": 3.000561342386814e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27179470658302307,
|
|
"step": 3710,
|
|
"valid_targets_mean": 6370.1,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 5.896825396825397,
|
|
"grad_norm": 0.3538908434434527,
|
|
"learning_rate": 2.9589944345696596e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718745172023773,
|
|
"step": 3715,
|
|
"valid_targets_mean": 6749.3,
|
|
"valid_targets_min": 6092
|
|
},
|
|
{
|
|
"epoch": 5.904761904761905,
|
|
"grad_norm": 0.30060166492892026,
|
|
"learning_rate": 2.9176944412773322e-06,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26197993755340576,
|
|
"step": 3720,
|
|
"valid_targets_mean": 6523.2,
|
|
"valid_targets_min": 5821
|
|
},
|
|
{
|
|
"epoch": 5.912698412698413,
|
|
"grad_norm": 0.32036858487637526,
|
|
"learning_rate": 2.876662009394673e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634490132331848,
|
|
"step": 3725,
|
|
"valid_targets_mean": 7125.1,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 5.920634920634921,
|
|
"grad_norm": 0.3339223745444748,
|
|
"learning_rate": 2.8358977816156796e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26558423042297363,
|
|
"step": 3730,
|
|
"valid_targets_mean": 6434.8,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 5.928571428571429,
|
|
"grad_norm": 0.3375295772295544,
|
|
"learning_rate": 2.7954023964334485e-06,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789832353591919,
|
|
"step": 3735,
|
|
"valid_targets_mean": 6265.1,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 5.936507936507937,
|
|
"grad_norm": 0.29253117354320374,
|
|
"learning_rate": 2.7551764881301955e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25958818197250366,
|
|
"step": 3740,
|
|
"valid_targets_mean": 7510.6,
|
|
"valid_targets_min": 5170
|
|
},
|
|
{
|
|
"epoch": 5.944444444444445,
|
|
"grad_norm": 0.2665984696410422,
|
|
"learning_rate": 2.715220686767268e-06,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25424766540527344,
|
|
"step": 3745,
|
|
"valid_targets_mean": 8813.1,
|
|
"valid_targets_min": 6428
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 0.3229021051115329,
|
|
"learning_rate": 2.6755356181753247e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732028067111969,
|
|
"step": 3750,
|
|
"valid_targets_mean": 6883.8,
|
|
"valid_targets_min": 5399
|
|
},
|
|
{
|
|
"epoch": 5.9603174603174605,
|
|
"grad_norm": 0.312883866089789,
|
|
"learning_rate": 2.6361219039445328e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28022703528404236,
|
|
"step": 3755,
|
|
"valid_targets_mean": 6852.0,
|
|
"valid_targets_min": 5914
|
|
},
|
|
{
|
|
"epoch": 5.968253968253968,
|
|
"grad_norm": 0.3485804580877343,
|
|
"learning_rate": 2.5969801614147838e-06,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976386547088623,
|
|
"step": 3760,
|
|
"valid_targets_mean": 6721.8,
|
|
"valid_targets_min": 4063
|
|
},
|
|
{
|
|
"epoch": 5.976190476190476,
|
|
"grad_norm": 0.30817047165728256,
|
|
"learning_rate": 2.558111003666075e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644259035587311,
|
|
"step": 3765,
|
|
"valid_targets_mean": 6558.4,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 5.984126984126984,
|
|
"grad_norm": 0.31791922824231333,
|
|
"learning_rate": 2.519515039508893e-06,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27109044790267944,
|
|
"step": 3770,
|
|
"valid_targets_mean": 6903.5,
|
|
"valid_targets_min": 5673
|
|
},
|
|
{
|
|
"epoch": 5.992063492063492,
|
|
"grad_norm": 0.32409503780182763,
|
|
"learning_rate": 2.481192873474667e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262308806180954,
|
|
"step": 3775,
|
|
"valid_targets_mean": 6337.6,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.3403526700763175,
|
|
"learning_rate": 2.4431451058062928e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2739599645137787,
|
|
"step": 3780,
|
|
"valid_targets_mean": 6316.4,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 6.007936507936508,
|
|
"grad_norm": 0.3388083671103971,
|
|
"learning_rate": 2.4053723324487677e-06,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28137436509132385,
|
|
"step": 3785,
|
|
"valid_targets_mean": 6806.8,
|
|
"valid_targets_min": 5939
|
|
},
|
|
{
|
|
"epoch": 6.015873015873016,
|
|
"grad_norm": 0.3434655549762519,
|
|
"learning_rate": 2.3678751450398196e-06,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26245996356010437,
|
|
"step": 3790,
|
|
"valid_targets_mean": 6851.1,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 6.023809523809524,
|
|
"grad_norm": 0.32924857559090087,
|
|
"learning_rate": 2.330654130900656e-06,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786942720413208,
|
|
"step": 3795,
|
|
"valid_targets_mean": 6436.6,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 6.031746031746032,
|
|
"grad_norm": 0.3356418363772774,
|
|
"learning_rate": 2.2937098730267572e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28526610136032104,
|
|
"step": 3800,
|
|
"valid_targets_mean": 6903.2,
|
|
"valid_targets_min": 6326
|
|
},
|
|
{
|
|
"epoch": 6.0396825396825395,
|
|
"grad_norm": 0.3202194992129843,
|
|
"learning_rate": 2.2570429500787604e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27275580167770386,
|
|
"step": 3805,
|
|
"valid_targets_mean": 7349.3,
|
|
"valid_targets_min": 6125
|
|
},
|
|
{
|
|
"epoch": 6.0476190476190474,
|
|
"grad_norm": 0.3302078929501228,
|
|
"learning_rate": 2.2206539363733738e-06,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2685317397117615,
|
|
"step": 3810,
|
|
"valid_targets_mean": 6851.6,
|
|
"valid_targets_min": 6250
|
|
},
|
|
{
|
|
"epoch": 6.055555555555555,
|
|
"grad_norm": 0.34116029529034886,
|
|
"learning_rate": 2.1845434018744038e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753264307975769,
|
|
"step": 3815,
|
|
"valid_targets_mean": 6719.6,
|
|
"valid_targets_min": 5082
|
|
},
|
|
{
|
|
"epoch": 6.063492063492063,
|
|
"grad_norm": 0.3314281521803331,
|
|
"learning_rate": 2.148711912183803e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676103413105011,
|
|
"step": 3820,
|
|
"valid_targets_mean": 6071.4,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 6.071428571428571,
|
|
"grad_norm": 0.31448667834339267,
|
|
"learning_rate": 2.1131600285328458e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27560901641845703,
|
|
"step": 3825,
|
|
"valid_targets_mean": 6854.8,
|
|
"valid_targets_min": 6187
|
|
},
|
|
{
|
|
"epoch": 6.079365079365079,
|
|
"grad_norm": 0.3752870299782435,
|
|
"learning_rate": 2.0778883077732903e-06,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703682482242584,
|
|
"step": 3830,
|
|
"valid_targets_mean": 6293.8,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 6.087301587301587,
|
|
"grad_norm": 0.311693508602459,
|
|
"learning_rate": 2.0428973023686983e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26049721240997314,
|
|
"step": 3835,
|
|
"valid_targets_mean": 7006.4,
|
|
"valid_targets_min": 6352
|
|
},
|
|
{
|
|
"epoch": 6.095238095238095,
|
|
"grad_norm": 0.31885086987466565,
|
|
"learning_rate": 2.0081875603857726e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789642810821533,
|
|
"step": 3840,
|
|
"valid_targets_mean": 6701.2,
|
|
"valid_targets_min": 6192
|
|
},
|
|
{
|
|
"epoch": 6.103174603174603,
|
|
"grad_norm": 0.3378180032780896,
|
|
"learning_rate": 1.973759625485743e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913306653499603,
|
|
"step": 3845,
|
|
"valid_targets_mean": 7195.8,
|
|
"valid_targets_min": 5879
|
|
},
|
|
{
|
|
"epoch": 6.111111111111111,
|
|
"grad_norm": 0.3085640048383995,
|
|
"learning_rate": 1.9396140369159e-06,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27990275621414185,
|
|
"step": 3850,
|
|
"valid_targets_mean": 7387.7,
|
|
"valid_targets_min": 6002
|
|
},
|
|
{
|
|
"epoch": 6.119047619047619,
|
|
"grad_norm": 0.31118805515948755,
|
|
"learning_rate": 1.9057513295011087e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28056734800338745,
|
|
"step": 3855,
|
|
"valid_targets_mean": 7995.1,
|
|
"valid_targets_min": 6320
|
|
},
|
|
{
|
|
"epoch": 6.1269841269841265,
|
|
"grad_norm": 0.30939549487407175,
|
|
"learning_rate": 1.8721720336354487e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27132314443588257,
|
|
"step": 3860,
|
|
"valid_targets_mean": 6747.6,
|
|
"valid_targets_min": 5966
|
|
},
|
|
{
|
|
"epoch": 6.134920634920635,
|
|
"grad_norm": 0.34302294628315066,
|
|
"learning_rate": 1.8388766752739017e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824704647064209,
|
|
"step": 3865,
|
|
"valid_targets_mean": 6284.9,
|
|
"valid_targets_min": 2706
|
|
},
|
|
{
|
|
"epoch": 6.142857142857143,
|
|
"grad_norm": 0.2868102839003854,
|
|
"learning_rate": 1.805865775924116e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26112598180770874,
|
|
"step": 3870,
|
|
"valid_targets_mean": 7515.0,
|
|
"valid_targets_min": 6027
|
|
},
|
|
{
|
|
"epoch": 6.150793650793651,
|
|
"grad_norm": 0.27990845670631115,
|
|
"learning_rate": 1.7731398526382416e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587110102176666,
|
|
"step": 3875,
|
|
"valid_targets_mean": 8192.6,
|
|
"valid_targets_min": 6069
|
|
},
|
|
{
|
|
"epoch": 6.158730158730159,
|
|
"grad_norm": 0.34902871470111235,
|
|
"learning_rate": 1.7406994180048231e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892429828643799,
|
|
"step": 3880,
|
|
"valid_targets_mean": 6917.1,
|
|
"valid_targets_min": 6284
|
|
},
|
|
{
|
|
"epoch": 6.166666666666667,
|
|
"grad_norm": 0.32868164246178155,
|
|
"learning_rate": 1.7085449801407783e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2734549939632416,
|
|
"step": 3885,
|
|
"valid_targets_mean": 5812.6,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 6.174603174603175,
|
|
"grad_norm": 0.32145348989278744,
|
|
"learning_rate": 1.67667704268343e-06,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837391793727875,
|
|
"step": 3890,
|
|
"valid_targets_mean": 7480.7,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 6.182539682539683,
|
|
"grad_norm": 0.3060580385530307,
|
|
"learning_rate": 1.6450961047826353e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25625067949295044,
|
|
"step": 3895,
|
|
"valid_targets_mean": 6734.7,
|
|
"valid_targets_min": 6045
|
|
},
|
|
{
|
|
"epoch": 6.190476190476191,
|
|
"grad_norm": 0.3431785387785104,
|
|
"learning_rate": 1.6138026610929446e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27775415778160095,
|
|
"step": 3900,
|
|
"valid_targets_mean": 6184.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.198412698412699,
|
|
"grad_norm": 0.29621994136556096,
|
|
"learning_rate": 1.5827972017658732e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718660235404968,
|
|
"step": 3905,
|
|
"valid_targets_mean": 7668.0,
|
|
"valid_targets_min": 5942
|
|
},
|
|
{
|
|
"epoch": 6.2063492063492065,
|
|
"grad_norm": 0.2916966041196081,
|
|
"learning_rate": 1.5520802124422108e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25832104682922363,
|
|
"step": 3910,
|
|
"valid_targets_mean": 7441.3,
|
|
"valid_targets_min": 5800
|
|
},
|
|
{
|
|
"epoch": 6.214285714285714,
|
|
"grad_norm": 0.356384572479193,
|
|
"learning_rate": 1.5216521742444236e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2814731299877167,
|
|
"step": 3915,
|
|
"valid_targets_mean": 5940.8,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 6.222222222222222,
|
|
"grad_norm": 0.3268994176419603,
|
|
"learning_rate": 1.491513563769118e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2729707658290863,
|
|
"step": 3920,
|
|
"valid_targets_mean": 6603.8,
|
|
"valid_targets_min": 5969
|
|
},
|
|
{
|
|
"epoch": 6.23015873015873,
|
|
"grad_norm": 0.3429485882852285,
|
|
"learning_rate": 1.4616648530795673e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783457040786743,
|
|
"step": 3925,
|
|
"valid_targets_mean": 6463.8,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 6.238095238095238,
|
|
"grad_norm": 0.3051406752156092,
|
|
"learning_rate": 1.432106509698319e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26260828971862793,
|
|
"step": 3930,
|
|
"valid_targets_mean": 6807.8,
|
|
"valid_targets_min": 5991
|
|
},
|
|
{
|
|
"epoch": 6.246031746031746,
|
|
"grad_norm": 0.31411306680769285,
|
|
"learning_rate": 1.4028389965998867e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27847254276275635,
|
|
"step": 3935,
|
|
"valid_targets_mean": 6938.5,
|
|
"valid_targets_min": 6231
|
|
},
|
|
{
|
|
"epoch": 6.253968253968254,
|
|
"grad_norm": 0.30746329410473533,
|
|
"learning_rate": 1.3738627722034848e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27991652488708496,
|
|
"step": 3940,
|
|
"valid_targets_mean": 6784.3,
|
|
"valid_targets_min": 6287
|
|
},
|
|
{
|
|
"epoch": 6.261904761904762,
|
|
"grad_norm": 0.3399515155152072,
|
|
"learning_rate": 1.345178290365845e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27730685472488403,
|
|
"step": 3945,
|
|
"valid_targets_mean": 6531.3,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.26984126984127,
|
|
"grad_norm": 0.3140508632155118,
|
|
"learning_rate": 1.3167860003741218e-06,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657640278339386,
|
|
"step": 3950,
|
|
"valid_targets_mean": 6776.3,
|
|
"valid_targets_min": 5910
|
|
},
|
|
{
|
|
"epoch": 6.277777777777778,
|
|
"grad_norm": 0.3086307327659111,
|
|
"learning_rate": 1.2886863469388389e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27010494470596313,
|
|
"step": 3955,
|
|
"valid_targets_mean": 7528.8,
|
|
"valid_targets_min": 5119
|
|
},
|
|
{
|
|
"epoch": 6.285714285714286,
|
|
"grad_norm": 0.33931134872706276,
|
|
"learning_rate": 1.2608797701869425e-06,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754051983356476,
|
|
"step": 3960,
|
|
"valid_targets_mean": 6479.7,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 6.2936507936507935,
|
|
"grad_norm": 0.3395129150004189,
|
|
"learning_rate": 1.2333667056548881e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848201394081116,
|
|
"step": 3965,
|
|
"valid_targets_mean": 6923.8,
|
|
"valid_targets_min": 5654
|
|
},
|
|
{
|
|
"epoch": 6.301587301587301,
|
|
"grad_norm": 0.3449339649728558,
|
|
"learning_rate": 1.2061475842818337e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786179184913635,
|
|
"step": 3970,
|
|
"valid_targets_mean": 6541.2,
|
|
"valid_targets_min": 4029
|
|
},
|
|
{
|
|
"epoch": 6.309523809523809,
|
|
"grad_norm": 0.3384567559998413,
|
|
"learning_rate": 1.1792228324028776e-06,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834559381008148,
|
|
"step": 3975,
|
|
"valid_targets_mean": 6827.1,
|
|
"valid_targets_min": 6208
|
|
},
|
|
{
|
|
"epoch": 6.317460317460317,
|
|
"grad_norm": 0.3021930973938394,
|
|
"learning_rate": 1.152592871742395e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751089632511139,
|
|
"step": 3980,
|
|
"valid_targets_mean": 6650.9,
|
|
"valid_targets_min": 5816
|
|
},
|
|
{
|
|
"epoch": 6.325396825396825,
|
|
"grad_norm": 0.33288838768548884,
|
|
"learning_rate": 1.1262581194074152e-06,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27845561504364014,
|
|
"step": 3985,
|
|
"valid_targets_mean": 6267.7,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 6.333333333333333,
|
|
"grad_norm": 0.30785810270567704,
|
|
"learning_rate": 1.100218987881112e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822997272014618,
|
|
"step": 3990,
|
|
"valid_targets_mean": 7481.9,
|
|
"valid_targets_min": 6078
|
|
},
|
|
{
|
|
"epoch": 6.341269841269841,
|
|
"grad_norm": 0.29170259981592206,
|
|
"learning_rate": 1.0744758850163085e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26321518421173096,
|
|
"step": 3995,
|
|
"valid_targets_mean": 7663.4,
|
|
"valid_targets_min": 5733
|
|
},
|
|
{
|
|
"epoch": 6.349206349206349,
|
|
"grad_norm": 0.3032762020909906,
|
|
"learning_rate": 1.0490292140291247e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671698331832886,
|
|
"step": 4000,
|
|
"valid_targets_mean": 7412.8,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 6.357142857142857,
|
|
"grad_norm": 0.3143326566523872,
|
|
"learning_rate": 1.0238793734926467e-06,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718682885169983,
|
|
"step": 4005,
|
|
"valid_targets_mean": 6653.9,
|
|
"valid_targets_min": 5558
|
|
},
|
|
{
|
|
"epoch": 6.365079365079365,
|
|
"grad_norm": 0.32026984197018443,
|
|
"learning_rate": 9.990267573306745e-07,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28002679347991943,
|
|
"step": 4010,
|
|
"valid_targets_mean": 6724.6,
|
|
"valid_targets_min": 5877
|
|
},
|
|
{
|
|
"epoch": 6.3730158730158735,
|
|
"grad_norm": 0.334330173382039,
|
|
"learning_rate": 9.744717548115613e-07,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28418827056884766,
|
|
"step": 4015,
|
|
"valid_targets_mean": 6657.1,
|
|
"valid_targets_min": 5141
|
|
},
|
|
{
|
|
"epoch": 6.380952380952381,
|
|
"grad_norm": 0.3417063258412195,
|
|
"learning_rate": 9.502147505421244e-07,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28173577785491943,
|
|
"step": 4020,
|
|
"valid_targets_mean": 6616.8,
|
|
"valid_targets_min": 5665
|
|
},
|
|
{
|
|
"epoch": 6.388888888888889,
|
|
"grad_norm": 0.31286221773292744,
|
|
"learning_rate": 9.262561244616108e-07,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763950824737549,
|
|
"step": 4025,
|
|
"valid_targets_mean": 6700.2,
|
|
"valid_targets_min": 5577
|
|
},
|
|
{
|
|
"epoch": 6.396825396825397,
|
|
"grad_norm": 0.3281385068675008,
|
|
"learning_rate": 9.025962518357323e-07,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26859521865844727,
|
|
"step": 4030,
|
|
"valid_targets_mean": 6786.2,
|
|
"valid_targets_min": 5406
|
|
},
|
|
{
|
|
"epoch": 6.404761904761905,
|
|
"grad_norm": 0.3252274577764428,
|
|
"learning_rate": 8.792355032508282e-07,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587582468986511,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5456.8,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 6.412698412698413,
|
|
"grad_norm": 0.29141317338867095,
|
|
"learning_rate": 8.561742446080168e-07,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659565210342407,
|
|
"step": 4040,
|
|
"valid_targets_mean": 6980.2,
|
|
"valid_targets_min": 6261
|
|
},
|
|
{
|
|
"epoch": 6.420634920634921,
|
|
"grad_norm": 0.319013418008109,
|
|
"learning_rate": 8.334128371174955e-07,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574118375778198,
|
|
"step": 4045,
|
|
"valid_targets_mean": 7611.1,
|
|
"valid_targets_min": 5786
|
|
},
|
|
{
|
|
"epoch": 6.428571428571429,
|
|
"grad_norm": 0.3007412025459006,
|
|
"learning_rate": 8.109516372928605e-07,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25932422280311584,
|
|
"step": 4050,
|
|
"valid_targets_mean": 6873.8,
|
|
"valid_targets_min": 6279
|
|
},
|
|
{
|
|
"epoch": 6.436507936507937,
|
|
"grad_norm": 0.2894229330412034,
|
|
"learning_rate": 7.887909969455366e-07,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26138877868652344,
|
|
"step": 4055,
|
|
"valid_targets_mean": 6768.0,
|
|
"valid_targets_min": 5866
|
|
},
|
|
{
|
|
"epoch": 6.444444444444445,
|
|
"grad_norm": 0.31127385134581725,
|
|
"learning_rate": 7.669312631792758e-07,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713558077812195,
|
|
"step": 4060,
|
|
"valid_targets_mean": 6843.1,
|
|
"valid_targets_min": 6304
|
|
},
|
|
{
|
|
"epoch": 6.4523809523809526,
|
|
"grad_norm": 0.3105961735053375,
|
|
"learning_rate": 7.453727783846876e-07,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738935351371765,
|
|
"step": 4065,
|
|
"valid_targets_mean": 6849.5,
|
|
"valid_targets_min": 6274
|
|
},
|
|
{
|
|
"epoch": 6.4603174603174605,
|
|
"grad_norm": 0.33017337197716473,
|
|
"learning_rate": 7.241158802339065e-07,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27432024478912354,
|
|
"step": 4070,
|
|
"valid_targets_mean": 6872.4,
|
|
"valid_targets_min": 5918
|
|
},
|
|
{
|
|
"epoch": 6.468253968253968,
|
|
"grad_norm": 0.34230683316856264,
|
|
"learning_rate": 7.031609016753016e-07,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684346139431,
|
|
"step": 4075,
|
|
"valid_targets_mean": 6286.7,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 6.476190476190476,
|
|
"grad_norm": 0.3217397353517838,
|
|
"learning_rate": 6.825081709282377e-07,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28377363085746765,
|
|
"step": 4080,
|
|
"valid_targets_mean": 7234.6,
|
|
"valid_targets_min": 4111
|
|
},
|
|
{
|
|
"epoch": 6.484126984126984,
|
|
"grad_norm": 0.33199953112014385,
|
|
"learning_rate": 6.62158011477958e-07,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28517842292785645,
|
|
"step": 4085,
|
|
"valid_targets_mean": 6756.9,
|
|
"valid_targets_min": 4656
|
|
},
|
|
{
|
|
"epoch": 6.492063492063492,
|
|
"grad_norm": 0.3132540051177268,
|
|
"learning_rate": 6.421107420705097e-07,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26659032702445984,
|
|
"step": 4090,
|
|
"valid_targets_mean": 6751.2,
|
|
"valid_targets_min": 6120
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"grad_norm": 0.32709329652848196,
|
|
"learning_rate": 6.223666767077508e-07,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773485779762268,
|
|
"step": 4095,
|
|
"valid_targets_mean": 6627.7,
|
|
"valid_targets_min": 5866
|
|
},
|
|
{
|
|
"epoch": 6.507936507936508,
|
|
"grad_norm": 0.2973737590850442,
|
|
"learning_rate": 6.029261246424267e-07,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719399333000183,
|
|
"step": 4100,
|
|
"valid_targets_mean": 7294.7,
|
|
"valid_targets_min": 6077
|
|
},
|
|
{
|
|
"epoch": 6.515873015873016,
|
|
"grad_norm": 0.268342347362687,
|
|
"learning_rate": 5.837893903733394e-07,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26082220673561096,
|
|
"step": 4105,
|
|
"valid_targets_mean": 7597.5,
|
|
"valid_targets_min": 5799
|
|
},
|
|
{
|
|
"epoch": 6.523809523809524,
|
|
"grad_norm": 0.2869590882733387,
|
|
"learning_rate": 5.649567736405681e-07,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2427126169204712,
|
|
"step": 4110,
|
|
"valid_targets_mean": 6827.9,
|
|
"valid_targets_min": 6134
|
|
},
|
|
{
|
|
"epoch": 6.531746031746032,
|
|
"grad_norm": 0.28707171379297686,
|
|
"learning_rate": 5.464285694207672e-07,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27897125482559204,
|
|
"step": 4115,
|
|
"valid_targets_mean": 8381.4,
|
|
"valid_targets_min": 5956
|
|
},
|
|
{
|
|
"epoch": 6.5396825396825395,
|
|
"grad_norm": 0.3328589978252485,
|
|
"learning_rate": 5.282050679225714e-07,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868872880935669,
|
|
"step": 4120,
|
|
"valid_targets_mean": 7147.2,
|
|
"valid_targets_min": 6280
|
|
},
|
|
{
|
|
"epoch": 6.5476190476190474,
|
|
"grad_norm": 0.3655199336038534,
|
|
"learning_rate": 5.102865545820245e-07,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26657554507255554,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5743.6,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 6.555555555555555,
|
|
"grad_norm": 0.30051001630837376,
|
|
"learning_rate": 4.926733100581182e-07,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25863218307495117,
|
|
"step": 4130,
|
|
"valid_targets_mean": 6706.0,
|
|
"valid_targets_min": 6261
|
|
},
|
|
{
|
|
"epoch": 6.563492063492063,
|
|
"grad_norm": 0.28237535179918144,
|
|
"learning_rate": 4.7536561022840213e-07,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26936718821525574,
|
|
"step": 4135,
|
|
"valid_targets_mean": 8140.0,
|
|
"valid_targets_min": 6002
|
|
},
|
|
{
|
|
"epoch": 6.571428571428571,
|
|
"grad_norm": 0.3146708281144581,
|
|
"learning_rate": 4.5836372618464964e-07,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279814213514328,
|
|
"step": 4140,
|
|
"valid_targets_mean": 6580.6,
|
|
"valid_targets_min": 5594
|
|
},
|
|
{
|
|
"epoch": 6.579365079365079,
|
|
"grad_norm": 0.31621342770313016,
|
|
"learning_rate": 4.416679242286215e-07,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859848737716675,
|
|
"step": 4145,
|
|
"valid_targets_mean": 8034.2,
|
|
"valid_targets_min": 5360
|
|
},
|
|
{
|
|
"epoch": 6.587301587301587,
|
|
"grad_norm": 0.31420584334918683,
|
|
"learning_rate": 4.2527846586789547e-07,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25576838850975037,
|
|
"step": 4150,
|
|
"valid_targets_mean": 6864.4,
|
|
"valid_targets_min": 6134
|
|
},
|
|
{
|
|
"epoch": 6.595238095238095,
|
|
"grad_norm": 0.31915475531316134,
|
|
"learning_rate": 4.0919560781176317e-07,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27565398812294006,
|
|
"step": 4155,
|
|
"valid_targets_mean": 6763.0,
|
|
"valid_targets_min": 5980
|
|
},
|
|
{
|
|
"epoch": 6.603174603174603,
|
|
"grad_norm": 0.2833339871144851,
|
|
"learning_rate": 3.934196019672176e-07,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27071577310562134,
|
|
"step": 4160,
|
|
"valid_targets_mean": 7747.1,
|
|
"valid_targets_min": 6059
|
|
},
|
|
{
|
|
"epoch": 6.611111111111111,
|
|
"grad_norm": 0.30587505111761565,
|
|
"learning_rate": 3.779506954349965e-07,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638894021511078,
|
|
"step": 4165,
|
|
"valid_targets_mean": 6754.7,
|
|
"valid_targets_min": 6143
|
|
},
|
|
{
|
|
"epoch": 6.619047619047619,
|
|
"grad_norm": 0.31615879775761035,
|
|
"learning_rate": 3.6278913050572076e-07,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27563342452049255,
|
|
"step": 4170,
|
|
"valid_targets_mean": 6779.5,
|
|
"valid_targets_min": 5909
|
|
},
|
|
{
|
|
"epoch": 6.6269841269841265,
|
|
"grad_norm": 0.290277733048729,
|
|
"learning_rate": 3.4793514465610414e-07,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744487226009369,
|
|
"step": 4175,
|
|
"valid_targets_mean": 8200.6,
|
|
"valid_targets_min": 6089
|
|
},
|
|
{
|
|
"epoch": 6.634920634920634,
|
|
"grad_norm": 0.31331963776614535,
|
|
"learning_rate": 3.3338897054521205e-07,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2727993130683899,
|
|
"step": 4180,
|
|
"valid_targets_mean": 6577.9,
|
|
"valid_targets_min": 5903
|
|
},
|
|
{
|
|
"epoch": 6.642857142857143,
|
|
"grad_norm": 0.31938417211143383,
|
|
"learning_rate": 3.191508360108464e-07,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708355784416199,
|
|
"step": 4185,
|
|
"valid_targets_mean": 7345.9,
|
|
"valid_targets_min": 4957
|
|
},
|
|
{
|
|
"epoch": 6.650793650793651,
|
|
"grad_norm": 0.2957698198680341,
|
|
"learning_rate": 3.0522096406595536e-07,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587151825428009,
|
|
"step": 4190,
|
|
"valid_targets_mean": 6825.9,
|
|
"valid_targets_min": 6049
|
|
},
|
|
{
|
|
"epoch": 6.658730158730159,
|
|
"grad_norm": 0.32511794513672765,
|
|
"learning_rate": 2.9159957289514926e-07,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695087194442749,
|
|
"step": 4195,
|
|
"valid_targets_mean": 7316.3,
|
|
"valid_targets_min": 5494
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.3024417456518714,
|
|
"learning_rate": 2.782868758512791e-07,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27322492003440857,
|
|
"step": 4200,
|
|
"valid_targets_mean": 7343.2,
|
|
"valid_targets_min": 6190
|
|
},
|
|
{
|
|
"epoch": 6.674603174603175,
|
|
"grad_norm": 0.3036296166786774,
|
|
"learning_rate": 2.6528308145210125e-07,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26416444778442383,
|
|
"step": 4205,
|
|
"valid_targets_mean": 6779.9,
|
|
"valid_targets_min": 5786
|
|
},
|
|
{
|
|
"epoch": 6.682539682539683,
|
|
"grad_norm": 0.3194793112699858,
|
|
"learning_rate": 2.525883933770046e-07,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791600525379181,
|
|
"step": 4210,
|
|
"valid_targets_mean": 6377.1,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 6.690476190476191,
|
|
"grad_norm": 0.2969129618333589,
|
|
"learning_rate": 2.402030104638198e-07,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25265082716941833,
|
|
"step": 4215,
|
|
"valid_targets_mean": 7029.5,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 6.698412698412699,
|
|
"grad_norm": 0.33400665079097486,
|
|
"learning_rate": 2.2812712670571502e-07,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761928141117096,
|
|
"step": 4220,
|
|
"valid_targets_mean": 6058.6,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 6.7063492063492065,
|
|
"grad_norm": 0.335634478023948,
|
|
"learning_rate": 2.1636093124814738e-07,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28270167112350464,
|
|
"step": 4225,
|
|
"valid_targets_mean": 6508.7,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 6.714285714285714,
|
|
"grad_norm": 0.3206744211652736,
|
|
"learning_rate": 2.0490460838589855e-07,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26230210065841675,
|
|
"step": 4230,
|
|
"valid_targets_mean": 6313.9,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 6.722222222222222,
|
|
"grad_norm": 0.3205914463873738,
|
|
"learning_rate": 1.9375833756019923e-07,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644524574279785,
|
|
"step": 4235,
|
|
"valid_targets_mean": 6457.2,
|
|
"valid_targets_min": 5911
|
|
},
|
|
{
|
|
"epoch": 6.73015873015873,
|
|
"grad_norm": 0.3344723185285748,
|
|
"learning_rate": 1.8292229335590716e-07,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806960344314575,
|
|
"step": 4240,
|
|
"valid_targets_mean": 6401.0,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 6.738095238095238,
|
|
"grad_norm": 0.32431691166152604,
|
|
"learning_rate": 1.7239664549878688e-07,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963787913322449,
|
|
"step": 4245,
|
|
"valid_targets_mean": 6788.1,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 6.746031746031746,
|
|
"grad_norm": 0.31081308304551436,
|
|
"learning_rate": 1.6218155885283192e-07,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808656394481659,
|
|
"step": 4250,
|
|
"valid_targets_mean": 6822.1,
|
|
"valid_targets_min": 3688
|
|
},
|
|
{
|
|
"epoch": 6.753968253968254,
|
|
"grad_norm": 0.3246821086990968,
|
|
"learning_rate": 1.5227719341769364e-07,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932637929916382,
|
|
"step": 4255,
|
|
"valid_targets_mean": 6769.4,
|
|
"valid_targets_min": 5868
|
|
},
|
|
{
|
|
"epoch": 6.761904761904762,
|
|
"grad_norm": 0.30333837418328885,
|
|
"learning_rate": 1.4268370432618306e-07,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728917598724365,
|
|
"step": 4260,
|
|
"valid_targets_mean": 7001.7,
|
|
"valid_targets_min": 5448
|
|
},
|
|
{
|
|
"epoch": 6.76984126984127,
|
|
"grad_norm": 0.32094711065075765,
|
|
"learning_rate": 1.3340124184182178e-07,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695567011833191,
|
|
"step": 4265,
|
|
"valid_targets_mean": 6583.5,
|
|
"valid_targets_min": 5572
|
|
},
|
|
{
|
|
"epoch": 6.777777777777778,
|
|
"grad_norm": 0.30331851930978804,
|
|
"learning_rate": 1.2442995135650393e-07,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27021706104278564,
|
|
"step": 4270,
|
|
"valid_targets_mean": 6686.4,
|
|
"valid_targets_min": 6104
|
|
},
|
|
{
|
|
"epoch": 6.785714285714286,
|
|
"grad_norm": 0.3271231111454617,
|
|
"learning_rate": 1.1576997338821339e-07,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815885543823242,
|
|
"step": 4275,
|
|
"valid_targets_mean": 6746.9,
|
|
"valid_targets_min": 5522
|
|
},
|
|
{
|
|
"epoch": 6.7936507936507935,
|
|
"grad_norm": 0.3133792652186981,
|
|
"learning_rate": 1.0742144357882567e-07,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271761953830719,
|
|
"step": 4280,
|
|
"valid_targets_mean": 6780.9,
|
|
"valid_targets_min": 6094
|
|
},
|
|
{
|
|
"epoch": 6.801587301587301,
|
|
"grad_norm": 0.29294406096879855,
|
|
"learning_rate": 9.938449269197181e-08,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26170825958251953,
|
|
"step": 4285,
|
|
"valid_targets_mean": 7241.4,
|
|
"valid_targets_min": 5994
|
|
},
|
|
{
|
|
"epoch": 6.809523809523809,
|
|
"grad_norm": 0.31148246112996997,
|
|
"learning_rate": 9.165924661100889e-08,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806248962879181,
|
|
"step": 4290,
|
|
"valid_targets_mean": 7736.5,
|
|
"valid_targets_min": 5726
|
|
},
|
|
{
|
|
"epoch": 6.817460317460317,
|
|
"grad_norm": 0.33104095740109907,
|
|
"learning_rate": 8.424582633703493e-08,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733340263366699,
|
|
"step": 4295,
|
|
"valid_targets_mean": 6722.9,
|
|
"valid_targets_min": 5999
|
|
},
|
|
{
|
|
"epoch": 6.825396825396825,
|
|
"grad_norm": 0.29595993253805686,
|
|
"learning_rate": 7.714434798699933e-08,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25560927391052246,
|
|
"step": 4300,
|
|
"valid_targets_mean": 7853.9,
|
|
"valid_targets_min": 5606
|
|
},
|
|
{
|
|
"epoch": 6.833333333333333,
|
|
"grad_norm": 0.31170515071711263,
|
|
"learning_rate": 7.035492279187538e-08,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2787945568561554,
|
|
"step": 4305,
|
|
"valid_targets_mean": 7009.0,
|
|
"valid_targets_min": 5806
|
|
},
|
|
{
|
|
"epoch": 6.841269841269841,
|
|
"grad_norm": 0.30873029321568346,
|
|
"learning_rate": 6.387765709493288e-08,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26397645473480225,
|
|
"step": 4310,
|
|
"valid_targets_mean": 6852.4,
|
|
"valid_targets_min": 6246
|
|
},
|
|
{
|
|
"epoch": 6.849206349206349,
|
|
"grad_norm": 0.2702906227020219,
|
|
"learning_rate": 5.7712652350061515e-08,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25887924432754517,
|
|
"step": 4315,
|
|
"valid_targets_mean": 7915.1,
|
|
"valid_targets_min": 6393
|
|
},
|
|
{
|
|
"epoch": 6.857142857142857,
|
|
"grad_norm": 0.30173605690837446,
|
|
"learning_rate": 5.186000512018341e-08,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642671465873718,
|
|
"step": 4320,
|
|
"valid_targets_mean": 7372.1,
|
|
"valid_targets_min": 6279
|
|
},
|
|
{
|
|
"epoch": 6.865079365079366,
|
|
"grad_norm": 0.316172691588809,
|
|
"learning_rate": 4.631980707574535e-08,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624400854110718,
|
|
"step": 4325,
|
|
"valid_targets_mean": 7185.5,
|
|
"valid_targets_min": 5947
|
|
},
|
|
{
|
|
"epoch": 6.8730158730158735,
|
|
"grad_norm": 0.3121026542836471,
|
|
"learning_rate": 4.10921449932733e-08,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26848697662353516,
|
|
"step": 4330,
|
|
"valid_targets_mean": 6900.1,
|
|
"valid_targets_min": 6485
|
|
},
|
|
{
|
|
"epoch": 6.880952380952381,
|
|
"grad_norm": 0.30793589336556626,
|
|
"learning_rate": 3.61771007540268e-08,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694513201713562,
|
|
"step": 4335,
|
|
"valid_targets_mean": 6806.1,
|
|
"valid_targets_min": 5885
|
|
},
|
|
{
|
|
"epoch": 6.888888888888889,
|
|
"grad_norm": 0.285040570848796,
|
|
"learning_rate": 3.157475134270227e-08,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25863173604011536,
|
|
"step": 4340,
|
|
"valid_targets_mean": 7657.6,
|
|
"valid_targets_min": 6266
|
|
},
|
|
{
|
|
"epoch": 6.896825396825397,
|
|
"grad_norm": 0.32977479388743547,
|
|
"learning_rate": 2.728516884624277e-08,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783914804458618,
|
|
"step": 4345,
|
|
"valid_targets_mean": 6918.1,
|
|
"valid_targets_min": 6086
|
|
},
|
|
{
|
|
"epoch": 6.904761904761905,
|
|
"grad_norm": 0.3160559472563977,
|
|
"learning_rate": 2.3308420452690106e-08,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697213888168335,
|
|
"step": 4350,
|
|
"valid_targets_mean": 6731.3,
|
|
"valid_targets_min": 5870
|
|
},
|
|
{
|
|
"epoch": 6.912698412698413,
|
|
"grad_norm": 0.28663796985433976,
|
|
"learning_rate": 1.9644568450147837e-08,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675458490848541,
|
|
"step": 4355,
|
|
"valid_targets_mean": 7597.1,
|
|
"valid_targets_min": 5757
|
|
},
|
|
{
|
|
"epoch": 6.920634920634921,
|
|
"grad_norm": 0.30528251331192885,
|
|
"learning_rate": 1.6293670225799864e-08,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690272033214569,
|
|
"step": 4360,
|
|
"valid_targets_mean": 6689.8,
|
|
"valid_targets_min": 5941
|
|
},
|
|
{
|
|
"epoch": 6.928571428571429,
|
|
"grad_norm": 0.3076565128209838,
|
|
"learning_rate": 1.3255778265013342e-08,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849264442920685,
|
|
"step": 4365,
|
|
"valid_targets_mean": 6962.5,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 6.936507936507937,
|
|
"grad_norm": 0.30484802160750374,
|
|
"learning_rate": 1.0530940150512703e-08,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26153427362442017,
|
|
"step": 4370,
|
|
"valid_targets_mean": 7174.6,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 6.944444444444445,
|
|
"grad_norm": 0.2963865805966246,
|
|
"learning_rate": 8.119198561638009e-09,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270327091217041,
|
|
"step": 4375,
|
|
"valid_targets_mean": 7185.9,
|
|
"valid_targets_min": 6148
|
|
},
|
|
{
|
|
"epoch": 6.9523809523809526,
|
|
"grad_norm": 0.31272225422512906,
|
|
"learning_rate": 6.020591273674381e-09,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2621399760246277,
|
|
"step": 4380,
|
|
"valid_targets_mean": 6722.4,
|
|
"valid_targets_min": 6002
|
|
},
|
|
{
|
|
"epoch": 6.9603174603174605,
|
|
"grad_norm": 0.30514293109040547,
|
|
"learning_rate": 4.2351511572635835e-09,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24763672053813934,
|
|
"step": 4385,
|
|
"valid_targets_mean": 6628.1,
|
|
"valid_targets_min": 3179
|
|
},
|
|
{
|
|
"epoch": 6.968253968253968,
|
|
"grad_norm": 0.33791577994725713,
|
|
"learning_rate": 2.7629061778866597e-09,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26168376207351685,
|
|
"step": 4390,
|
|
"valid_targets_mean": 6663.4,
|
|
"valid_targets_min": 4843
|
|
},
|
|
{
|
|
"epoch": 6.976190476190476,
|
|
"grad_norm": 0.3053120208412012,
|
|
"learning_rate": 1.603879395422059e-09,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259382426738739,
|
|
"step": 4395,
|
|
"valid_targets_mean": 6770.1,
|
|
"valid_targets_min": 6049
|
|
},
|
|
{
|
|
"epoch": 6.984126984126984,
|
|
"grad_norm": 0.3223029498715875,
|
|
"learning_rate": 7.580889637925914e-10,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28899016976356506,
|
|
"step": 4400,
|
|
"valid_targets_mean": 6646.0,
|
|
"valid_targets_min": 5874
|
|
},
|
|
{
|
|
"epoch": 6.992063492063492,
|
|
"grad_norm": 0.341559311677354,
|
|
"learning_rate": 2.2554813067676705e-10,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845405042171478,
|
|
"step": 4405,
|
|
"valid_targets_mean": 6866.9,
|
|
"valid_targets_min": 5859
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.32399391929587706,
|
|
"learning_rate": 6.265237300073778e-12,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26912549138069153,
|
|
"step": 4410,
|
|
"valid_targets_mean": 6379.4,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26912549138069153,
|
|
"step": 4410,
|
|
"total_flos": 1898150523305984.0,
|
|
"train_loss": 0.2966247021476157,
|
|
"train_runtime": 33665.9995,
|
|
"train_samples_per_second": 2.094,
|
|
"train_steps_per_second": 0.131,
|
|
"valid_targets_mean": 6379.4,
|
|
"valid_targets_min": 827
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4410,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1898150523305984.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|