8177 lines
226 KiB
JSON
8177 lines
226 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3696,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00946969696969697,
|
|
"grad_norm": 8.687714728860547,
|
|
"learning_rate": 4.324324324324325e-07,
|
|
"loss": 0.9086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9157116413116455,
|
|
"step": 5,
|
|
"valid_targets_mean": 3560.9,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 0.01893939393939394,
|
|
"grad_norm": 9.44391191852026,
|
|
"learning_rate": 9.72972972972973e-07,
|
|
"loss": 0.9105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8576526641845703,
|
|
"step": 10,
|
|
"valid_targets_mean": 2820.0,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.028409090909090908,
|
|
"grad_norm": 7.801245465380323,
|
|
"learning_rate": 1.5135135135135137e-06,
|
|
"loss": 0.9296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.010927677154541,
|
|
"step": 15,
|
|
"valid_targets_mean": 3801.7,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 0.03787878787878788,
|
|
"grad_norm": 7.753579084555416,
|
|
"learning_rate": 2.054054054054054e-06,
|
|
"loss": 0.8572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8089697360992432,
|
|
"step": 20,
|
|
"valid_targets_mean": 3226.4,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.04734848484848485,
|
|
"grad_norm": 6.126435877347991,
|
|
"learning_rate": 2.594594594594595e-06,
|
|
"loss": 0.7803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8435947895050049,
|
|
"step": 25,
|
|
"valid_targets_mean": 2542.2,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 0.056818181818181816,
|
|
"grad_norm": 3.7281622586985663,
|
|
"learning_rate": 3.1351351351351356e-06,
|
|
"loss": 0.7949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8384841680526733,
|
|
"step": 30,
|
|
"valid_targets_mean": 3713.9,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 0.06628787878787878,
|
|
"grad_norm": 2.3152852734961566,
|
|
"learning_rate": 3.6756756756756763e-06,
|
|
"loss": 0.7782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7312390208244324,
|
|
"step": 35,
|
|
"valid_targets_mean": 3388.5,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 0.07575757575757576,
|
|
"grad_norm": 1.6799959728301728,
|
|
"learning_rate": 4.216216216216217e-06,
|
|
"loss": 0.6717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8152287006378174,
|
|
"step": 40,
|
|
"valid_targets_mean": 3547.2,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.08522727272727272,
|
|
"grad_norm": 1.4579633550294715,
|
|
"learning_rate": 4.756756756756757e-06,
|
|
"loss": 0.7148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6801141500473022,
|
|
"step": 45,
|
|
"valid_targets_mean": 3036.1,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 0.0946969696969697,
|
|
"grad_norm": 1.2123707732199087,
|
|
"learning_rate": 5.297297297297298e-06,
|
|
"loss": 0.6533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6614420413970947,
|
|
"step": 50,
|
|
"valid_targets_mean": 3060.1,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 0.10416666666666667,
|
|
"grad_norm": 1.217971796063111,
|
|
"learning_rate": 5.837837837837839e-06,
|
|
"loss": 0.6344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6328532695770264,
|
|
"step": 55,
|
|
"valid_targets_mean": 2132.4,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 0.11363636363636363,
|
|
"grad_norm": 0.9093591951184479,
|
|
"learning_rate": 6.378378378378379e-06,
|
|
"loss": 0.5791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.561041533946991,
|
|
"step": 60,
|
|
"valid_targets_mean": 2584.6,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 0.12310606060606061,
|
|
"grad_norm": 0.9340950862143942,
|
|
"learning_rate": 6.91891891891892e-06,
|
|
"loss": 0.6246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6120415925979614,
|
|
"step": 65,
|
|
"valid_targets_mean": 2396.8,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 0.13257575757575757,
|
|
"grad_norm": 0.8410927805845392,
|
|
"learning_rate": 7.45945945945946e-06,
|
|
"loss": 0.616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6414203643798828,
|
|
"step": 70,
|
|
"valid_targets_mean": 2806.1,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 0.14204545454545456,
|
|
"grad_norm": 0.751332131150043,
|
|
"learning_rate": 8.000000000000001e-06,
|
|
"loss": 0.6513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6104887127876282,
|
|
"step": 75,
|
|
"valid_targets_mean": 3092.2,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 0.8318183036340143,
|
|
"learning_rate": 8.540540540540542e-06,
|
|
"loss": 0.6009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5744675397872925,
|
|
"step": 80,
|
|
"valid_targets_mean": 2436.5,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 0.16098484848484848,
|
|
"grad_norm": 0.8439319776376424,
|
|
"learning_rate": 9.081081081081082e-06,
|
|
"loss": 0.5654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.526833713054657,
|
|
"step": 85,
|
|
"valid_targets_mean": 3432.9,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 0.17045454545454544,
|
|
"grad_norm": 0.784798841734056,
|
|
"learning_rate": 9.621621621621622e-06,
|
|
"loss": 0.5547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4206632971763611,
|
|
"step": 90,
|
|
"valid_targets_mean": 2033.1,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 0.17992424242424243,
|
|
"grad_norm": 0.7940958768523053,
|
|
"learning_rate": 1.0162162162162164e-05,
|
|
"loss": 0.6135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6740902066230774,
|
|
"step": 95,
|
|
"valid_targets_mean": 4576.4,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 0.1893939393939394,
|
|
"grad_norm": 0.7594354143206765,
|
|
"learning_rate": 1.0702702702702703e-05,
|
|
"loss": 0.5433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5432190895080566,
|
|
"step": 100,
|
|
"valid_targets_mean": 2797.8,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.19886363636363635,
|
|
"grad_norm": 0.7098341988801337,
|
|
"learning_rate": 1.1243243243243245e-05,
|
|
"loss": 0.5267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5413912534713745,
|
|
"step": 105,
|
|
"valid_targets_mean": 2929.4,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 0.20833333333333334,
|
|
"grad_norm": 0.9183627350974325,
|
|
"learning_rate": 1.1783783783783785e-05,
|
|
"loss": 0.5273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5220178365707397,
|
|
"step": 110,
|
|
"valid_targets_mean": 2395.1,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 0.2178030303030303,
|
|
"grad_norm": 0.7252351528791027,
|
|
"learning_rate": 1.2324324324324327e-05,
|
|
"loss": 0.4977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4540213942527771,
|
|
"step": 115,
|
|
"valid_targets_mean": 2495.7,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 0.22727272727272727,
|
|
"grad_norm": 0.7237560713368479,
|
|
"learning_rate": 1.2864864864864865e-05,
|
|
"loss": 0.5132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5129131078720093,
|
|
"step": 120,
|
|
"valid_targets_mean": 3622.9,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 0.23674242424242425,
|
|
"grad_norm": 0.6764512609359697,
|
|
"learning_rate": 1.3405405405405407e-05,
|
|
"loss": 0.5028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5001686215400696,
|
|
"step": 125,
|
|
"valid_targets_mean": 3291.1,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 0.24621212121212122,
|
|
"grad_norm": 0.7438697909876415,
|
|
"learning_rate": 1.3945945945945946e-05,
|
|
"loss": 0.4867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4970878064632416,
|
|
"step": 130,
|
|
"valid_targets_mean": 3091.9,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 0.2556818181818182,
|
|
"grad_norm": 0.7213925677883009,
|
|
"learning_rate": 1.4486486486486488e-05,
|
|
"loss": 0.5294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48219209909439087,
|
|
"step": 135,
|
|
"valid_targets_mean": 3936.8,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 0.26515151515151514,
|
|
"grad_norm": 0.6988805702569482,
|
|
"learning_rate": 1.5027027027027028e-05,
|
|
"loss": 0.4927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46139663457870483,
|
|
"step": 140,
|
|
"valid_targets_mean": 2880.2,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 0.2746212121212121,
|
|
"grad_norm": 0.7029378833775194,
|
|
"learning_rate": 1.556756756756757e-05,
|
|
"loss": 0.508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4820391535758972,
|
|
"step": 145,
|
|
"valid_targets_mean": 3015.7,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 0.2840909090909091,
|
|
"grad_norm": 0.753760034391332,
|
|
"learning_rate": 1.610810810810811e-05,
|
|
"loss": 0.429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5099053382873535,
|
|
"step": 150,
|
|
"valid_targets_mean": 2672.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 0.2935606060606061,
|
|
"grad_norm": 0.7529951549306833,
|
|
"learning_rate": 1.6648648648648652e-05,
|
|
"loss": 0.5097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47587496042251587,
|
|
"step": 155,
|
|
"valid_targets_mean": 2517.6,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 0.750111370611548,
|
|
"learning_rate": 1.718918918918919e-05,
|
|
"loss": 0.5022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.504230260848999,
|
|
"step": 160,
|
|
"valid_targets_mean": 2950.3,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 0.8131568540870425,
|
|
"learning_rate": 1.7729729729729733e-05,
|
|
"loss": 0.4919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47397834062576294,
|
|
"step": 165,
|
|
"valid_targets_mean": 2184.4,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.32196969696969696,
|
|
"grad_norm": 0.5904560540543917,
|
|
"learning_rate": 1.827027027027027e-05,
|
|
"loss": 0.4743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5447852611541748,
|
|
"step": 170,
|
|
"valid_targets_mean": 5999.2,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 0.3314393939393939,
|
|
"grad_norm": 0.6788081313243527,
|
|
"learning_rate": 1.8810810810810813e-05,
|
|
"loss": 0.5281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5425960421562195,
|
|
"step": 175,
|
|
"valid_targets_mean": 3747.2,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 0.3409090909090909,
|
|
"grad_norm": 0.8144847579692093,
|
|
"learning_rate": 1.9351351351351352e-05,
|
|
"loss": 0.4491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47804224491119385,
|
|
"step": 180,
|
|
"valid_targets_mean": 2455.9,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 0.3503787878787879,
|
|
"grad_norm": 0.777887759735664,
|
|
"learning_rate": 1.9891891891891894e-05,
|
|
"loss": 0.4275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38488462567329407,
|
|
"step": 185,
|
|
"valid_targets_mean": 2290.8,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 0.35984848484848486,
|
|
"grad_norm": 0.8203732627588123,
|
|
"learning_rate": 2.0432432432432432e-05,
|
|
"loss": 0.5559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44812387228012085,
|
|
"step": 190,
|
|
"valid_targets_mean": 2365.8,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 0.3693181818181818,
|
|
"grad_norm": 0.712228629360912,
|
|
"learning_rate": 2.0972972972972974e-05,
|
|
"loss": 0.468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4383413791656494,
|
|
"step": 195,
|
|
"valid_targets_mean": 2839.3,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 0.3787878787878788,
|
|
"grad_norm": 0.7462569626749957,
|
|
"learning_rate": 2.1513513513513513e-05,
|
|
"loss": 0.4731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4598328471183777,
|
|
"step": 200,
|
|
"valid_targets_mean": 3115.0,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 0.38825757575757575,
|
|
"grad_norm": 0.7110629348655219,
|
|
"learning_rate": 2.205405405405406e-05,
|
|
"loss": 0.4979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4239012598991394,
|
|
"step": 205,
|
|
"valid_targets_mean": 2553.9,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 0.3977272727272727,
|
|
"grad_norm": 0.8521813539349383,
|
|
"learning_rate": 2.2594594594594597e-05,
|
|
"loss": 0.4903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47966161370277405,
|
|
"step": 210,
|
|
"valid_targets_mean": 2411.2,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 0.4071969696969697,
|
|
"grad_norm": 0.7108744523154932,
|
|
"learning_rate": 2.3135135135135136e-05,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4671946167945862,
|
|
"step": 215,
|
|
"valid_targets_mean": 3435.9,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 0.4166666666666667,
|
|
"grad_norm": 0.7557054683505391,
|
|
"learning_rate": 2.3675675675675674e-05,
|
|
"loss": 0.4746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3366042971611023,
|
|
"step": 220,
|
|
"valid_targets_mean": 2123.9,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 0.42613636363636365,
|
|
"grad_norm": 0.6885118154105949,
|
|
"learning_rate": 2.421621621621622e-05,
|
|
"loss": 0.4415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41184890270233154,
|
|
"step": 225,
|
|
"valid_targets_mean": 3238.5,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.4356060606060606,
|
|
"grad_norm": 0.8611509901979592,
|
|
"learning_rate": 2.4756756756756758e-05,
|
|
"loss": 0.4591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41321122646331787,
|
|
"step": 230,
|
|
"valid_targets_mean": 2907.0,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 0.44507575757575757,
|
|
"grad_norm": 0.6748834593092948,
|
|
"learning_rate": 2.52972972972973e-05,
|
|
"loss": 0.4545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5059018135070801,
|
|
"step": 235,
|
|
"valid_targets_mean": 3650.4,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 0.8885979687190914,
|
|
"learning_rate": 2.583783783783784e-05,
|
|
"loss": 0.4726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5074188709259033,
|
|
"step": 240,
|
|
"valid_targets_mean": 3436.2,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 0.4640151515151515,
|
|
"grad_norm": 0.7765059313576562,
|
|
"learning_rate": 2.637837837837838e-05,
|
|
"loss": 0.4237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4757297933101654,
|
|
"step": 245,
|
|
"valid_targets_mean": 2710.5,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 0.4734848484848485,
|
|
"grad_norm": 0.7068421901281199,
|
|
"learning_rate": 2.6918918918918922e-05,
|
|
"loss": 0.4379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3935742676258087,
|
|
"step": 250,
|
|
"valid_targets_mean": 2882.9,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 0.48295454545454547,
|
|
"grad_norm": 0.7559187784267649,
|
|
"learning_rate": 2.745945945945946e-05,
|
|
"loss": 0.4479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47057420015335083,
|
|
"step": 255,
|
|
"valid_targets_mean": 2793.4,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 0.49242424242424243,
|
|
"grad_norm": 0.9022452572430801,
|
|
"learning_rate": 2.8e-05,
|
|
"loss": 0.4286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5103176236152649,
|
|
"step": 260,
|
|
"valid_targets_mean": 2772.2,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 0.5018939393939394,
|
|
"grad_norm": 0.7028697008909377,
|
|
"learning_rate": 2.8540540540540545e-05,
|
|
"loss": 0.4448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5334185361862183,
|
|
"step": 265,
|
|
"valid_targets_mean": 4366.8,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 0.5113636363636364,
|
|
"grad_norm": 0.7969033457604299,
|
|
"learning_rate": 2.9081081081081083e-05,
|
|
"loss": 0.4865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4469882845878601,
|
|
"step": 270,
|
|
"valid_targets_mean": 2702.1,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 0.5208333333333334,
|
|
"grad_norm": 0.7688501064247357,
|
|
"learning_rate": 2.9621621621621622e-05,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4447226822376251,
|
|
"step": 275,
|
|
"valid_targets_mean": 2669.3,
|
|
"valid_targets_min": 1461
|
|
},
|
|
{
|
|
"epoch": 0.5303030303030303,
|
|
"grad_norm": 0.7514038500708305,
|
|
"learning_rate": 3.0162162162162164e-05,
|
|
"loss": 0.4162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4828983545303345,
|
|
"step": 280,
|
|
"valid_targets_mean": 3318.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 0.5397727272727273,
|
|
"grad_norm": 0.6863237608713711,
|
|
"learning_rate": 3.0702702702702706e-05,
|
|
"loss": 0.4657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3963128328323364,
|
|
"step": 285,
|
|
"valid_targets_mean": 2670.4,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 0.5492424242424242,
|
|
"grad_norm": 0.7034450839014039,
|
|
"learning_rate": 3.124324324324325e-05,
|
|
"loss": 0.4516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45250385999679565,
|
|
"step": 290,
|
|
"valid_targets_mean": 3753.0,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 0.5587121212121212,
|
|
"grad_norm": 0.726150044335056,
|
|
"learning_rate": 3.178378378378378e-05,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4240293800830841,
|
|
"step": 295,
|
|
"valid_targets_mean": 2850.4,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 0.5681818181818182,
|
|
"grad_norm": 0.8372454611155744,
|
|
"learning_rate": 3.2324324324324325e-05,
|
|
"loss": 0.5002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4544684886932373,
|
|
"step": 300,
|
|
"valid_targets_mean": 3178.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 0.5776515151515151,
|
|
"grad_norm": 0.8245380192442264,
|
|
"learning_rate": 3.286486486486487e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5538255572319031,
|
|
"step": 305,
|
|
"valid_targets_mean": 3260.8,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 0.5871212121212122,
|
|
"grad_norm": 0.7904843312667517,
|
|
"learning_rate": 3.340540540540541e-05,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4674568772315979,
|
|
"step": 310,
|
|
"valid_targets_mean": 3284.4,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 0.5965909090909091,
|
|
"grad_norm": 0.8848456762434055,
|
|
"learning_rate": 3.394594594594595e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43480440974235535,
|
|
"step": 315,
|
|
"valid_targets_mean": 2789.4,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 0.6570517330880365,
|
|
"learning_rate": 3.4486486486486486e-05,
|
|
"loss": 0.4632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3827119767665863,
|
|
"step": 320,
|
|
"valid_targets_mean": 2366.9,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 0.615530303030303,
|
|
"grad_norm": 0.8669050250006336,
|
|
"learning_rate": 3.5027027027027035e-05,
|
|
"loss": 0.4349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42654949426651,
|
|
"step": 325,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 0.789922428868788,
|
|
"learning_rate": 3.556756756756757e-05,
|
|
"loss": 0.4726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4431856572628021,
|
|
"step": 330,
|
|
"valid_targets_mean": 2533.2,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 0.634469696969697,
|
|
"grad_norm": 0.716396096663615,
|
|
"learning_rate": 3.610810810810811e-05,
|
|
"loss": 0.4965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5012862682342529,
|
|
"step": 335,
|
|
"valid_targets_mean": 3424.6,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 0.6439393939393939,
|
|
"grad_norm": 0.7028684467080407,
|
|
"learning_rate": 3.664864864864865e-05,
|
|
"loss": 0.4492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48331519961357117,
|
|
"step": 340,
|
|
"valid_targets_mean": 3638.4,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 0.6534090909090909,
|
|
"grad_norm": 0.7653334236022534,
|
|
"learning_rate": 3.7189189189189196e-05,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35957634449005127,
|
|
"step": 345,
|
|
"valid_targets_mean": 2424.1,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 0.6628787878787878,
|
|
"grad_norm": 0.6427629735903666,
|
|
"learning_rate": 3.772972972972973e-05,
|
|
"loss": 0.4236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44240009784698486,
|
|
"step": 350,
|
|
"valid_targets_mean": 4304.3,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 0.6723484848484849,
|
|
"grad_norm": 0.7603187560591832,
|
|
"learning_rate": 3.827027027027027e-05,
|
|
"loss": 0.4412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4714593291282654,
|
|
"step": 355,
|
|
"valid_targets_mean": 2982.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 0.6818181818181818,
|
|
"grad_norm": 0.8639595543318713,
|
|
"learning_rate": 3.8810810810810815e-05,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3224002718925476,
|
|
"step": 360,
|
|
"valid_targets_mean": 1883.3,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 0.6912878787878788,
|
|
"grad_norm": 0.6744016410328966,
|
|
"learning_rate": 3.935135135135136e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44326096773147583,
|
|
"step": 365,
|
|
"valid_targets_mean": 3093.6,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.7007575757575758,
|
|
"grad_norm": 0.7399922177523541,
|
|
"learning_rate": 3.98918918918919e-05,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48060142993927,
|
|
"step": 370,
|
|
"valid_targets_mean": 3106.0,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.7102272727272727,
|
|
"grad_norm": 0.7059006530977384,
|
|
"learning_rate": 3.999985725045862e-05,
|
|
"loss": 0.4343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39739736914634705,
|
|
"step": 375,
|
|
"valid_targets_mean": 2433.8,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 0.7196969696969697,
|
|
"grad_norm": 0.6904210990036138,
|
|
"learning_rate": 3.999927733393917e-05,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41200125217437744,
|
|
"step": 380,
|
|
"valid_targets_mean": 2969.1,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 0.7291666666666666,
|
|
"grad_norm": 0.6643697026923454,
|
|
"learning_rate": 3.9998251341520166e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4023284316062927,
|
|
"step": 385,
|
|
"valid_targets_mean": 2977.9,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 0.7386363636363636,
|
|
"grad_norm": 0.6943437010514465,
|
|
"learning_rate": 3.9996779296085956e-05,
|
|
"loss": 0.4389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44698572158813477,
|
|
"step": 390,
|
|
"valid_targets_mean": 3339.3,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 0.7481060606060606,
|
|
"grad_norm": 0.7302604391825306,
|
|
"learning_rate": 3.999486123046995e-05,
|
|
"loss": 0.4311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34639227390289307,
|
|
"step": 395,
|
|
"valid_targets_mean": 2233.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 0.7197648648706764,
|
|
"learning_rate": 3.999249718745382e-05,
|
|
"loss": 0.4323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44859910011291504,
|
|
"step": 400,
|
|
"valid_targets_mean": 3067.5,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 0.7670454545454546,
|
|
"grad_norm": 0.7257474906730643,
|
|
"learning_rate": 3.9989687219766586e-05,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4311911463737488,
|
|
"step": 405,
|
|
"valid_targets_mean": 2482.9,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 0.7765151515151515,
|
|
"grad_norm": 0.6087154925992447,
|
|
"learning_rate": 3.9986431390083486e-05,
|
|
"loss": 0.4405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45316898822784424,
|
|
"step": 410,
|
|
"valid_targets_mean": 3798.6,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 0.7859848484848485,
|
|
"grad_norm": 0.7101090968530446,
|
|
"learning_rate": 3.9982729771024485e-05,
|
|
"loss": 0.4454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41053667664527893,
|
|
"step": 415,
|
|
"valid_targets_mean": 2462.2,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 0.7954545454545454,
|
|
"grad_norm": 0.7522515105034158,
|
|
"learning_rate": 3.997858244515271e-05,
|
|
"loss": 0.4288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3688526153564453,
|
|
"step": 420,
|
|
"valid_targets_mean": 2486.4,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 0.8049242424242424,
|
|
"grad_norm": 0.7646549635739964,
|
|
"learning_rate": 3.997398950497264e-05,
|
|
"loss": 0.4477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43811848759651184,
|
|
"step": 425,
|
|
"valid_targets_mean": 2914.6,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 0.8143939393939394,
|
|
"grad_norm": 0.782011548118161,
|
|
"learning_rate": 3.996895105292794e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46398311853408813,
|
|
"step": 430,
|
|
"valid_targets_mean": 2388.6,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 0.8238636363636364,
|
|
"grad_norm": 0.6242658841951135,
|
|
"learning_rate": 3.9963467201399296e-05,
|
|
"loss": 0.4421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48359042406082153,
|
|
"step": 435,
|
|
"valid_targets_mean": 4836.9,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.6590084348821017,
|
|
"learning_rate": 3.995753807270181e-05,
|
|
"loss": 0.4883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5101567506790161,
|
|
"step": 440,
|
|
"valid_targets_mean": 4719.2,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 0.8428030303030303,
|
|
"grad_norm": 0.7055458298031291,
|
|
"learning_rate": 3.995116379908234e-05,
|
|
"loss": 0.4226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3877120614051819,
|
|
"step": 445,
|
|
"valid_targets_mean": 2224.4,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 0.8522727272727273,
|
|
"grad_norm": 0.716073808968575,
|
|
"learning_rate": 3.994434452271652e-05,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4801817834377289,
|
|
"step": 450,
|
|
"valid_targets_mean": 3112.5,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 0.8617424242424242,
|
|
"grad_norm": 0.8220657825354768,
|
|
"learning_rate": 3.9937080395705576e-05,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42936602234840393,
|
|
"step": 455,
|
|
"valid_targets_mean": 2405.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 0.8712121212121212,
|
|
"grad_norm": 0.5874753700273456,
|
|
"learning_rate": 3.992937158007297e-05,
|
|
"loss": 0.4461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4628455638885498,
|
|
"step": 460,
|
|
"valid_targets_mean": 3761.6,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 0.8806818181818182,
|
|
"grad_norm": 0.665259818085395,
|
|
"learning_rate": 3.992121824776075e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.414675772190094,
|
|
"step": 465,
|
|
"valid_targets_mean": 2745.5,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 0.8901515151515151,
|
|
"grad_norm": 0.6637857645829813,
|
|
"learning_rate": 3.991262058062576e-05,
|
|
"loss": 0.4139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43361055850982666,
|
|
"step": 470,
|
|
"valid_targets_mean": 2987.0,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 0.8996212121212122,
|
|
"grad_norm": 0.7649280937052625,
|
|
"learning_rate": 3.9903578770435516e-05,
|
|
"loss": 0.4225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39113545417785645,
|
|
"step": 475,
|
|
"valid_targets_mean": 2149.9,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 0.7487953953044645,
|
|
"learning_rate": 3.989409301886398e-05,
|
|
"loss": 0.4394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5525660514831543,
|
|
"step": 480,
|
|
"valid_targets_mean": 3252.9,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 0.9185606060606061,
|
|
"grad_norm": 0.6923253635576399,
|
|
"learning_rate": 3.988416353748707e-05,
|
|
"loss": 0.43,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.431830495595932,
|
|
"step": 485,
|
|
"valid_targets_mean": 2885.9,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 0.928030303030303,
|
|
"grad_norm": 0.7124268954842429,
|
|
"learning_rate": 3.9873790547777905e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3701033592224121,
|
|
"step": 490,
|
|
"valid_targets_mean": 2521.4,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 0.7558539244827671,
|
|
"learning_rate": 3.986297428110187e-05,
|
|
"loss": 0.4314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37805086374282837,
|
|
"step": 495,
|
|
"valid_targets_mean": 2435.8,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 0.946969696969697,
|
|
"grad_norm": 0.7251387950806394,
|
|
"learning_rate": 3.985171497871149e-05,
|
|
"loss": 0.4448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4272584915161133,
|
|
"step": 500,
|
|
"valid_targets_mean": 2598.0,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 0.9564393939393939,
|
|
"grad_norm": 0.6426590097686116,
|
|
"learning_rate": 3.9840012891741004e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4283660054206848,
|
|
"step": 505,
|
|
"valid_targets_mean": 3265.7,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 0.9659090909090909,
|
|
"grad_norm": 0.6726089719443609,
|
|
"learning_rate": 3.982786828120079e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31620165705680847,
|
|
"step": 510,
|
|
"valid_targets_mean": 2291.8,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 0.9753787878787878,
|
|
"grad_norm": 0.6531094973079313,
|
|
"learning_rate": 3.981528141797153e-05,
|
|
"loss": 0.4502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42973166704177856,
|
|
"step": 515,
|
|
"valid_targets_mean": 3812.8,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 0.9848484848484849,
|
|
"grad_norm": 0.6730761620662346,
|
|
"learning_rate": 3.9802252582798206e-05,
|
|
"loss": 0.4379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3429870903491974,
|
|
"step": 520,
|
|
"valid_targets_mean": 2528.9,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 0.9943181818181818,
|
|
"grad_norm": 0.6446927479346494,
|
|
"learning_rate": 3.978878206628378e-05,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47135937213897705,
|
|
"step": 525,
|
|
"valid_targets_mean": 3894.1,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.003787878787879,
|
|
"grad_norm": 0.6316034076742875,
|
|
"learning_rate": 3.9774870168882746e-05,
|
|
"loss": 0.4527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49126577377319336,
|
|
"step": 530,
|
|
"valid_targets_mean": 3419.3,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 1.0132575757575757,
|
|
"grad_norm": 0.761857269796181,
|
|
"learning_rate": 3.9760517200894416e-05,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4052489399909973,
|
|
"step": 535,
|
|
"valid_targets_mean": 2725.1,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.0227272727272727,
|
|
"grad_norm": 0.6415221730365859,
|
|
"learning_rate": 3.9745723482456025e-05,
|
|
"loss": 0.4296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4181866943836212,
|
|
"step": 540,
|
|
"valid_targets_mean": 2886.9,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 1.0321969696969697,
|
|
"grad_norm": 0.690590540502715,
|
|
"learning_rate": 3.973048934353555e-05,
|
|
"loss": 0.4384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4801873564720154,
|
|
"step": 545,
|
|
"valid_targets_mean": 2805.3,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 1.0416666666666667,
|
|
"grad_norm": 0.6628692710290114,
|
|
"learning_rate": 3.971481512392438e-05,
|
|
"loss": 0.4289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41585400700569153,
|
|
"step": 550,
|
|
"valid_targets_mean": 3000.0,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 1.0511363636363635,
|
|
"grad_norm": 0.7997677484855487,
|
|
"learning_rate": 3.969870117322973e-05,
|
|
"loss": 0.3913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44741833209991455,
|
|
"step": 555,
|
|
"valid_targets_mean": 3864.5,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 1.0606060606060606,
|
|
"grad_norm": 0.6326554443625483,
|
|
"learning_rate": 3.968214785086685e-05,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3791867792606354,
|
|
"step": 560,
|
|
"valid_targets_mean": 3059.8,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 1.0700757575757576,
|
|
"grad_norm": 0.7665036641138419,
|
|
"learning_rate": 3.966515552605097e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5288013219833374,
|
|
"step": 565,
|
|
"valid_targets_mean": 3535.7,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.0795454545454546,
|
|
"grad_norm": 0.7013802988335522,
|
|
"learning_rate": 3.964772457778912e-05,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41038429737091064,
|
|
"step": 570,
|
|
"valid_targets_mean": 2661.0,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 1.0890151515151516,
|
|
"grad_norm": 0.6447390981717036,
|
|
"learning_rate": 3.962985539487166e-05,
|
|
"loss": 0.4197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4540190100669861,
|
|
"step": 575,
|
|
"valid_targets_mean": 3434.0,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 1.0984848484848484,
|
|
"grad_norm": 0.6552819498177631,
|
|
"learning_rate": 3.961154837586356e-05,
|
|
"loss": 0.3899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4063706398010254,
|
|
"step": 580,
|
|
"valid_targets_mean": 2667.2,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 1.1079545454545454,
|
|
"grad_norm": 0.6055285269344132,
|
|
"learning_rate": 3.959280392909559e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4059950113296509,
|
|
"step": 585,
|
|
"valid_targets_mean": 3669.4,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.1174242424242424,
|
|
"grad_norm": 0.6806542062933352,
|
|
"learning_rate": 3.957362247265515e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42932671308517456,
|
|
"step": 590,
|
|
"valid_targets_mean": 3078.8,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 1.1268939393939394,
|
|
"grad_norm": 0.6216030531212253,
|
|
"learning_rate": 3.9554004434376966e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3615322709083557,
|
|
"step": 595,
|
|
"valid_targets_mean": 3331.1,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 1.1363636363636362,
|
|
"grad_norm": 0.6204009326755534,
|
|
"learning_rate": 3.9533950251833555e-05,
|
|
"loss": 0.4293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38953977823257446,
|
|
"step": 600,
|
|
"valid_targets_mean": 2875.5,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 1.1458333333333333,
|
|
"grad_norm": 0.6314104686290775,
|
|
"learning_rate": 3.9513460372325466e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29819411039352417,
|
|
"step": 605,
|
|
"valid_targets_mean": 2458.2,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 1.1553030303030303,
|
|
"grad_norm": 0.6672623468193309,
|
|
"learning_rate": 3.949253525287126e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5061877369880676,
|
|
"step": 610,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 1.1647727272727273,
|
|
"grad_norm": 0.5746450751542721,
|
|
"learning_rate": 3.947117536019741e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39186620712280273,
|
|
"step": 615,
|
|
"valid_targets_mean": 3777.2,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 1.1742424242424243,
|
|
"grad_norm": 0.7438860091811257,
|
|
"learning_rate": 3.944938117072776e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4060593545436859,
|
|
"step": 620,
|
|
"valid_targets_mean": 2435.2,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 1.183712121212121,
|
|
"grad_norm": 0.7353145088141256,
|
|
"learning_rate": 3.9427153170573034e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4154336452484131,
|
|
"step": 625,
|
|
"valid_targets_mean": 2958.5,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 1.1931818181818181,
|
|
"grad_norm": 0.6354974155553861,
|
|
"learning_rate": 3.940449185551989e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35959261655807495,
|
|
"step": 630,
|
|
"valid_targets_mean": 2797.6,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 1.2026515151515151,
|
|
"grad_norm": 0.6616631842274717,
|
|
"learning_rate": 3.9381397731019934e-05,
|
|
"loss": 0.45,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5067667365074158,
|
|
"step": 635,
|
|
"valid_targets_mean": 3877.4,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 1.2121212121212122,
|
|
"grad_norm": 0.9671528584193018,
|
|
"learning_rate": 3.935787131217838e-05,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3797389268875122,
|
|
"step": 640,
|
|
"valid_targets_mean": 2663.6,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 1.2215909090909092,
|
|
"grad_norm": 0.57551168616709,
|
|
"learning_rate": 3.9333913123742625e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42554861307144165,
|
|
"step": 645,
|
|
"valid_targets_mean": 3919.5,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 1.231060606060606,
|
|
"grad_norm": 0.6102558345044318,
|
|
"learning_rate": 3.930952370009048e-05,
|
|
"loss": 0.4097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37100544571876526,
|
|
"step": 650,
|
|
"valid_targets_mean": 2921.2,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 1.240530303030303,
|
|
"grad_norm": 0.5967759761903922,
|
|
"learning_rate": 3.928470358521833e-05,
|
|
"loss": 0.4256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38805723190307617,
|
|
"step": 655,
|
|
"valid_targets_mean": 3017.4,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.6704660074180865,
|
|
"learning_rate": 3.925945333272892e-05,
|
|
"loss": 0.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46485066413879395,
|
|
"step": 660,
|
|
"valid_targets_mean": 3124.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.259469696969697,
|
|
"grad_norm": 0.6519923081103552,
|
|
"learning_rate": 3.9233773505819057e-05,
|
|
"loss": 0.3947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3752923309803009,
|
|
"step": 665,
|
|
"valid_targets_mean": 2662.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 1.268939393939394,
|
|
"grad_norm": 0.6035015156830779,
|
|
"learning_rate": 3.920766467726703e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38695183396339417,
|
|
"step": 670,
|
|
"valid_targets_mean": 3120.6,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 1.2784090909090908,
|
|
"grad_norm": 0.6708662342476348,
|
|
"learning_rate": 3.9181127429419836e-05,
|
|
"loss": 0.4269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4673488140106201,
|
|
"step": 675,
|
|
"valid_targets_mean": 2938.6,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 1.2878787878787878,
|
|
"grad_norm": 0.5862961461063759,
|
|
"learning_rate": 3.91541623541802e-05,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43873700499534607,
|
|
"step": 680,
|
|
"valid_targets_mean": 3457.9,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.2973484848484849,
|
|
"grad_norm": 0.6741666973962984,
|
|
"learning_rate": 3.9126770052993374e-05,
|
|
"loss": 0.4266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4370829463005066,
|
|
"step": 685,
|
|
"valid_targets_mean": 2786.0,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 1.3068181818181819,
|
|
"grad_norm": 0.7231732903736157,
|
|
"learning_rate": 3.909895113683369e-05,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42647579312324524,
|
|
"step": 690,
|
|
"valid_targets_mean": 3770.9,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 1.316287878787879,
|
|
"grad_norm": 0.6058221356251443,
|
|
"learning_rate": 3.907070622619099e-05,
|
|
"loss": 0.4046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4271622896194458,
|
|
"step": 695,
|
|
"valid_targets_mean": 3031.1,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.3257575757575757,
|
|
"grad_norm": 0.6470710578392214,
|
|
"learning_rate": 3.904203595105672e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4194009304046631,
|
|
"step": 700,
|
|
"valid_targets_mean": 2905.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 1.3352272727272727,
|
|
"grad_norm": 2.2372546100191038,
|
|
"learning_rate": 3.9012940950909913e-05,
|
|
"loss": 0.4473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45529961585998535,
|
|
"step": 705,
|
|
"valid_targets_mean": 3031.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.3446969696969697,
|
|
"grad_norm": 0.6238215670381088,
|
|
"learning_rate": 3.898342187470296e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4112209677696228,
|
|
"step": 710,
|
|
"valid_targets_mean": 3044.3,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 1.3541666666666667,
|
|
"grad_norm": 0.5656800902455107,
|
|
"learning_rate": 3.895347938084706e-05,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39838892221450806,
|
|
"step": 715,
|
|
"valid_targets_mean": 3629.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.3636363636363638,
|
|
"grad_norm": 0.5690529796447313,
|
|
"learning_rate": 3.89231141371976e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28948310017585754,
|
|
"step": 720,
|
|
"valid_targets_mean": 2662.8,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.3731060606060606,
|
|
"grad_norm": 0.5864684959570857,
|
|
"learning_rate": 3.8892326821039205e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3599168658256531,
|
|
"step": 725,
|
|
"valid_targets_mean": 2978.3,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 1.3825757575757576,
|
|
"grad_norm": 0.7083404970017486,
|
|
"learning_rate": 3.886111811907069e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3933259844779968,
|
|
"step": 730,
|
|
"valid_targets_mean": 2733.9,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 1.3920454545454546,
|
|
"grad_norm": 0.5478701988160245,
|
|
"learning_rate": 3.882948872738969e-05,
|
|
"loss": 0.4139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3255859911441803,
|
|
"step": 735,
|
|
"valid_targets_mean": 3306.4,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 1.4015151515151514,
|
|
"grad_norm": 0.6234638282835069,
|
|
"learning_rate": 3.879743935147717e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3531230092048645,
|
|
"step": 740,
|
|
"valid_targets_mean": 2869.8,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 1.4109848484848486,
|
|
"grad_norm": 0.6467375174983417,
|
|
"learning_rate": 3.8764970706181665e-05,
|
|
"loss": 0.4002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31948328018188477,
|
|
"step": 745,
|
|
"valid_targets_mean": 2352.8,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 1.4204545454545454,
|
|
"grad_norm": 0.5903659566208808,
|
|
"learning_rate": 3.8732083515703353e-05,
|
|
"loss": 0.3564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34983542561531067,
|
|
"step": 750,
|
|
"valid_targets_mean": 2851.4,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 1.4299242424242424,
|
|
"grad_norm": 0.6748545090260847,
|
|
"learning_rate": 3.869877851357789e-05,
|
|
"loss": 0.3938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35444360971450806,
|
|
"step": 755,
|
|
"valid_targets_mean": 2364.1,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 1.4393939393939394,
|
|
"grad_norm": 0.6783824292232773,
|
|
"learning_rate": 3.866505644266006e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3265710771083832,
|
|
"step": 760,
|
|
"valid_targets_mean": 2406.2,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 1.4488636363636362,
|
|
"grad_norm": 0.6215192521938441,
|
|
"learning_rate": 3.863091805510719e-05,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3712896406650543,
|
|
"step": 765,
|
|
"valid_targets_mean": 2880.6,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 1.4583333333333333,
|
|
"grad_norm": 0.6189696877508291,
|
|
"learning_rate": 3.8596364112362393e-05,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3593665361404419,
|
|
"step": 770,
|
|
"valid_targets_mean": 2955.9,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 1.4678030303030303,
|
|
"grad_norm": 0.7427202858232096,
|
|
"learning_rate": 3.856139538513758e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.474393755197525,
|
|
"step": 775,
|
|
"valid_targets_mean": 2872.2,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 1.4772727272727273,
|
|
"grad_norm": 0.6467178846765422,
|
|
"learning_rate": 3.852601265339625e-05,
|
|
"loss": 0.3888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4436153769493103,
|
|
"step": 780,
|
|
"valid_targets_mean": 3159.6,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 1.4867424242424243,
|
|
"grad_norm": 0.687788550313866,
|
|
"learning_rate": 3.8490216706336116e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3544483482837677,
|
|
"step": 785,
|
|
"valid_targets_mean": 2506.6,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 1.496212121212121,
|
|
"grad_norm": 0.6056130463565335,
|
|
"learning_rate": 3.8454008342371486e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45660361647605896,
|
|
"step": 790,
|
|
"valid_targets_mean": 4043.1,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 1.5056818181818183,
|
|
"grad_norm": 0.7039021074014782,
|
|
"learning_rate": 3.8417388369115474e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005557656288147,
|
|
"step": 795,
|
|
"valid_targets_mean": 2000.1,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 1.5151515151515151,
|
|
"grad_norm": 0.651204285870595,
|
|
"learning_rate": 3.838035760336196e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40659600496292114,
|
|
"step": 800,
|
|
"valid_targets_mean": 2632.4,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 1.5246212121212122,
|
|
"grad_norm": 0.6472373003221926,
|
|
"learning_rate": 3.834291687106742e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33355090022087097,
|
|
"step": 805,
|
|
"valid_targets_mean": 2353.5,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 1.5340909090909092,
|
|
"grad_norm": 0.5511357579221835,
|
|
"learning_rate": 3.8305067007332415e-05,
|
|
"loss": 0.4236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3600749969482422,
|
|
"step": 810,
|
|
"valid_targets_mean": 3387.2,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.543560606060606,
|
|
"grad_norm": 0.5877213232520649,
|
|
"learning_rate": 3.8266808856383066e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3851625919342041,
|
|
"step": 815,
|
|
"valid_targets_mean": 2985.9,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 1.553030303030303,
|
|
"grad_norm": 0.7198093966656267,
|
|
"learning_rate": 3.822814327155216e-05,
|
|
"loss": 0.4222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43124186992645264,
|
|
"step": 820,
|
|
"valid_targets_mean": 2410.2,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 1.5625,
|
|
"grad_norm": 0.6372535209839998,
|
|
"learning_rate": 3.818907111526014e-05,
|
|
"loss": 0.3868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3842049539089203,
|
|
"step": 825,
|
|
"valid_targets_mean": 2877.5,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 1.571969696969697,
|
|
"grad_norm": 0.5854940396380293,
|
|
"learning_rate": 3.814959325899585e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3429268002510071,
|
|
"step": 830,
|
|
"valid_targets_mean": 2909.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 1.581439393939394,
|
|
"grad_norm": 0.6437277691644642,
|
|
"learning_rate": 3.810971058329712e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3440169095993042,
|
|
"step": 835,
|
|
"valid_targets_mean": 2426.2,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 1.5909090909090908,
|
|
"grad_norm": 0.6285397490245834,
|
|
"learning_rate": 3.806942397773113e-05,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3548210859298706,
|
|
"step": 840,
|
|
"valid_targets_mean": 2489.5,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 1.6003787878787878,
|
|
"grad_norm": 0.6065038836634405,
|
|
"learning_rate": 3.8028734340874515e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4580554664134979,
|
|
"step": 845,
|
|
"valid_targets_mean": 3926.1,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 1.6098484848484849,
|
|
"grad_norm": 0.6078783925374002,
|
|
"learning_rate": 3.7987642580293406e-05,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4924246668815613,
|
|
"step": 850,
|
|
"valid_targets_mean": 3424.6,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 1.6193181818181817,
|
|
"grad_norm": 0.7572543318158582,
|
|
"learning_rate": 3.794614961252312e-05,
|
|
"loss": 0.3933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.393582820892334,
|
|
"step": 855,
|
|
"valid_targets_mean": 2252.2,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 1.628787878787879,
|
|
"grad_norm": 0.7052297209450726,
|
|
"learning_rate": 3.7904256363047735e-05,
|
|
"loss": 0.386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36443302035331726,
|
|
"step": 860,
|
|
"valid_targets_mean": 2264.5,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 1.6382575757575757,
|
|
"grad_norm": 0.635111488590503,
|
|
"learning_rate": 3.786196376627947e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072546720504761,
|
|
"step": 865,
|
|
"valid_targets_mean": 2286.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 1.6477272727272727,
|
|
"grad_norm": 0.6318335577862783,
|
|
"learning_rate": 3.781927276553782e-05,
|
|
"loss": 0.4308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3698217272758484,
|
|
"step": 870,
|
|
"valid_targets_mean": 2751.1,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.6571969696969697,
|
|
"grad_norm": 0.6209016595038966,
|
|
"learning_rate": 3.777618431302851e-05,
|
|
"loss": 0.4235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43773892521858215,
|
|
"step": 875,
|
|
"valid_targets_mean": 2993.8,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.6278129906727719,
|
|
"learning_rate": 3.773269936982228e-05,
|
|
"loss": 0.4216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40297383069992065,
|
|
"step": 880,
|
|
"valid_targets_mean": 3320.8,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 1.6761363636363638,
|
|
"grad_norm": 0.6306902682907488,
|
|
"learning_rate": 3.768881890583344e-05,
|
|
"loss": 0.4134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42257997393608093,
|
|
"step": 885,
|
|
"valid_targets_mean": 2971.5,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 1.6856060606060606,
|
|
"grad_norm": 0.6227823268781754,
|
|
"learning_rate": 3.764454389979823e-05,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3216041028499603,
|
|
"step": 890,
|
|
"valid_targets_mean": 2226.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 1.6950757575757576,
|
|
"grad_norm": 1.0013550155397126,
|
|
"learning_rate": 3.759987533925297e-05,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4422708749771118,
|
|
"step": 895,
|
|
"valid_targets_mean": 3406.0,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 1.7045454545454546,
|
|
"grad_norm": 0.6263179234464797,
|
|
"learning_rate": 3.75548142205121e-05,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38917651772499084,
|
|
"step": 900,
|
|
"valid_targets_mean": 2951.9,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 1.7140151515151514,
|
|
"grad_norm": 0.6737910424003686,
|
|
"learning_rate": 3.7509361548645876e-05,
|
|
"loss": 0.3793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39876434206962585,
|
|
"step": 905,
|
|
"valid_targets_mean": 2670.9,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 1.7234848484848486,
|
|
"grad_norm": 0.6657017953680348,
|
|
"learning_rate": 3.746351833745801e-05,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4156784117221832,
|
|
"step": 910,
|
|
"valid_targets_mean": 2693.2,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 1.7329545454545454,
|
|
"grad_norm": 0.6193944127284645,
|
|
"learning_rate": 3.741728560946303e-05,
|
|
"loss": 0.3876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35384005308151245,
|
|
"step": 915,
|
|
"valid_targets_mean": 2662.0,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.7424242424242424,
|
|
"grad_norm": 0.536796733962674,
|
|
"learning_rate": 3.737066439586348e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4114513397216797,
|
|
"step": 920,
|
|
"valid_targets_mean": 3911.6,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 1.7518939393939394,
|
|
"grad_norm": 0.7105155115111549,
|
|
"learning_rate": 3.732365573652694e-05,
|
|
"loss": 0.4386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47635161876678467,
|
|
"step": 925,
|
|
"valid_targets_mean": 2787.6,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 1.7613636363636362,
|
|
"grad_norm": 0.7137407524766849,
|
|
"learning_rate": 3.727626067996278e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31371888518333435,
|
|
"step": 930,
|
|
"valid_targets_mean": 2650.9,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 1.7708333333333335,
|
|
"grad_norm": 0.5991878125350675,
|
|
"learning_rate": 3.7228480283298826e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36515945196151733,
|
|
"step": 935,
|
|
"valid_targets_mean": 2892.6,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.7803030303030303,
|
|
"grad_norm": 0.7675569874103912,
|
|
"learning_rate": 3.718031561225775e-05,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38159579038619995,
|
|
"step": 940,
|
|
"valid_targets_mean": 2819.0,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 1.7897727272727273,
|
|
"grad_norm": 0.6087028511596431,
|
|
"learning_rate": 3.7131767741133336e-05,
|
|
"loss": 0.4076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38360902667045593,
|
|
"step": 945,
|
|
"valid_targets_mean": 2993.4,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.7992424242424243,
|
|
"grad_norm": 0.6746251150915344,
|
|
"learning_rate": 3.708283775276646e-05,
|
|
"loss": 0.3785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3412631154060364,
|
|
"step": 950,
|
|
"valid_targets_mean": 2322.7,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 1.808712121212121,
|
|
"grad_norm": 0.6146448542775192,
|
|
"learning_rate": 3.703352673852099e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4567805528640747,
|
|
"step": 955,
|
|
"valid_targets_mean": 3587.2,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 1.8181818181818183,
|
|
"grad_norm": 0.5225288425887636,
|
|
"learning_rate": 3.6983835798259413e-05,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38839465379714966,
|
|
"step": 960,
|
|
"valid_targets_mean": 3993.2,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 1.8276515151515151,
|
|
"grad_norm": 0.6597359509094284,
|
|
"learning_rate": 3.6933766040318323e-05,
|
|
"loss": 0.3785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30244168639183044,
|
|
"step": 965,
|
|
"valid_targets_mean": 2188.2,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 1.8371212121212122,
|
|
"grad_norm": 0.5961821988504793,
|
|
"learning_rate": 3.688331858148371e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3585679531097412,
|
|
"step": 970,
|
|
"valid_targets_mean": 3047.6,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 1.8465909090909092,
|
|
"grad_norm": 0.6443299938133737,
|
|
"learning_rate": 3.683249454696598e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3137340247631073,
|
|
"step": 975,
|
|
"valid_targets_mean": 2375.2,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 1.856060606060606,
|
|
"grad_norm": 0.6923498037009496,
|
|
"learning_rate": 3.678129507037496e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39877134561538696,
|
|
"step": 980,
|
|
"valid_targets_mean": 2734.2,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 1.865530303030303,
|
|
"grad_norm": 0.6361138719139272,
|
|
"learning_rate": 3.672972129369453e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40384894609451294,
|
|
"step": 985,
|
|
"valid_targets_mean": 2964.6,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 1.875,
|
|
"grad_norm": 0.6448987234717858,
|
|
"learning_rate": 3.66777743672572e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.436010479927063,
|
|
"step": 990,
|
|
"valid_targets_mean": 3234.2,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 1.884469696969697,
|
|
"grad_norm": 0.6566147326745773,
|
|
"learning_rate": 3.662545544971844e-05,
|
|
"loss": 0.3943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44196996092796326,
|
|
"step": 995,
|
|
"valid_targets_mean": 2977.2,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 1.893939393939394,
|
|
"grad_norm": 0.5792422881482142,
|
|
"learning_rate": 3.6572765708030816e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3386695086956024,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2736.6,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 1.9034090909090908,
|
|
"grad_norm": 0.5954897839334274,
|
|
"learning_rate": 3.6519706317418e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47396373748779297,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4549.3,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 1.9128787878787878,
|
|
"grad_norm": 0.6753396528313356,
|
|
"learning_rate": 3.6466278461348516e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3595288395881653,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2483.2,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 1.9223484848484849,
|
|
"grad_norm": 0.6253912397781534,
|
|
"learning_rate": 3.641248333150938e-05,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33992117643356323,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2439.1,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 1.9318181818181817,
|
|
"grad_norm": 0.5359146746627454,
|
|
"learning_rate": 3.635832212777948e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3760654926300049,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3752.5,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 1.941287878787879,
|
|
"grad_norm": 0.5698255287178197,
|
|
"learning_rate": 3.6303796058202865e-05,
|
|
"loss": 0.3649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41591978073120117,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3689.0,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 1.9507575757575757,
|
|
"grad_norm": 0.6229523907831855,
|
|
"learning_rate": 3.6248906338961736e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3853365182876587,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2650.9,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 1.9602272727272727,
|
|
"grad_norm": 0.687037732669012,
|
|
"learning_rate": 3.6193654194349405e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43535369634628296,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3008.3,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 1.9696969696969697,
|
|
"grad_norm": 0.7079231154989691,
|
|
"learning_rate": 3.613804085674289e-05,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37676456570625305,
|
|
"step": 1040,
|
|
"valid_targets_mean": 2716.0,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 1.9791666666666665,
|
|
"grad_norm": 0.6484478944252974,
|
|
"learning_rate": 3.608206756657548e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.360420823097229,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2706.4,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 1.9886363636363638,
|
|
"grad_norm": 0.6732806511242416,
|
|
"learning_rate": 3.602573557230909e-05,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4151829481124878,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3796.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.9981060606060606,
|
|
"grad_norm": 0.6334494482905959,
|
|
"learning_rate": 3.596904613040638e-05,
|
|
"loss": 0.394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3949044346809387,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2921.1,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 2.007575757575758,
|
|
"grad_norm": 0.6067518273747473,
|
|
"learning_rate": 3.5912000505302707e-05,
|
|
"loss": 0.3514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32665830850601196,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2975.5,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 2.0170454545454546,
|
|
"grad_norm": 0.6357774125703869,
|
|
"learning_rate": 3.5854599969377984e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32983386516571045,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2746.8,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 2.0265151515151514,
|
|
"grad_norm": 1.9812669074428304,
|
|
"learning_rate": 3.579684580292826e-05,
|
|
"loss": 0.3611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147047162055969,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3223.4,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 2.0359848484848486,
|
|
"grad_norm": 0.6341741098914326,
|
|
"learning_rate": 3.573873929413716e-05,
|
|
"loss": 0.3607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3770799934864044,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3202.4,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 2.0454545454545454,
|
|
"grad_norm": 0.7461854381333618,
|
|
"learning_rate": 3.5680281739047176e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3996240496635437,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2365.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.054924242424242,
|
|
"grad_norm": 0.6451534282607315,
|
|
"learning_rate": 3.562147444153075e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3866714835166931,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3227.2,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 2.0643939393939394,
|
|
"grad_norm": 0.666391215609453,
|
|
"learning_rate": 3.556231871326118e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38233160972595215,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2982.3,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 2.0738636363636362,
|
|
"grad_norm": 0.6943281571888182,
|
|
"learning_rate": 3.550281587368337e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3907845914363861,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2841.2,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 2.0833333333333335,
|
|
"grad_norm": 0.6406693750064782,
|
|
"learning_rate": 3.544296724998443e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4067705273628235,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3002.4,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 2.0928030303030303,
|
|
"grad_norm": 0.7050277881850183,
|
|
"learning_rate": 3.538277417706401e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37240076065063477,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2869.5,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 2.102272727272727,
|
|
"grad_norm": 0.6637643143357205,
|
|
"learning_rate": 3.532223799750458e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3317979574203491,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2532.9,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 2.1117424242424243,
|
|
"grad_norm": 0.6498259487285378,
|
|
"learning_rate": 3.526136006154147e-05,
|
|
"loss": 0.3726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32798415422439575,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2507.9,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 2.121212121212121,
|
|
"grad_norm": 0.6315138613327461,
|
|
"learning_rate": 3.520014172703275e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958212196826935,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2633.1,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 2.1306818181818183,
|
|
"grad_norm": 0.6923992239756467,
|
|
"learning_rate": 3.5138584359428936e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3799145817756653,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2708.8,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 2.140151515151515,
|
|
"grad_norm": 0.7564050373846809,
|
|
"learning_rate": 3.507668933174255e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37782737612724304,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2382.4,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 2.149621212121212,
|
|
"grad_norm": 0.5161203536759313,
|
|
"learning_rate": 3.5014458024517464e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.368108332157135,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4025.8,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 2.159090909090909,
|
|
"grad_norm": 0.6583177244764001,
|
|
"learning_rate": 3.495189182579818e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3304949998855591,
|
|
"step": 1140,
|
|
"valid_targets_mean": 2519.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 2.168560606060606,
|
|
"grad_norm": 0.6078015608056067,
|
|
"learning_rate": 3.488899213109877e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3422364592552185,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2944.2,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 2.178030303030303,
|
|
"grad_norm": 0.6304756596587294,
|
|
"learning_rate": 3.482576034337183e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3863288462162018,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3267.2,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 2.1875,
|
|
"grad_norm": 0.5925254325191266,
|
|
"learning_rate": 3.4762197872977156e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3592289090156555,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3035.8,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.196969696969697,
|
|
"grad_norm": 0.613075101287476,
|
|
"learning_rate": 3.469830613765026e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36160987615585327,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4038.8,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 2.206439393939394,
|
|
"grad_norm": 0.8963742401741642,
|
|
"learning_rate": 3.463408656247084e-05,
|
|
"loss": 0.3887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41184258460998535,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3125.4,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 2.215909090909091,
|
|
"grad_norm": 0.7182964477238911,
|
|
"learning_rate": 3.456954057983086e-05,
|
|
"loss": 0.3845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39316830039024353,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2967.8,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 2.225378787878788,
|
|
"grad_norm": 0.5509403492115073,
|
|
"learning_rate": 3.45046696294027e-05,
|
|
"loss": 0.3656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35107213258743286,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3772.9,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 2.234848484848485,
|
|
"grad_norm": 0.8022875264241273,
|
|
"learning_rate": 3.443947515810704e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36813992261886597,
|
|
"step": 1180,
|
|
"valid_targets_mean": 2846.2,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 2.2443181818181817,
|
|
"grad_norm": 0.5918151418721938,
|
|
"learning_rate": 3.4373958620080544e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4227719008922577,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3711.9,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 2.253787878787879,
|
|
"grad_norm": 0.5672723426976444,
|
|
"learning_rate": 3.4308121476643425e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43555590510368347,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4170.6,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 2.2632575757575757,
|
|
"grad_norm": 0.6524301544496917,
|
|
"learning_rate": 3.424196519626692e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38282832503318787,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3150.5,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 2.2727272727272725,
|
|
"grad_norm": 0.5769376146286349,
|
|
"learning_rate": 3.417549125454044e-05,
|
|
"loss": 0.3852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4285449981689453,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3824.6,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 2.2821969696969697,
|
|
"grad_norm": 0.6727082627917902,
|
|
"learning_rate": 3.410870113413873e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37507957220077515,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2791.2,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 2.2916666666666665,
|
|
"grad_norm": 0.5552793680752626,
|
|
"learning_rate": 3.4041596324788785e-05,
|
|
"loss": 0.3558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31909480690956116,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3349.6,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 2.3011363636363638,
|
|
"grad_norm": 0.6284837329996423,
|
|
"learning_rate": 3.397417832323658e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41131746768951416,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3427.0,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 2.3106060606060606,
|
|
"grad_norm": 0.698382385880741,
|
|
"learning_rate": 3.3906448633213746e-05,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3846529722213745,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2540.2,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 2.320075757575758,
|
|
"grad_norm": 0.6214165730097354,
|
|
"learning_rate": 3.383840876540398e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38045307993888855,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2858.1,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 2.3295454545454546,
|
|
"grad_norm": 0.6270321470802516,
|
|
"learning_rate": 3.3770060237409384e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30533140897750854,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2698.0,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 2.3390151515151514,
|
|
"grad_norm": 0.640282162783909,
|
|
"learning_rate": 3.37014045737166e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3312714695930481,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2843.5,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 2.3484848484848486,
|
|
"grad_norm": 0.5868312775528356,
|
|
"learning_rate": 3.36324433056628e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35159093141555786,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3154.7,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 2.3579545454545454,
|
|
"grad_norm": 0.6312824374737372,
|
|
"learning_rate": 3.356317797140156e-05,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4727991223335266,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4085.6,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 2.367424242424242,
|
|
"grad_norm": 0.6556855017990704,
|
|
"learning_rate": 3.349361011586851e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34955960512161255,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2794.4,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 2.3768939393939394,
|
|
"grad_norm": 0.6592178273886284,
|
|
"learning_rate": 3.34237412907469e-05,
|
|
"loss": 0.3968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3927568197250366,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3279.1,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 2.3863636363636362,
|
|
"grad_norm": 0.696635664074054,
|
|
"learning_rate": 3.3353573054433e-05,
|
|
"loss": 0.3544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.366418719291687,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2476.6,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 2.3958333333333335,
|
|
"grad_norm": 0.585353274597932,
|
|
"learning_rate": 3.328310697200131e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3249672055244446,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3114.2,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 2.4053030303030303,
|
|
"grad_norm": 0.5889688681332489,
|
|
"learning_rate": 3.321234461516968e-05,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40575236082077026,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4107.5,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 2.4147727272727275,
|
|
"grad_norm": 0.6330719827665698,
|
|
"learning_rate": 3.314128756226424e-05,
|
|
"loss": 0.3527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3900808095932007,
|
|
"step": 1275,
|
|
"valid_targets_mean": 2660.6,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 2.4242424242424243,
|
|
"grad_norm": 0.6141369622904939,
|
|
"learning_rate": 3.306993739818419e-05,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3714469075202942,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3575.9,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 2.433712121212121,
|
|
"grad_norm": 0.7223899111157773,
|
|
"learning_rate": 3.2998295714366485e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3195560574531555,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2129.7,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 2.4431818181818183,
|
|
"grad_norm": 0.598038743149409,
|
|
"learning_rate": 3.292636410875027e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37590736150741577,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3236.3,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 2.452651515151515,
|
|
"grad_norm": 0.6640510169692915,
|
|
"learning_rate": 3.28541441857413e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37221455574035645,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2695.0,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 2.462121212121212,
|
|
"grad_norm": 0.6911109878198171,
|
|
"learning_rate": 3.278163755617616e-05,
|
|
"loss": 0.382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36092811822891235,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3438.5,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 2.471590909090909,
|
|
"grad_norm": 0.6507334983609933,
|
|
"learning_rate": 3.270884583728627e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44592881202697754,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3983.1,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 2.481060606060606,
|
|
"grad_norm": 0.6623776527884195,
|
|
"learning_rate": 3.2635770652661866e-05,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4581608474254608,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3559.3,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 2.490530303030303,
|
|
"grad_norm": 0.6472581145351607,
|
|
"learning_rate": 3.2562413632215784e-05,
|
|
"loss": 0.3653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.372177392244339,
|
|
"step": 1315,
|
|
"valid_targets_mean": 2858.4,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.6423554949775815,
|
|
"learning_rate": 3.24887764121471e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29365307092666626,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2573.6,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 2.5094696969696972,
|
|
"grad_norm": 0.6135737048152438,
|
|
"learning_rate": 3.24148606349046e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3749263286590576,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3357.9,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 2.518939393939394,
|
|
"grad_norm": 0.6200177066699752,
|
|
"learning_rate": 3.2340667949150206e-05,
|
|
"loss": 0.3664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32676559686660767,
|
|
"step": 1330,
|
|
"valid_targets_mean": 2800.3,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 2.528409090909091,
|
|
"grad_norm": 0.6081658356852767,
|
|
"learning_rate": 3.226620000972216e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3985334038734436,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3517.9,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 2.537878787878788,
|
|
"grad_norm": 0.5849153135310672,
|
|
"learning_rate": 3.219145847759815e-05,
|
|
"loss": 0.3804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4115307629108429,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4212.4,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 2.547348484848485,
|
|
"grad_norm": 0.6688729020188083,
|
|
"learning_rate": 3.21164450198582e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4513026475906372,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3527.6,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 2.5568181818181817,
|
|
"grad_norm": 0.6619426138580016,
|
|
"learning_rate": 3.204116130964756e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36756688356399536,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3009.9,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 2.566287878787879,
|
|
"grad_norm": 0.5878124588508259,
|
|
"learning_rate": 3.196560902613933e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35253700613975525,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2947.8,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 2.5757575757575757,
|
|
"grad_norm": 0.653703988282283,
|
|
"learning_rate": 3.188978985449706e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32623544335365295,
|
|
"step": 1360,
|
|
"valid_targets_mean": 2900.6,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 2.5852272727272725,
|
|
"grad_norm": 0.6034679739940356,
|
|
"learning_rate": 3.18137054858371e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37140268087387085,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3256.7,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 2.5946969696969697,
|
|
"grad_norm": 0.6214125780158161,
|
|
"learning_rate": 3.173735761719094e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35767662525177,
|
|
"step": 1370,
|
|
"valid_targets_mean": 2846.9,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.6041666666666665,
|
|
"grad_norm": 0.6626603571828742,
|
|
"learning_rate": 3.166074795146732e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3792498707771301,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2506.7,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 2.6136363636363638,
|
|
"grad_norm": 0.6081468726757838,
|
|
"learning_rate": 3.158387819741424e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34944403171539307,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3073.1,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 2.6231060606060606,
|
|
"grad_norm": 0.8147530608799451,
|
|
"learning_rate": 3.1506750069580915e-05,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3546479046344757,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2869.2,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 2.632575757575758,
|
|
"grad_norm": 0.6738744813054032,
|
|
"learning_rate": 3.142936528827944e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3402855396270752,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2534.6,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 2.6420454545454546,
|
|
"grad_norm": 0.6956477087002707,
|
|
"learning_rate": 3.135172557954649e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35989320278167725,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2374.4,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 2.6515151515151514,
|
|
"grad_norm": 0.5941765689533776,
|
|
"learning_rate": 3.127383267510476e-05,
|
|
"loss": 0.391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41383516788482666,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3852.6,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 2.6609848484848486,
|
|
"grad_norm": 0.6203899932740184,
|
|
"learning_rate": 3.119568831232443e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40115517377853394,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3094.1,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 2.6704545454545454,
|
|
"grad_norm": 0.5954667745484522,
|
|
"learning_rate": 3.1117294234184306e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4214431941509247,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4079.2,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 2.679924242424242,
|
|
"grad_norm": 0.5903599733716282,
|
|
"learning_rate": 3.1038652189233015e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33240750432014465,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3000.3,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 2.6893939393939394,
|
|
"grad_norm": 0.6871395435304188,
|
|
"learning_rate": 3.095976393154999e-05,
|
|
"loss": 0.3468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4028029441833496,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2612.0,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 2.6988636363636362,
|
|
"grad_norm": 0.6142433357718043,
|
|
"learning_rate": 3.088063122070633e-05,
|
|
"loss": 0.331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3261798918247223,
|
|
"step": 1425,
|
|
"valid_targets_mean": 2684.9,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 2.7083333333333335,
|
|
"grad_norm": 0.6131777053705455,
|
|
"learning_rate": 3.0801255821725584e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3070215880870819,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2594.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 2.7178030303030303,
|
|
"grad_norm": 0.5958077672359405,
|
|
"learning_rate": 3.072163950504433e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44516491889953613,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3497.8,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 2.7272727272727275,
|
|
"grad_norm": 0.6671518209248153,
|
|
"learning_rate": 3.0641784046472745e-05,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3056536912918091,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2694.8,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.7367424242424243,
|
|
"grad_norm": 0.5749436568103856,
|
|
"learning_rate": 3.056169122715497e-05,
|
|
"loss": 0.3692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3439633846282959,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3099.6,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 2.746212121212121,
|
|
"grad_norm": 0.6515911916529566,
|
|
"learning_rate": 3.0481362833529374e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3087332844734192,
|
|
"step": 1450,
|
|
"valid_targets_mean": 2423.1,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 2.7556818181818183,
|
|
"grad_norm": 0.6099812782277989,
|
|
"learning_rate": 3.0400800657288713e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35250064730644226,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3103.2,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 2.765151515151515,
|
|
"grad_norm": 0.5386516273503262,
|
|
"learning_rate": 3.032000649534018e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37500643730163574,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3926.9,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 2.774621212121212,
|
|
"grad_norm": 0.6545578997925079,
|
|
"learning_rate": 3.0238982149765315e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44129228591918945,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3395.1,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 2.784090909090909,
|
|
"grad_norm": 0.6441863421377303,
|
|
"learning_rate": 3.0157729427779804e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4101092517375946,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3295.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 2.793560606060606,
|
|
"grad_norm": 0.661149189863479,
|
|
"learning_rate": 3.00762501416932e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3834726810455322,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2993.9,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 2.8030303030303028,
|
|
"grad_norm": 0.7134233141740134,
|
|
"learning_rate": 2.9994546108868445e-05,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3811787962913513,
|
|
"step": 1480,
|
|
"valid_targets_mean": 2598.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 2.8125,
|
|
"grad_norm": 0.7455321612511406,
|
|
"learning_rate": 2.9912619151681396e-05,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31110620498657227,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2458.8,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 2.8219696969696972,
|
|
"grad_norm": 0.662404311498716,
|
|
"learning_rate": 2.983047109748013e-05,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4849827289581299,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3280.1,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 2.831439393939394,
|
|
"grad_norm": 0.6275785468792403,
|
|
"learning_rate": 2.9748103778544213e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35886654257774353,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2898.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.840909090909091,
|
|
"grad_norm": 0.6537106913377588,
|
|
"learning_rate": 2.966551903204383e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3127059042453766,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2293.4,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 2.850378787878788,
|
|
"grad_norm": 0.638832452572433,
|
|
"learning_rate": 2.958271869999878e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3934442400932312,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2748.2,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.859848484848485,
|
|
"grad_norm": 0.6434122973668456,
|
|
"learning_rate": 2.949970462923744e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34700238704681396,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2710.2,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 2.8693181818181817,
|
|
"grad_norm": 0.6198591868767468,
|
|
"learning_rate": 2.941647867135552e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31982704997062683,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2943.4,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 2.878787878787879,
|
|
"grad_norm": 0.6035286330446781,
|
|
"learning_rate": 2.9333042682674793e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2906029224395752,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2557.2,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 2.8882575757575757,
|
|
"grad_norm": 0.6516623903124585,
|
|
"learning_rate": 2.9249398524201694e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.346318781375885,
|
|
"step": 1525,
|
|
"valid_targets_mean": 2544.4,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 2.8977272727272725,
|
|
"grad_norm": 0.6162520543510063,
|
|
"learning_rate": 2.91655480615858e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4944382309913635,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4391.8,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 2.9071969696969697,
|
|
"grad_norm": 0.5919513381205782,
|
|
"learning_rate": 2.9081493165078217e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3467448055744171,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3182.8,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 2.9166666666666665,
|
|
"grad_norm": 0.5804336196459176,
|
|
"learning_rate": 2.8997235709489853e-05,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3572392165660858,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3354.8,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 2.9261363636363638,
|
|
"grad_norm": 0.6110931617009046,
|
|
"learning_rate": 2.8912777574149644e-05,
|
|
"loss": 0.3931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35870361328125,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3097.9,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 2.9356060606060606,
|
|
"grad_norm": 0.6319453189848042,
|
|
"learning_rate": 2.882812064286258e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30660805106163025,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2384.7,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 2.945075757575758,
|
|
"grad_norm": 0.678104113183451,
|
|
"learning_rate": 2.8743266803867745e-05,
|
|
"loss": 0.3692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293647438287735,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2065.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 2.9545454545454546,
|
|
"grad_norm": 0.634658596499621,
|
|
"learning_rate": 2.8658217949796142e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3554021120071411,
|
|
"step": 1560,
|
|
"valid_targets_mean": 2932.0,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 2.9640151515151514,
|
|
"grad_norm": 0.6593660146995721,
|
|
"learning_rate": 2.8572975977628518e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4099416732788086,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2974.4,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 2.9734848484848486,
|
|
"grad_norm": 0.6405951806274508,
|
|
"learning_rate": 2.8487542788653047e-05,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.341849684715271,
|
|
"step": 1570,
|
|
"valid_targets_mean": 2842.1,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 2.9829545454545454,
|
|
"grad_norm": 0.5347713666664129,
|
|
"learning_rate": 2.840192028842292e-05,
|
|
"loss": 0.3928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33453184366226196,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3965.0,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 2.992424242424242,
|
|
"grad_norm": 0.6823221885203572,
|
|
"learning_rate": 2.8316110386713823e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3766270875930786,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2918.2,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.0018939393939394,
|
|
"grad_norm": 0.6301512136579102,
|
|
"learning_rate": 2.8230114997481375e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537193298339844,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2355.9,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 3.0113636363636362,
|
|
"grad_norm": 0.5577983827813736,
|
|
"learning_rate": 2.8143936038818417e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3179975152015686,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4350.2,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 3.0208333333333335,
|
|
"grad_norm": 0.6603658205782935,
|
|
"learning_rate": 2.805757543291222e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3200560212135315,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3702.2,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 3.0303030303030303,
|
|
"grad_norm": 0.6842129852137848,
|
|
"learning_rate": 2.797103510600164e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39970946311950684,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3548.6,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 3.039772727272727,
|
|
"grad_norm": 0.7300913873254511,
|
|
"learning_rate": 2.7884316988334127e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285548597574234,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2158.0,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 3.0492424242424243,
|
|
"grad_norm": 0.7586081467730309,
|
|
"learning_rate": 2.7797423014122694e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33878371119499207,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2850.8,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 3.058712121212121,
|
|
"grad_norm": 0.6407993509571133,
|
|
"learning_rate": 2.771035512150275e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27003076672554016,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2669.4,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 3.0681818181818183,
|
|
"grad_norm": 0.6531894528376352,
|
|
"learning_rate": 2.7623115252488908e-05,
|
|
"loss": 0.3853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38037651777267456,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3312.5,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 3.077651515151515,
|
|
"grad_norm": 0.6688668908694053,
|
|
"learning_rate": 2.753570535293161e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29739874601364136,
|
|
"step": 1625,
|
|
"valid_targets_mean": 2475.4,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 3.087121212121212,
|
|
"grad_norm": 0.6064144878094633,
|
|
"learning_rate": 2.7448127372473793e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33377522230148315,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3129.1,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 3.096590909090909,
|
|
"grad_norm": 0.6709571423713865,
|
|
"learning_rate": 2.7360383264507366e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704837918281555,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2546.5,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 3.106060606060606,
|
|
"grad_norm": 0.6638531281722483,
|
|
"learning_rate": 2.727247498612963e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3492266535758972,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2682.2,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 3.115530303030303,
|
|
"grad_norm": 0.6329025849694069,
|
|
"learning_rate": 2.7184404498099647e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3587942123413086,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3128.0,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 3.125,
|
|
"grad_norm": 0.8195061138434774,
|
|
"learning_rate": 2.7096173764794514e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3900783658027649,
|
|
"step": 1650,
|
|
"valid_targets_mean": 2162.3,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 3.134469696969697,
|
|
"grad_norm": 0.7510758563150106,
|
|
"learning_rate": 2.7007784754165528e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2995261549949646,
|
|
"step": 1655,
|
|
"valid_targets_mean": 2324.4,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 3.143939393939394,
|
|
"grad_norm": 0.641241969089199,
|
|
"learning_rate": 2.6919239437694292e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3183354139328003,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3264.2,
|
|
"valid_targets_min": 1658
|
|
},
|
|
{
|
|
"epoch": 3.153409090909091,
|
|
"grad_norm": 0.773986810774813,
|
|
"learning_rate": 2.6830539790348754e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3400264382362366,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2204.2,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 3.162878787878788,
|
|
"grad_norm": 0.7068089501459185,
|
|
"learning_rate": 2.674168779053915e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37632396817207336,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2705.8,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.172348484848485,
|
|
"grad_norm": 0.6576540758983097,
|
|
"learning_rate": 2.665268542007387e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2991321384906769,
|
|
"step": 1675,
|
|
"valid_targets_mean": 2771.7,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 3.1818181818181817,
|
|
"grad_norm": 0.748565369100263,
|
|
"learning_rate": 2.6563534664115275e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30832135677337646,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2852.4,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.191287878787879,
|
|
"grad_norm": 0.683681163354747,
|
|
"learning_rate": 2.6474237511135394e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33145153522491455,
|
|
"step": 1685,
|
|
"valid_targets_mean": 2849.2,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 3.2007575757575757,
|
|
"grad_norm": 0.6936135917988684,
|
|
"learning_rate": 2.6384795952871593e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3220590054988861,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2969.5,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 3.210227272727273,
|
|
"grad_norm": 0.6586881450424636,
|
|
"learning_rate": 2.6295211984282133e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37260201573371887,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3325.3,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 3.2196969696969697,
|
|
"grad_norm": 0.6024997520333087,
|
|
"learning_rate": 2.6205487603501677e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39180779457092285,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3976.9,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 3.2291666666666665,
|
|
"grad_norm": 0.6718184622632953,
|
|
"learning_rate": 2.6115624811796733e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2270316779613495,
|
|
"step": 1705,
|
|
"valid_targets_mean": 2303.2,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 3.2386363636363638,
|
|
"grad_norm": 0.6175945087171051,
|
|
"learning_rate": 2.6025625613521002e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006656765937805,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3267.2,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 3.2481060606060606,
|
|
"grad_norm": 0.6048130054957186,
|
|
"learning_rate": 2.59354920160707e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3190374970436096,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3053.6,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 3.257575757575758,
|
|
"grad_norm": 0.5997554944803695,
|
|
"learning_rate": 2.584522602983973e-05,
|
|
"loss": 0.3506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33289796113967896,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3499.4,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 3.2670454545454546,
|
|
"grad_norm": 0.6471267273660105,
|
|
"learning_rate": 2.5754829668174902e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3981403112411499,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4034.2,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 3.2765151515151514,
|
|
"grad_norm": 0.6502894022713746,
|
|
"learning_rate": 2.5664304947330988e-05,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3391433656215668,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3178.8,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 3.2859848484848486,
|
|
"grad_norm": 0.6660267439245466,
|
|
"learning_rate": 2.5573653886425746e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3259686827659607,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3321.7,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 3.2954545454545454,
|
|
"grad_norm": 0.608642757086891,
|
|
"learning_rate": 2.5482878507394927e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3194117844104767,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3590.9,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 3.304924242424242,
|
|
"grad_norm": 0.6501483984560933,
|
|
"learning_rate": 2.5391980834947124e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32857397198677063,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3158.5,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 3.3143939393939394,
|
|
"grad_norm": 0.660178292328819,
|
|
"learning_rate": 2.530096289651865e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33205971121788025,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2843.4,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 3.3238636363636362,
|
|
"grad_norm": 0.6458556393895819,
|
|
"learning_rate": 2.5209826722228295e-05,
|
|
"loss": 0.3585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3754691183567047,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3750.4,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.7564855365652163,
|
|
"learning_rate": 2.5118574344832066e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27913403511047363,
|
|
"step": 1760,
|
|
"valid_targets_mean": 2138.9,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.3428030303030303,
|
|
"grad_norm": 0.7057147565083695,
|
|
"learning_rate": 2.5027207799677804e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3535635471343994,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2759.2,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 3.3522727272727275,
|
|
"grad_norm": 0.631732916483994,
|
|
"learning_rate": 2.493572912465985e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26970988512039185,
|
|
"step": 1770,
|
|
"valid_targets_mean": 2627.7,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 3.3617424242424243,
|
|
"grad_norm": 0.6929411642281869,
|
|
"learning_rate": 2.484414036017354e-05,
|
|
"loss": 0.3573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37282320857048035,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2879.7,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 3.371212121212121,
|
|
"grad_norm": 0.6149378952787781,
|
|
"learning_rate": 2.4752443549069716e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771247327327728,
|
|
"step": 1780,
|
|
"valid_targets_mean": 2754.1,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 3.3806818181818183,
|
|
"grad_norm": 0.7512387716226137,
|
|
"learning_rate": 2.466064073660915e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984611392021179,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2633.9,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 3.390151515151515,
|
|
"grad_norm": 0.7344331793950483,
|
|
"learning_rate": 2.4568733970416938e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31682896614074707,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2434.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 3.399621212121212,
|
|
"grad_norm": 0.6998343322225069,
|
|
"learning_rate": 2.4476725300436824e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3464736044406891,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2673.2,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.409090909090909,
|
|
"grad_norm": 0.6770646383442555,
|
|
"learning_rate": 2.4384616778885475e-05,
|
|
"loss": 0.3559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39056408405303955,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3126.4,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.418560606060606,
|
|
"grad_norm": 0.5971324156244269,
|
|
"learning_rate": 2.42924104602067e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34026890993118286,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3670.1,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 3.428030303030303,
|
|
"grad_norm": 0.5876781203188435,
|
|
"learning_rate": 2.4200108401025636e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31103047728538513,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3225.6,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 3.4375,
|
|
"grad_norm": 0.682291828711733,
|
|
"learning_rate": 2.410771266010288e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32919842004776,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2583.4,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 3.446969696969697,
|
|
"grad_norm": 0.6248076826345288,
|
|
"learning_rate": 2.401522529828857e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4183732271194458,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3762.1,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 3.456439393939394,
|
|
"grad_norm": 0.6893317549612219,
|
|
"learning_rate": 2.3922648378476393e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36395278573036194,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3020.4,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.465909090909091,
|
|
"grad_norm": 0.744410596174076,
|
|
"learning_rate": 2.3829983965557594e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4007588028907776,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2401.3,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 3.475378787878788,
|
|
"grad_norm": 0.6346510976345004,
|
|
"learning_rate": 2.3737234126374924e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3351159989833832,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3178.6,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 3.484848484848485,
|
|
"grad_norm": 0.6331554534822816,
|
|
"learning_rate": 2.3644400929676536e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3300311267375946,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2821.8,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 3.4943181818181817,
|
|
"grad_norm": 0.6439047225394164,
|
|
"learning_rate": 2.355148644606984e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37909388542175293,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3636.0,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 3.503787878787879,
|
|
"grad_norm": 0.6608038488408319,
|
|
"learning_rate": 2.345849274797529e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919193208217621,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2636.5,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 3.5132575757575757,
|
|
"grad_norm": 0.6068514756094557,
|
|
"learning_rate": 2.3365421909580238e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39818108081817627,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3820.0,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.5227272727272725,
|
|
"grad_norm": 0.64392979451326,
|
|
"learning_rate": 2.327227600679257e-05,
|
|
"loss": 0.3335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771720588207245,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2680.7,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 3.5321969696969697,
|
|
"grad_norm": 0.6190288855873033,
|
|
"learning_rate": 2.3179057117194485e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32504522800445557,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3245.0,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.5416666666666665,
|
|
"grad_norm": 0.6513781186129098,
|
|
"learning_rate": 2.3085767319996113e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31286710500717163,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2720.8,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 3.5511363636363638,
|
|
"grad_norm": 0.660657205009661,
|
|
"learning_rate": 2.299240869598914e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3482305109500885,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2881.7,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 3.5606060606060606,
|
|
"grad_norm": 0.6081337628847159,
|
|
"learning_rate": 2.2898983327500433e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32418566942214966,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2990.9,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.570075757575758,
|
|
"grad_norm": 0.705144662519959,
|
|
"learning_rate": 2.280549329834554e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3148069977760315,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2576.9,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 3.5795454545454546,
|
|
"grad_norm": 0.6535224903001041,
|
|
"learning_rate": 2.2711940693782252e-05,
|
|
"loss": 0.3659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3509103059768677,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2842.1,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 3.5890151515151514,
|
|
"grad_norm": 0.6783308381949563,
|
|
"learning_rate": 2.261832760046408e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28785064816474915,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2518.2,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 3.5984848484848486,
|
|
"grad_norm": 0.5881626089648836,
|
|
"learning_rate": 2.2524656106393712e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31268805265426636,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3491.0,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 3.6079545454545454,
|
|
"grad_norm": 0.7282045917594657,
|
|
"learning_rate": 2.243092830087644e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3410530090332031,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3281.5,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 3.617424242424242,
|
|
"grad_norm": 0.6329445570275456,
|
|
"learning_rate": 2.2337146274473565e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32806456089019775,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3017.1,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 3.6268939393939394,
|
|
"grad_norm": 0.6404582534770342,
|
|
"learning_rate": 2.224331211895575e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3053431808948517,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2910.6,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 3.6363636363636362,
|
|
"grad_norm": 0.7091411798476415,
|
|
"learning_rate": 2.214942792725639e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3474353551864624,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2756.5,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 3.6458333333333335,
|
|
"grad_norm": 0.6752046285440569,
|
|
"learning_rate": 2.2055495793424915e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3468410074710846,
|
|
"step": 1925,
|
|
"valid_targets_mean": 2780.6,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 3.6553030303030303,
|
|
"grad_norm": 0.6635169644788932,
|
|
"learning_rate": 2.196151781258008e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35017693042755127,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3303.5,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.6647727272727275,
|
|
"grad_norm": 0.5768030085892524,
|
|
"learning_rate": 2.1867496080863253e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4167655110359192,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4465.0,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 3.6742424242424243,
|
|
"grad_norm": 0.6678880574986856,
|
|
"learning_rate": 2.1773432695391622e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3807205259799957,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3036.8,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.683712121212121,
|
|
"grad_norm": 0.6637147304343244,
|
|
"learning_rate": 2.1679329754211472e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36827173829078674,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3218.2,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 3.6931818181818183,
|
|
"grad_norm": 0.6481141494442498,
|
|
"learning_rate": 2.1585189356251342e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747058868408203,
|
|
"step": 1950,
|
|
"valid_targets_mean": 2372.9,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 3.702651515151515,
|
|
"grad_norm": 0.5907668378669308,
|
|
"learning_rate": 2.149101360127525e-05,
|
|
"loss": 0.3353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725542187690735,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3046.8,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 3.712121212121212,
|
|
"grad_norm": 0.6118525913504305,
|
|
"learning_rate": 2.1396804589835824e-05,
|
|
"loss": 0.3408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39681771397590637,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3872.5,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 3.721590909090909,
|
|
"grad_norm": 0.7187960003061866,
|
|
"learning_rate": 2.1302564423227456e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.260134756565094,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2149.9,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 3.731060606060606,
|
|
"grad_norm": 0.5165572774854524,
|
|
"learning_rate": 2.1208295203439463e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43970781564712524,
|
|
"step": 1970,
|
|
"valid_targets_mean": 5450.9,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 3.7405303030303028,
|
|
"grad_norm": 0.6180529002754266,
|
|
"learning_rate": 2.1113999033109165e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29223644733428955,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3032.2,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.5809538999061915,
|
|
"learning_rate": 2.1019678015475012e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3399672508239746,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3793.8,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 3.7594696969696972,
|
|
"grad_norm": 0.6408168233287046,
|
|
"learning_rate": 2.0925334254329666e-05,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3791733384132385,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3445.0,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 3.768939393939394,
|
|
"grad_norm": 0.7390062358145014,
|
|
"learning_rate": 2.0830969853973064e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31430596113204956,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2363.8,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 3.778409090909091,
|
|
"grad_norm": 0.679130726623561,
|
|
"learning_rate": 2.0736586919165486e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976763844490051,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2445.1,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 3.787878787878788,
|
|
"grad_norm": 0.5675206694775314,
|
|
"learning_rate": 2.064218755508064e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3499271273612976,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3692.0,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 3.797348484848485,
|
|
"grad_norm": 0.6696147309252558,
|
|
"learning_rate": 2.0547773867258667e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33318015933036804,
|
|
"step": 2005,
|
|
"valid_targets_mean": 2729.8,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 3.8068181818181817,
|
|
"grad_norm": 0.6341758676323284,
|
|
"learning_rate": 2.0453347961559193e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36540964245796204,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3427.8,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 3.816287878787879,
|
|
"grad_norm": 0.759406182848391,
|
|
"learning_rate": 2.0358911944114364e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811744809150696,
|
|
"step": 2015,
|
|
"valid_targets_mean": 2582.3,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 3.8257575757575757,
|
|
"grad_norm": 0.6755649882958579,
|
|
"learning_rate": 2.0264467921281848e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35657942295074463,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2952.3,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 3.8352272727272725,
|
|
"grad_norm": 0.6693557935962697,
|
|
"learning_rate": 2.0170017999597894e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543318271636963,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2445.9,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 3.8446969696969697,
|
|
"grad_norm": 0.9872705605741504,
|
|
"learning_rate": 2.0075564285730315e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925317883491516,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3016.7,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 3.8541666666666665,
|
|
"grad_norm": 0.9356648810767931,
|
|
"learning_rate": 1.9981108886431495e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31507444381713867,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2686.3,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.8636363636363638,
|
|
"grad_norm": 0.6724365943132989,
|
|
"learning_rate": 1.9886653908491424e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30074799060821533,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2809.9,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 3.8731060606060606,
|
|
"grad_norm": 0.6564648725316806,
|
|
"learning_rate": 1.9792201458690695e-05,
|
|
"loss": 0.3507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3782392740249634,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3063.3,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 3.882575757575758,
|
|
"grad_norm": 0.6367122319638797,
|
|
"learning_rate": 1.96977536437535e-05,
|
|
"loss": 0.3356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36474543809890747,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3631.3,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 3.8920454545454546,
|
|
"grad_norm": 0.6999332165495065,
|
|
"learning_rate": 1.960331257030066e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3744880259037018,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3059.6,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 3.9015151515151514,
|
|
"grad_norm": 0.696816093232675,
|
|
"learning_rate": 1.9508880344802624e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24983765184879303,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2120.2,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 3.9109848484848486,
|
|
"grad_norm": 0.6668368451543965,
|
|
"learning_rate": 1.94144590735325e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38019129633903503,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3197.9,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 3.9204545454545454,
|
|
"grad_norm": 0.6761886653166814,
|
|
"learning_rate": 1.932005086251906e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36857151985168457,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2874.4,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 3.929924242424242,
|
|
"grad_norm": 0.5796932219908127,
|
|
"learning_rate": 1.9225657817499775e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30397409200668335,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3628.8,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 3.9393939393939394,
|
|
"grad_norm": 0.6382520745548468,
|
|
"learning_rate": 1.9131282043873845e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3480740785598755,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3129.4,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 3.9488636363636362,
|
|
"grad_norm": 0.6290145536726383,
|
|
"learning_rate": 1.9036925646655234e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128345012664795,
|
|
"step": 2085,
|
|
"valid_targets_mean": 2675.0,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 3.9583333333333335,
|
|
"grad_norm": 0.6307807680087694,
|
|
"learning_rate": 1.8942590730425733e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32158148288726807,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2977.9,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 3.9678030303030303,
|
|
"grad_norm": 0.7662005055380653,
|
|
"learning_rate": 1.8848279399287987e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34880077838897705,
|
|
"step": 2095,
|
|
"valid_targets_mean": 2834.6,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 3.9772727272727275,
|
|
"grad_norm": 0.6613482581148082,
|
|
"learning_rate": 1.8753993756818607e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28859174251556396,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2745.9,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.9867424242424243,
|
|
"grad_norm": 0.6245543736667526,
|
|
"learning_rate": 1.8659735906021232e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30892664194107056,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3113.9,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 3.996212121212121,
|
|
"grad_norm": 0.66915169610781,
|
|
"learning_rate": 1.8565507949279588e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35334154963493347,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2911.2,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 4.005681818181818,
|
|
"grad_norm": 0.6052064031949881,
|
|
"learning_rate": 1.847131198831065e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2576064169406891,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2890.6,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 4.015151515151516,
|
|
"grad_norm": 0.686288160409638,
|
|
"learning_rate": 1.837715012411774e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256112277507782,
|
|
"step": 2120,
|
|
"valid_targets_mean": 2609.1,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 4.024621212121212,
|
|
"grad_norm": 0.7049339217231857,
|
|
"learning_rate": 1.828302445694366e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870901823043823,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2530.7,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 4.034090909090909,
|
|
"grad_norm": 0.745787867411192,
|
|
"learning_rate": 1.818893708622385e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2942032516002655,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2476.0,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 4.043560606060606,
|
|
"grad_norm": 0.6786048115705973,
|
|
"learning_rate": 1.809489011053957e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24971142411231995,
|
|
"step": 2135,
|
|
"valid_targets_mean": 2689.8,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.053030303030303,
|
|
"grad_norm": 0.7021352386178411,
|
|
"learning_rate": 1.800088562757107e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27472883462905884,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2616.9,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 4.0625,
|
|
"grad_norm": 0.6720792784382932,
|
|
"learning_rate": 1.7906925734050836e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23587368428707123,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2694.6,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 4.071969696969697,
|
|
"grad_norm": 0.7164473108750313,
|
|
"learning_rate": 1.7813012525716796e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3149929940700531,
|
|
"step": 2150,
|
|
"valid_targets_mean": 2761.1,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 4.081439393939394,
|
|
"grad_norm": 0.7924154959459456,
|
|
"learning_rate": 1.7719148097265572e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24486128985881805,
|
|
"step": 2155,
|
|
"valid_targets_mean": 1937.1,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 4.090909090909091,
|
|
"grad_norm": 0.7237215413711432,
|
|
"learning_rate": 1.76253345423058e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24427613615989685,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2088.8,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 4.100378787878788,
|
|
"grad_norm": 0.9913748779688368,
|
|
"learning_rate": 1.753157395331136e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25353801250457764,
|
|
"step": 2165,
|
|
"valid_targets_mean": 2792.2,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 4.109848484848484,
|
|
"grad_norm": 0.714811530285404,
|
|
"learning_rate": 1.7437868421574786e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3608337640762329,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2875.3,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.119318181818182,
|
|
"grad_norm": 0.6735779017704333,
|
|
"learning_rate": 1.7344220037160566e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533136010169983,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3023.5,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 4.128787878787879,
|
|
"grad_norm": 0.6639340334907975,
|
|
"learning_rate": 1.7250630888858536e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27889108657836914,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2701.8,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 4.138257575757576,
|
|
"grad_norm": 0.727918811080758,
|
|
"learning_rate": 1.71571030641373e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38342058658599854,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3210.5,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 4.1477272727272725,
|
|
"grad_norm": 0.6818406407429174,
|
|
"learning_rate": 1.7063638649097666e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26273882389068604,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2586.8,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 4.15719696969697,
|
|
"grad_norm": 0.6318575444019972,
|
|
"learning_rate": 1.6970239728426118e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30564966797828674,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3405.7,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.6481927258542993,
|
|
"learning_rate": 1.687690838534829e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36290088295936584,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3897.0,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 4.176136363636363,
|
|
"grad_norm": 0.6343708446298161,
|
|
"learning_rate": 1.6783646701582556e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32945138216018677,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3477.9,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 4.1856060606060606,
|
|
"grad_norm": 0.6917755014531518,
|
|
"learning_rate": 1.6690456757293552e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.308441162109375,
|
|
"step": 2210,
|
|
"valid_targets_mean": 2997.4,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 4.195075757575758,
|
|
"grad_norm": 0.6257811589285132,
|
|
"learning_rate": 1.6597340631045787e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32576870918273926,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3299.9,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 4.204545454545454,
|
|
"grad_norm": 0.6687426921405316,
|
|
"learning_rate": 1.650430039975731e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32010895013809204,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3278.1,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 4.214015151515151,
|
|
"grad_norm": 0.666187674164031,
|
|
"learning_rate": 1.6411338138653327e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3824407458305359,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3471.5,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 4.223484848484849,
|
|
"grad_norm": 0.6874156049359609,
|
|
"learning_rate": 1.631845592121999e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28018447756767273,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2871.2,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 4.232954545454546,
|
|
"grad_norm": 0.7080127697268435,
|
|
"learning_rate": 1.622565581915809e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3446245491504669,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3694.8,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 4.242424242424242,
|
|
"grad_norm": 0.6380037611180758,
|
|
"learning_rate": 1.613293990233686e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4187149703502655,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4451.6,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 4.2518939393939394,
|
|
"grad_norm": 0.6689892729853713,
|
|
"learning_rate": 1.6040310238747827e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34598517417907715,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3485.2,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 4.261363636363637,
|
|
"grad_norm": 0.7011945594840294,
|
|
"learning_rate": 1.5947768894458673e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26216158270835876,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2531.6,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 4.270833333333333,
|
|
"grad_norm": 0.725861198660732,
|
|
"learning_rate": 1.5855317933567155e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29773661494255066,
|
|
"step": 2255,
|
|
"valid_targets_mean": 2328.7,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 4.28030303030303,
|
|
"grad_norm": 0.6612963497250627,
|
|
"learning_rate": 1.5762959418155046e-05,
|
|
"loss": 0.3339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33770671486854553,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3794.3,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 4.2897727272727275,
|
|
"grad_norm": 0.7816300481077356,
|
|
"learning_rate": 1.567069540824219e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2114625871181488,
|
|
"step": 2265,
|
|
"valid_targets_mean": 2151.3,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 4.299242424242424,
|
|
"grad_norm": 0.6691840023713783,
|
|
"learning_rate": 1.5578527961740494e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3044890761375427,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3282.9,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 4.308712121212121,
|
|
"grad_norm": 0.6548199858488674,
|
|
"learning_rate": 1.5486459134408072e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31405389308929443,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3124.4,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 4.318181818181818,
|
|
"grad_norm": 0.7444565005266761,
|
|
"learning_rate": 1.539449097980338e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30968451499938965,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2868.5,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 4.327651515151516,
|
|
"grad_norm": 0.740911777193961,
|
|
"learning_rate": 1.53026255492394e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2749401330947876,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2237.5,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 4.337121212121212,
|
|
"grad_norm": 0.6935506271287019,
|
|
"learning_rate": 1.5210864891737893e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972255349159241,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3199.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 4.346590909090909,
|
|
"grad_norm": 0.684165474945537,
|
|
"learning_rate": 1.5119211053983718e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2915506958961487,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2826.1,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 4.356060606060606,
|
|
"grad_norm": 0.7476924900626162,
|
|
"learning_rate": 1.5027666080279135e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27624720335006714,
|
|
"step": 2300,
|
|
"valid_targets_mean": 2545.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 4.365530303030303,
|
|
"grad_norm": 0.6557545415734991,
|
|
"learning_rate": 1.4936232012498258e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4462564289569855,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4268.9,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 4.375,
|
|
"grad_norm": 0.7492623168958791,
|
|
"learning_rate": 1.484491089004147e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30561721324920654,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2708.4,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 4.384469696969697,
|
|
"grad_norm": 0.73874045418594,
|
|
"learning_rate": 1.4753704749789979e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859521210193634,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2469.8,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 4.393939393939394,
|
|
"grad_norm": 0.7584167517810665,
|
|
"learning_rate": 1.4662615626060329e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3107367753982544,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2866.4,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 4.403409090909091,
|
|
"grad_norm": 0.7785505790724848,
|
|
"learning_rate": 1.4571645550559075e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28550994396209717,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2149.9,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 4.412878787878788,
|
|
"grad_norm": 0.7772259592017609,
|
|
"learning_rate": 1.4480796552337447e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31777992844581604,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2752.8,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 4.422348484848484,
|
|
"grad_norm": 0.7713710853435042,
|
|
"learning_rate": 1.4390070657746096e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35458117723464966,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3681.9,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 4.431818181818182,
|
|
"grad_norm": 0.7193226421722435,
|
|
"learning_rate": 1.4299469890389895e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3468610644340515,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2981.4,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 4.441287878787879,
|
|
"grad_norm": 0.7198461809640079,
|
|
"learning_rate": 1.4208996271082796e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24278980493545532,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2383.5,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 4.450757575757576,
|
|
"grad_norm": 0.8456709291260625,
|
|
"learning_rate": 1.4118651817802777e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569124102592468,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2089.3,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.4602272727272725,
|
|
"grad_norm": 0.7626727557261296,
|
|
"learning_rate": 1.4028438545646814e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30639687180519104,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2369.4,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 4.46969696969697,
|
|
"grad_norm": 0.8131681000347785,
|
|
"learning_rate": 1.3938358466785943e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33323854207992554,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2344.4,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 4.479166666666667,
|
|
"grad_norm": 0.7145250540678227,
|
|
"learning_rate": 1.384841359042036e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559138238430023,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2454.9,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 4.488636363636363,
|
|
"grad_norm": 0.598248159353537,
|
|
"learning_rate": 1.3758605922734653e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23145177960395813,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2984.5,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 4.4981060606060606,
|
|
"grad_norm": 0.6826376955633069,
|
|
"learning_rate": 1.3668937466852997e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30398109555244446,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2890.6,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 4.507575757575758,
|
|
"grad_norm": 0.7428056491667924,
|
|
"learning_rate": 1.3579410222794519e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3179449141025543,
|
|
"step": 2380,
|
|
"valid_targets_mean": 2732.6,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 4.517045454545455,
|
|
"grad_norm": 0.9886437774898788,
|
|
"learning_rate": 1.3490026187428671e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796229124069214,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2388.9,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 4.526515151515151,
|
|
"grad_norm": 0.6546355834028638,
|
|
"learning_rate": 1.3400787354430684e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3275710344314575,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3537.8,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 4.535984848484849,
|
|
"grad_norm": 0.7016155186563944,
|
|
"learning_rate": 1.331169571423712e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39911067485809326,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3464.6,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 4.545454545454545,
|
|
"grad_norm": 0.8562116640719617,
|
|
"learning_rate": 1.3222753254001464e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24516209959983826,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2705.9,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 4.554924242424242,
|
|
"grad_norm": 0.7743255728635488,
|
|
"learning_rate": 1.3133961957549781e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735127806663513,
|
|
"step": 2405,
|
|
"valid_targets_mean": 2375.5,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 4.5643939393939394,
|
|
"grad_norm": 0.6625793886320552,
|
|
"learning_rate": 1.3045323805336514e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2793106138706207,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2916.8,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 4.573863636363637,
|
|
"grad_norm": 0.7135778764972254,
|
|
"learning_rate": 1.295684077440027e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30118927359580994,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2724.1,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 4.583333333333333,
|
|
"grad_norm": 0.7176400826676046,
|
|
"learning_rate": 1.2868514838319749e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34547048807144165,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2762.7,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 4.59280303030303,
|
|
"grad_norm": 0.7904441036823299,
|
|
"learning_rate": 1.2780347967169703e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33883923292160034,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3704.7,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 4.6022727272727275,
|
|
"grad_norm": 0.6100302009338138,
|
|
"learning_rate": 1.2692342127476995e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32346370816230774,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3950.1,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 4.611742424242424,
|
|
"grad_norm": 0.7205412860255006,
|
|
"learning_rate": 1.260449928217677e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33376532793045044,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3059.0,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 4.621212121212121,
|
|
"grad_norm": 0.7055845821029746,
|
|
"learning_rate": 1.2516821390568633e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293209433555603,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2707.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.630681818181818,
|
|
"grad_norm": 0.6815935341576593,
|
|
"learning_rate": 1.2429310408272968e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28893178701400757,
|
|
"step": 2445,
|
|
"valid_targets_mean": 2916.3,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 4.640151515151516,
|
|
"grad_norm": 0.7054143326375655,
|
|
"learning_rate": 1.2341968287187302e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22132417559623718,
|
|
"step": 2450,
|
|
"valid_targets_mean": 2491.2,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.649621212121212,
|
|
"grad_norm": 1.171840229089561,
|
|
"learning_rate": 1.2254796975442797e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31816911697387695,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3896.6,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.659090909090909,
|
|
"grad_norm": 0.6838639647808855,
|
|
"learning_rate": 1.2167798417360777e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726309299468994,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2914.2,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 4.668560606060606,
|
|
"grad_norm": 0.7915707723196701,
|
|
"learning_rate": 1.2080974553409348e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644215226173401,
|
|
"step": 2465,
|
|
"valid_targets_mean": 2098.1,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 4.678030303030303,
|
|
"grad_norm": 0.6473515408172529,
|
|
"learning_rate": 1.1994327320160153e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27932336926460266,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3540.8,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 4.6875,
|
|
"grad_norm": 0.7074724997841196,
|
|
"learning_rate": 1.1907858650245157e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834998369216919,
|
|
"step": 2475,
|
|
"valid_targets_mean": 2562.2,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 4.696969696969697,
|
|
"grad_norm": 0.7320891529424824,
|
|
"learning_rate": 1.1821570472313515e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946927547454834,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3080.7,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 4.706439393939394,
|
|
"grad_norm": 0.6461108957932098,
|
|
"learning_rate": 1.1735464710988619e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2942076325416565,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3437.2,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 4.715909090909091,
|
|
"grad_norm": 0.8209132420811707,
|
|
"learning_rate": 1.1649543286825092e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28862065076828003,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2059.9,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 4.725378787878788,
|
|
"grad_norm": 0.6142215174356648,
|
|
"learning_rate": 1.1563808116266034e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006783127784729,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4670.9,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 4.734848484848484,
|
|
"grad_norm": 0.6980945361714567,
|
|
"learning_rate": 1.1478261111600192e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25062429904937744,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2762.4,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 4.744318181818182,
|
|
"grad_norm": 0.7058847817674732,
|
|
"learning_rate": 1.1392904180919365e-05,
|
|
"loss": 0.335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3519722819328308,
|
|
"step": 2505,
|
|
"valid_targets_mean": 2936.7,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 4.753787878787879,
|
|
"grad_norm": 0.6932087431024149,
|
|
"learning_rate": 1.1307739228075839e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32616230845451355,
|
|
"step": 2510,
|
|
"valid_targets_mean": 2894.8,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 4.763257575757576,
|
|
"grad_norm": 0.6650539636574567,
|
|
"learning_rate": 1.1222768152639889e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.335166335105896,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3036.1,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 4.7727272727272725,
|
|
"grad_norm": 0.7095891429169946,
|
|
"learning_rate": 1.1137992849857437e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31800755858421326,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2900.8,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 4.78219696969697,
|
|
"grad_norm": 0.7211462330644652,
|
|
"learning_rate": 1.10534152106078e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28862619400024414,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2683.3,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 4.791666666666667,
|
|
"grad_norm": 0.7647238403209635,
|
|
"learning_rate": 1.096903712136145e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3275856673717499,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2522.2,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 4.801136363636363,
|
|
"grad_norm": 0.7582104031562541,
|
|
"learning_rate": 1.0884860464138e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665335536003113,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2449.8,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 4.8106060606060606,
|
|
"grad_norm": 0.9476751165415227,
|
|
"learning_rate": 1.0800887116464196e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955111861228943,
|
|
"step": 2540,
|
|
"valid_targets_mean": 2734.5,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.820075757575758,
|
|
"grad_norm": 0.7967391216454419,
|
|
"learning_rate": 1.0717118951332033e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2611914873123169,
|
|
"step": 2545,
|
|
"valid_targets_mean": 2111.2,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 4.829545454545455,
|
|
"grad_norm": 0.6743043157021413,
|
|
"learning_rate": 1.0633557837157024e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4029915928840637,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3624.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 4.839015151515151,
|
|
"grad_norm": 0.6986665144853835,
|
|
"learning_rate": 1.0550205637736464e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23510821163654327,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2262.1,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 4.848484848484849,
|
|
"grad_norm": 0.6652637297286845,
|
|
"learning_rate": 1.046706421220789e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3540082573890686,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3212.6,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 4.857954545454545,
|
|
"grad_norm": 0.7505479781661824,
|
|
"learning_rate": 1.0384135415007628e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312686026096344,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2897.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 4.867424242424242,
|
|
"grad_norm": 0.7736374106831825,
|
|
"learning_rate": 1.0301421095829394e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31730690598487854,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2555.4,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 4.8768939393939394,
|
|
"grad_norm": 0.7614137185318047,
|
|
"learning_rate": 1.0218923099583083e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35499870777130127,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2622.4,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.886363636363637,
|
|
"grad_norm": 0.7962115398070491,
|
|
"learning_rate": 1.0136643266353572e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288144588470459,
|
|
"step": 2580,
|
|
"valid_targets_mean": 2607.7,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 4.895833333333333,
|
|
"grad_norm": 0.7098995492950018,
|
|
"learning_rate": 1.0054583431359687e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35193300247192383,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3151.3,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 4.90530303030303,
|
|
"grad_norm": 0.7160877083531707,
|
|
"learning_rate": 9.97274542491332e-06,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639806866645813,
|
|
"step": 2590,
|
|
"valid_targets_mean": 2884.3,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 4.9147727272727275,
|
|
"grad_norm": 0.6727526489944198,
|
|
"learning_rate": 9.891131072378532e-06,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3169069290161133,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3093.4,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 4.924242424242424,
|
|
"grad_norm": 0.6564194597038467,
|
|
"learning_rate": 9.809742194130895e-06,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665601372718811,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2767.0,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.933712121212121,
|
|
"grad_norm": 0.7911065049510495,
|
|
"learning_rate": 9.728580605516855e-06,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35964858531951904,
|
|
"step": 2605,
|
|
"valid_targets_mean": 2538.2,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 4.943181818181818,
|
|
"grad_norm": 0.7226953931442387,
|
|
"learning_rate": 9.647648116813247e-06,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34944793581962585,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2611.1,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 4.952651515151516,
|
|
"grad_norm": 0.5600296733113928,
|
|
"learning_rate": 9.566946533186943e-06,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790874242782593,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4108.8,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 4.962121212121212,
|
|
"grad_norm": 0.6602915325157084,
|
|
"learning_rate": 9.486477654654558e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33438998460769653,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3208.9,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 4.971590909090909,
|
|
"grad_norm": 0.7562474128145615,
|
|
"learning_rate": 9.406243276042304e-06,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27687782049179077,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2561.2,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 4.981060606060606,
|
|
"grad_norm": 0.6642206841188554,
|
|
"learning_rate": 9.326245186945991e-06,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3291071057319641,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3593.9,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 4.990530303030303,
|
|
"grad_norm": 0.7286182366255992,
|
|
"learning_rate": 9.24648517169106e-06,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35772228240966797,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2769.4,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.7801934714572549,
|
|
"learning_rate": 9.166965009292824e-06,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2234649658203125,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2155.2,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 5.009469696969697,
|
|
"grad_norm": 0.7083942979558027,
|
|
"learning_rate": 9.087686473416768e-06,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596581280231476,
|
|
"step": 2645,
|
|
"valid_targets_mean": 2758.2,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 5.018939393939394,
|
|
"grad_norm": 0.7767688674613339,
|
|
"learning_rate": 9.008651332338991e-06,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545972466468811,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2482.2,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 5.028409090909091,
|
|
"grad_norm": 0.7176517582993568,
|
|
"learning_rate": 8.929861348906785e-06,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28357428312301636,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2898.4,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 5.037878787878788,
|
|
"grad_norm": 0.6775385984477393,
|
|
"learning_rate": 8.85131828049928e-06,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596086263656616,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3206.0,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 5.047348484848484,
|
|
"grad_norm": 1.0216470334763648,
|
|
"learning_rate": 8.773023878988266e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28679585456848145,
|
|
"step": 2665,
|
|
"valid_targets_mean": 2501.6,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 5.056818181818182,
|
|
"grad_norm": 0.73432402688444,
|
|
"learning_rate": 8.694979890699135e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24043041467666626,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2691.1,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 5.066287878787879,
|
|
"grad_norm": 0.7061368947533598,
|
|
"learning_rate": 8.617188056371887e-06,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3837023079395294,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3851.8,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 5.075757575757576,
|
|
"grad_norm": 0.791123170328224,
|
|
"learning_rate": 8.539650111122358e-06,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30000433325767517,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2854.5,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 5.0852272727272725,
|
|
"grad_norm": 0.8453799227777693,
|
|
"learning_rate": 8.462367784403458e-06,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30926838517189026,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2278.4,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 5.09469696969697,
|
|
"grad_norm": 0.7356122925413243,
|
|
"learning_rate": 8.385342799966647e-06,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33058780431747437,
|
|
"step": 2690,
|
|
"valid_targets_mean": 2904.0,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 5.104166666666667,
|
|
"grad_norm": 0.7830348504519481,
|
|
"learning_rate": 8.308576875823465e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3316836357116699,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2885.4,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 5.113636363636363,
|
|
"grad_norm": 0.7722388556744241,
|
|
"learning_rate": 8.232071724207204e-06,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20272141695022583,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2161.5,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 5.1231060606060606,
|
|
"grad_norm": 0.7607304385841452,
|
|
"learning_rate": 8.155829051534753e-06,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2507064938545227,
|
|
"step": 2705,
|
|
"valid_targets_mean": 2862.9,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 5.132575757575758,
|
|
"grad_norm": 0.8380620256483563,
|
|
"learning_rate": 8.079850558368497e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672100067138672,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2333.6,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 5.142045454545454,
|
|
"grad_norm": 0.6698165205159401,
|
|
"learning_rate": 8.00413793937839e-06,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2373093068599701,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2887.7,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 5.151515151515151,
|
|
"grad_norm": 0.6969335623594948,
|
|
"learning_rate": 7.928692883304198e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33501362800598145,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3215.7,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 5.160984848484849,
|
|
"grad_norm": 0.7255152466019107,
|
|
"learning_rate": 7.853517072917787e-06,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294289231300354,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2823.0,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 5.170454545454546,
|
|
"grad_norm": 0.7420821654501136,
|
|
"learning_rate": 7.778612184985592e-06,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3362032473087311,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3110.0,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 5.179924242424242,
|
|
"grad_norm": 0.7666421724066657,
|
|
"learning_rate": 7.703979890231265e-06,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2424549013376236,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2423.6,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 5.1893939393939394,
|
|
"grad_norm": 0.7402505850812773,
|
|
"learning_rate": 7.629621853298343e-06,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29788947105407715,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2907.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 5.198863636363637,
|
|
"grad_norm": 0.7693004230650073,
|
|
"learning_rate": 7.555539732713171e-06,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33685559034347534,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2762.3,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 5.208333333333333,
|
|
"grad_norm": 0.7306362656705607,
|
|
"learning_rate": 7.4817351808478774e-06,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3252193331718445,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3485.9,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 5.21780303030303,
|
|
"grad_norm": 0.6758115635790173,
|
|
"learning_rate": 7.408209843883536e-06,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783186733722687,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3088.4,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 5.2272727272727275,
|
|
"grad_norm": 0.7549068862971525,
|
|
"learning_rate": 7.334965361773454e-06,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28676024079322815,
|
|
"step": 2760,
|
|
"valid_targets_mean": 2423.0,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 5.236742424242424,
|
|
"grad_norm": 0.9145590519352917,
|
|
"learning_rate": 7.2620033682065715e-06,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31939688324928284,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2705.9,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 5.246212121212121,
|
|
"grad_norm": 0.7177870092389994,
|
|
"learning_rate": 7.189325490571026e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27524375915527344,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2718.9,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 5.255681818181818,
|
|
"grad_norm": 0.7526060778732195,
|
|
"learning_rate": 7.116933349917894e-06,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23347587883472443,
|
|
"step": 2775,
|
|
"valid_targets_mean": 2077.6,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 5.265151515151516,
|
|
"grad_norm": 0.815700348703319,
|
|
"learning_rate": 7.044828560924968e-06,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701820433139801,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2363.1,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 5.274621212121212,
|
|
"grad_norm": 0.7110701051326784,
|
|
"learning_rate": 6.973012731860787e-06,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773430347442627,
|
|
"step": 2785,
|
|
"valid_targets_mean": 2846.2,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 5.284090909090909,
|
|
"grad_norm": 0.8039713338008241,
|
|
"learning_rate": 6.901487464548771e-06,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055078387260437,
|
|
"step": 2790,
|
|
"valid_targets_mean": 2464.9,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 5.293560606060606,
|
|
"grad_norm": 0.7110745680464083,
|
|
"learning_rate": 6.830254354331458e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307306170463562,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3037.9,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 5.303030303030303,
|
|
"grad_norm": 0.760025008805222,
|
|
"learning_rate": 6.7593149900349395e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2079525887966156,
|
|
"step": 2800,
|
|
"valid_targets_mean": 2384.4,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 5.3125,
|
|
"grad_norm": 0.611168302910325,
|
|
"learning_rate": 6.6886709539334295e-06,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.339036762714386,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4784.5,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 5.321969696969697,
|
|
"grad_norm": 0.6925466776043471,
|
|
"learning_rate": 6.61832382171395e-06,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3191547989845276,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3162.1,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 5.331439393939394,
|
|
"grad_norm": 0.7546521872992992,
|
|
"learning_rate": 6.548275162441229e-06,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3368365168571472,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3084.2,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 5.340909090909091,
|
|
"grad_norm": 0.7267388655524162,
|
|
"learning_rate": 6.478526538522638e-06,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3870479464530945,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3415.4,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 5.350378787878788,
|
|
"grad_norm": 0.7418622625286694,
|
|
"learning_rate": 6.409079505673412e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27272170782089233,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2799.1,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 5.359848484848484,
|
|
"grad_norm": 0.6013512100284714,
|
|
"learning_rate": 6.3399356128819e-06,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982272803783417,
|
|
"step": 2830,
|
|
"valid_targets_mean": 4025.1,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 5.369318181818182,
|
|
"grad_norm": 0.8774372528887067,
|
|
"learning_rate": 6.2710964023750275e-06,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724454998970032,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2402.1,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 5.378787878787879,
|
|
"grad_norm": 0.8922146667608377,
|
|
"learning_rate": 6.202563409583926e-06,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680201828479767,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2314.2,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 5.388257575757576,
|
|
"grad_norm": 0.7936813128906817,
|
|
"learning_rate": 6.134338163109641e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24976639449596405,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2629.8,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 5.3977272727272725,
|
|
"grad_norm": 0.7722211045202745,
|
|
"learning_rate": 6.0664221846890715e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600027620792389,
|
|
"step": 2850,
|
|
"valid_targets_mean": 2448.4,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 5.40719696969697,
|
|
"grad_norm": 0.6626525148412977,
|
|
"learning_rate": 5.99881698916101e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3152995705604553,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3362.2,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 5.416666666666667,
|
|
"grad_norm": 0.7138356753107448,
|
|
"learning_rate": 5.931524084432354e-06,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947576940059662,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3078.3,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 5.426136363636363,
|
|
"grad_norm": 0.7297440729546754,
|
|
"learning_rate": 5.8645449714445035e-06,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32731929421424866,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3634.0,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 5.4356060606060606,
|
|
"grad_norm": 1.5154764983259665,
|
|
"learning_rate": 5.797881144139836e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21219462156295776,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2467.6,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 5.445075757575758,
|
|
"grad_norm": 0.7150172129889892,
|
|
"learning_rate": 5.73153408942841e-06,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30269789695739746,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2957.1,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 5.454545454545454,
|
|
"grad_norm": 0.7941165548252831,
|
|
"learning_rate": 5.6655052871548135e-06,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27892398834228516,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2810.4,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 5.464015151515151,
|
|
"grad_norm": 0.7786463544500783,
|
|
"learning_rate": 5.599796210065125e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003968596458435,
|
|
"step": 2885,
|
|
"valid_targets_mean": 2746.1,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 5.473484848484849,
|
|
"grad_norm": 0.6632545954521551,
|
|
"learning_rate": 5.5344083237740786e-06,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29318347573280334,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3516.5,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 5.482954545454546,
|
|
"grad_norm": 0.7938266524744773,
|
|
"learning_rate": 5.469343086732397e-06,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780526041984558,
|
|
"step": 2895,
|
|
"valid_targets_mean": 1936.8,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 5.492424242424242,
|
|
"grad_norm": 0.8293280652779262,
|
|
"learning_rate": 5.4046019501942174e-06,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22576263546943665,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2297.4,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 5.5018939393939394,
|
|
"grad_norm": 0.7676765086689091,
|
|
"learning_rate": 5.340186358184753e-06,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28281110525131226,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2708.7,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 5.511363636363637,
|
|
"grad_norm": 0.7742612638975616,
|
|
"learning_rate": 5.276097747468081e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3895435631275177,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2968.8,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 5.520833333333333,
|
|
"grad_norm": 0.749919227534722,
|
|
"learning_rate": 5.2123375475150765e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27819758653640747,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2985.9,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 5.53030303030303,
|
|
"grad_norm": 0.6637337707122435,
|
|
"learning_rate": 5.1489071804715654e-06,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24529731273651123,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3282.1,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 5.5397727272727275,
|
|
"grad_norm": 0.7601679853731937,
|
|
"learning_rate": 5.08580806112656e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24980325996875763,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2645.2,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 5.549242424242424,
|
|
"grad_norm": 0.7771068913716181,
|
|
"learning_rate": 5.023041596880749e-06,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725909352302551,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2598.4,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 5.558712121212121,
|
|
"grad_norm": 0.6158406550985045,
|
|
"learning_rate": 4.9606091877150576e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29989326000213623,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3913.4,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 5.568181818181818,
|
|
"grad_norm": 0.7659367143592785,
|
|
"learning_rate": 4.8985122261594555e-06,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3836999237537384,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3226.8,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 5.577651515151516,
|
|
"grad_norm": 0.7223365589150194,
|
|
"learning_rate": 4.8367520972618986e-06,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26267319917678833,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3051.4,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 5.587121212121212,
|
|
"grad_norm": 0.6653056095250088,
|
|
"learning_rate": 4.775330178557409e-06,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585207223892212,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3773.1,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 5.596590909090909,
|
|
"grad_norm": 0.7999456053651244,
|
|
"learning_rate": 4.714247840037369e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21376433968544006,
|
|
"step": 2955,
|
|
"valid_targets_mean": 2142.6,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 5.606060606060606,
|
|
"grad_norm": 0.7557958999571945,
|
|
"learning_rate": 4.653506444118963e-06,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2141074538230896,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2743.5,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 5.615530303030303,
|
|
"grad_norm": 0.7943164926373172,
|
|
"learning_rate": 4.593107345614782e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30793049931526184,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2605.7,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 5.625,
|
|
"grad_norm": 0.715812919052811,
|
|
"learning_rate": 4.533051891702622e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3051164746284485,
|
|
"step": 2970,
|
|
"valid_targets_mean": 3072.2,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 5.634469696969697,
|
|
"grad_norm": 0.8167010447683465,
|
|
"learning_rate": 4.473341421895414e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24410909414291382,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2587.8,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 5.643939393939394,
|
|
"grad_norm": 0.7401297578509658,
|
|
"learning_rate": 4.413977268011349e-06,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22754594683647156,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2472.9,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 5.653409090909091,
|
|
"grad_norm": 0.7369925553462479,
|
|
"learning_rate": 4.354960754144199e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21642503142356873,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2370.9,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 5.662878787878788,
|
|
"grad_norm": 0.8509691164458568,
|
|
"learning_rate": 4.296293196633752e-06,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22029736638069153,
|
|
"step": 2990,
|
|
"valid_targets_mean": 2306.0,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 5.672348484848484,
|
|
"grad_norm": 0.7199406270951106,
|
|
"learning_rate": 4.2379759040364535e-06,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29955077171325684,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3446.6,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 5.681818181818182,
|
|
"grad_norm": 0.8222362550057568,
|
|
"learning_rate": 4.180010177096257e-06,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26154011487960815,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2739.9,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 5.691287878787879,
|
|
"grad_norm": 0.8045829321721933,
|
|
"learning_rate": 4.12239730871556e-06,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29042568802833557,
|
|
"step": 3005,
|
|
"valid_targets_mean": 2475.9,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 5.700757575757576,
|
|
"grad_norm": 0.6954151629104889,
|
|
"learning_rate": 4.0651385839264e-06,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2484559416770935,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2831.0,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 5.7102272727272725,
|
|
"grad_norm": 0.756263923248415,
|
|
"learning_rate": 4.0082352798617855e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594577670097351,
|
|
"step": 3015,
|
|
"valid_targets_mean": 2763.2,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 5.71969696969697,
|
|
"grad_norm": 0.7254942584150078,
|
|
"learning_rate": 3.951688665727196e-06,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27426761388778687,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2985.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 5.729166666666667,
|
|
"grad_norm": 0.6543895699688739,
|
|
"learning_rate": 3.8955000027723035e-06,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24140413105487823,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4014.0,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 5.738636363636363,
|
|
"grad_norm": 0.8465428355530478,
|
|
"learning_rate": 3.839670544262808e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29114481806755066,
|
|
"step": 3030,
|
|
"valid_targets_mean": 2398.8,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 5.7481060606060606,
|
|
"grad_norm": 0.9222692954555853,
|
|
"learning_rate": 3.784201535452494e-06,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932199537754059,
|
|
"step": 3035,
|
|
"valid_targets_mean": 2343.3,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 5.757575757575758,
|
|
"grad_norm": 0.7642097644756622,
|
|
"learning_rate": 3.7290942135554754e-06,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28685760498046875,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2953.6,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 5.767045454545455,
|
|
"grad_norm": 0.9186306460957517,
|
|
"learning_rate": 3.674349807718569e-06,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3445358872413635,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2373.2,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 5.776515151515151,
|
|
"grad_norm": 0.6352048760849357,
|
|
"learning_rate": 3.619969538993906e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32018163800239563,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4040.6,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 5.785984848484849,
|
|
"grad_norm": 0.7657074271007112,
|
|
"learning_rate": 3.5659546203116734e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790144979953766,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2682.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 5.795454545454545,
|
|
"grad_norm": 0.7516076755451682,
|
|
"learning_rate": 3.5123062564530776e-06,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899581491947174,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2607.7,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 5.804924242424242,
|
|
"grad_norm": 1.8336911888488554,
|
|
"learning_rate": 3.4590256440234616e-06,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522396445274353,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2903.1,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 5.8143939393939394,
|
|
"grad_norm": 0.7394156252329193,
|
|
"learning_rate": 3.4061139714256174e-06,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830778956413269,
|
|
"step": 3070,
|
|
"valid_targets_mean": 2794.9,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 5.823863636363637,
|
|
"grad_norm": 0.7450802012590318,
|
|
"learning_rate": 3.3535724188332996e-06,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2466995269060135,
|
|
"step": 3075,
|
|
"valid_targets_mean": 2779.8,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 5.500189794855682,
|
|
"learning_rate": 3.3014021581648613e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22964492440223694,
|
|
"step": 3080,
|
|
"valid_targets_mean": 2273.2,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.84280303030303,
|
|
"grad_norm": 0.6117756342323342,
|
|
"learning_rate": 3.2496043530571453e-06,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4313168525695801,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4787.2,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 5.8522727272727275,
|
|
"grad_norm": 0.7933340733715843,
|
|
"learning_rate": 3.1981801588395278e-06,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26301246881484985,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2519.8,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 5.861742424242424,
|
|
"grad_norm": 0.8225111220307535,
|
|
"learning_rate": 3.147130722508138e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27497878670692444,
|
|
"step": 3095,
|
|
"valid_targets_mean": 2429.1,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 5.871212121212121,
|
|
"grad_norm": 0.7949261960335404,
|
|
"learning_rate": 3.096457182700272e-06,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22902481257915497,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2411.5,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 5.880681818181818,
|
|
"grad_norm": 0.6298428915519383,
|
|
"learning_rate": 3.046160669669025e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898612916469574,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3990.2,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 5.890151515151516,
|
|
"grad_norm": 0.7699061640931419,
|
|
"learning_rate": 2.9962423052580412e-06,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35197505354881287,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2985.8,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 5.899621212121212,
|
|
"grad_norm": 0.7263579062287591,
|
|
"learning_rate": 2.9467032028765174e-06,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24723392724990845,
|
|
"step": 3115,
|
|
"valid_targets_mean": 2905.2,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 5.909090909090909,
|
|
"grad_norm": 0.7704974888251244,
|
|
"learning_rate": 2.8975444674743624e-06,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3412296772003174,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2864.7,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 5.918560606060606,
|
|
"grad_norm": 0.764940140069547,
|
|
"learning_rate": 2.8487671955175433e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2403152734041214,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3067.2,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 5.928030303030303,
|
|
"grad_norm": 0.7858368917307781,
|
|
"learning_rate": 2.8003724749636594e-06,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3317389488220215,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2932.5,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 5.9375,
|
|
"grad_norm": 0.8086274978598056,
|
|
"learning_rate": 2.7523613852376297e-06,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276688814163208,
|
|
"step": 3135,
|
|
"valid_targets_mean": 2629.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 5.946969696969697,
|
|
"grad_norm": 0.7279635896503,
|
|
"learning_rate": 2.7047349972076475e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723991572856903,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3051.4,
|
|
"valid_targets_min": 1783
|
|
},
|
|
{
|
|
"epoch": 5.956439393939394,
|
|
"grad_norm": 0.7575057627034761,
|
|
"learning_rate": 2.6574943731613024e-06,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31866124272346497,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3174.2,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 5.965909090909091,
|
|
"grad_norm": 0.7235650487678464,
|
|
"learning_rate": 2.61064056678185e-06,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31120604276657104,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2830.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 5.975378787878788,
|
|
"grad_norm": 0.869016910327608,
|
|
"learning_rate": 2.5641746231247446e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2774472236633301,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2412.9,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 5.984848484848484,
|
|
"grad_norm": 0.7409766932721407,
|
|
"learning_rate": 2.5180975785943228e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23813384771347046,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2655.9,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 5.994318181818182,
|
|
"grad_norm": 0.657191540656982,
|
|
"learning_rate": 2.472410460920669e-06,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27969256043434143,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3425.1,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 6.003787878787879,
|
|
"grad_norm": 0.7479523101463702,
|
|
"learning_rate": 2.4271142891367095e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117693066596985,
|
|
"step": 3170,
|
|
"valid_targets_mean": 2974.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.013257575757576,
|
|
"grad_norm": 0.6762554120389915,
|
|
"learning_rate": 2.3822100735554777e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028896749019623,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3438.4,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 6.0227272727272725,
|
|
"grad_norm": 0.8634132247536119,
|
|
"learning_rate": 2.3376988157475867e-06,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2494552880525589,
|
|
"step": 3180,
|
|
"valid_targets_mean": 2726.6,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 6.03219696969697,
|
|
"grad_norm": 0.8956940662061619,
|
|
"learning_rate": 2.293581508518874e-06,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36183804273605347,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3616.9,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 6.041666666666667,
|
|
"grad_norm": 0.6660076529609307,
|
|
"learning_rate": 2.249859135888268e-06,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29410141706466675,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3657.4,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 6.051136363636363,
|
|
"grad_norm": 1.1202392722908618,
|
|
"learning_rate": 2.206532673065851e-06,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27225634455680847,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2992.8,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 6.0606060606060606,
|
|
"grad_norm": 0.7389782449210996,
|
|
"learning_rate": 2.1636030864310787e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32250869274139404,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3299.3,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 6.070075757575758,
|
|
"grad_norm": 0.877638420702606,
|
|
"learning_rate": 2.1210713335112466e-06,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29791060090065,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2183.1,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 6.079545454545454,
|
|
"grad_norm": 0.722695517917214,
|
|
"learning_rate": 2.0789383629601366e-06,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24031683802604675,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3369.0,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 6.089015151515151,
|
|
"grad_norm": 0.751418780093696,
|
|
"learning_rate": 2.0372051145368375e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30524569749832153,
|
|
"step": 3215,
|
|
"valid_targets_mean": 2964.9,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 6.098484848484849,
|
|
"grad_norm": 0.8827471342566028,
|
|
"learning_rate": 1.9958725190848005e-06,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241586148738861,
|
|
"step": 3220,
|
|
"valid_targets_mean": 1919.0,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 6.107954545454546,
|
|
"grad_norm": 0.7941894209661737,
|
|
"learning_rate": 1.954941498511074e-06,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25327014923095703,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2540.8,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 6.117424242424242,
|
|
"grad_norm": 0.6704744689947337,
|
|
"learning_rate": 1.9144129657657328e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26415207982063293,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3642.2,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 6.1268939393939394,
|
|
"grad_norm": 0.6875340543827657,
|
|
"learning_rate": 1.8742878248215301e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31393834948539734,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3674.5,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 6.136363636363637,
|
|
"grad_norm": 0.6961984313925638,
|
|
"learning_rate": 1.8345669706537216e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2377914935350418,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3273.1,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 6.145833333333333,
|
|
"grad_norm": 0.752801942546114,
|
|
"learning_rate": 1.7952512892201013e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3270752429962158,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3054.1,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 6.15530303030303,
|
|
"grad_norm": 0.7245352605782267,
|
|
"learning_rate": 1.756341657441263e-06,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30817776918411255,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3405.8,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 6.1647727272727275,
|
|
"grad_norm": 0.7224024100693172,
|
|
"learning_rate": 1.7178389431810071e-06,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25151073932647705,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3147.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 6.174242424242424,
|
|
"grad_norm": 0.6747035669927908,
|
|
"learning_rate": 1.6797440052270108e-06,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28985482454299927,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3696.5,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 6.183712121212121,
|
|
"grad_norm": 0.7385214077879164,
|
|
"learning_rate": 1.6420576932716681e-06,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3440060615539551,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3041.2,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 6.193181818181818,
|
|
"grad_norm": 0.7150235194831117,
|
|
"learning_rate": 1.6047808478931214e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3491504490375519,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3922.9,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 6.202651515151516,
|
|
"grad_norm": 0.7148185138853995,
|
|
"learning_rate": 1.5679143005365304e-06,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23463097214698792,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2997.9,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 6.212121212121212,
|
|
"grad_norm": 0.7584822418088031,
|
|
"learning_rate": 1.5314588734955239e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25991326570510864,
|
|
"step": 3280,
|
|
"valid_targets_mean": 2790.1,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 6.221590909090909,
|
|
"grad_norm": 0.7246773357917251,
|
|
"learning_rate": 1.4954153798938475e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3440665900707245,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3625.9,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 6.231060606060606,
|
|
"grad_norm": 0.6813553918188918,
|
|
"learning_rate": 1.4597846236672508e-06,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307594358921051,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3453.8,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 6.240530303030303,
|
|
"grad_norm": 0.7430406773587825,
|
|
"learning_rate": 1.4245673995455224e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28920233249664307,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3146.6,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"grad_norm": 0.8118746059621327,
|
|
"learning_rate": 1.3897644930347975e-06,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29953065514564514,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2528.2,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 6.259469696969697,
|
|
"grad_norm": 0.7767349794082651,
|
|
"learning_rate": 1.355376680400018e-06,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903704345226288,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2741.1,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 6.268939393939394,
|
|
"grad_norm": 0.8100485028598501,
|
|
"learning_rate": 1.3214047286476128e-06,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2503022253513336,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2558.8,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 6.278409090909091,
|
|
"grad_norm": 0.8180253089153836,
|
|
"learning_rate": 1.287849395508418e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754809260368347,
|
|
"step": 3315,
|
|
"valid_targets_mean": 2728.2,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 6.287878787878788,
|
|
"grad_norm": 0.7886385894959198,
|
|
"learning_rate": 1.254711429420743e-06,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18305569887161255,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2196.8,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 6.297348484848484,
|
|
"grad_norm": 0.6908895114336885,
|
|
"learning_rate": 1.2219915695136985e-06,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4005507826805115,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4153.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 6.306818181818182,
|
|
"grad_norm": 0.6381368324655696,
|
|
"learning_rate": 1.1896905455906983e-06,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3057023584842682,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4255.1,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 6.316287878787879,
|
|
"grad_norm": 0.7400173530353669,
|
|
"learning_rate": 1.1578090781131901e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2468041181564331,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3048.8,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 6.325757575757576,
|
|
"grad_norm": 0.7447567517143859,
|
|
"learning_rate": 1.126347878184586e-06,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892162799835205,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2988.6,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 6.3352272727272725,
|
|
"grad_norm": 0.7737485875104078,
|
|
"learning_rate": 1.095307647534396e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2050369679927826,
|
|
"step": 3345,
|
|
"valid_targets_mean": 2266.9,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 6.34469696969697,
|
|
"grad_norm": 0.8746487530313464,
|
|
"learning_rate": 1.0646890785025698e-06,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32405412197113037,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3170.2,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 6.354166666666667,
|
|
"grad_norm": 1.5525426263312605,
|
|
"learning_rate": 1.0344928540240783e-06,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31522366404533386,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3134.3,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 6.363636363636363,
|
|
"grad_norm": 0.7383031935783807,
|
|
"learning_rate": 1.0047196476136545e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25389596819877625,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2945.7,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 6.3731060606060606,
|
|
"grad_norm": 1.130831864750787,
|
|
"learning_rate": 9.753701233507828e-07,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073417544364929,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2345.3,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 6.382575757575758,
|
|
"grad_norm": 0.8290258127453207,
|
|
"learning_rate": 9.464449358648941e-07,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33612531423568726,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2850.6,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 6.392045454545454,
|
|
"grad_norm": 0.7658152162098584,
|
|
"learning_rate": 9.179447303207479e-07,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2722936272621155,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2828.2,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 6.401515151515151,
|
|
"grad_norm": 0.7997219500390282,
|
|
"learning_rate": 8.898701424040568e-07,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24936950206756592,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2481.3,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.410984848484849,
|
|
"grad_norm": 0.6901370528088343,
|
|
"learning_rate": 8.62221798307299e-07,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845917344093323,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3576.8,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 6.420454545454546,
|
|
"grad_norm": 0.8550110335206853,
|
|
"learning_rate": 8.350003147157548e-07,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2593115270137787,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2455.6,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 6.429924242424242,
|
|
"grad_norm": 0.7366213752937361,
|
|
"learning_rate": 8.082062987937545e-07,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25383639335632324,
|
|
"step": 3395,
|
|
"valid_targets_mean": 2894.2,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 6.4393939393939394,
|
|
"grad_norm": 0.830382401957924,
|
|
"learning_rate": 7.818403481711301e-07,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541577219963074,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2390.8,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 6.448863636363637,
|
|
"grad_norm": 0.7603062182218498,
|
|
"learning_rate": 7.559030509298826e-07,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21562251448631287,
|
|
"step": 3405,
|
|
"valid_targets_mean": 2328.9,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 6.458333333333333,
|
|
"grad_norm": 0.8694550777653652,
|
|
"learning_rate": 7.30394985591083e-07,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26847851276397705,
|
|
"step": 3410,
|
|
"valid_targets_mean": 2709.1,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 6.46780303030303,
|
|
"grad_norm": 0.7007261169177813,
|
|
"learning_rate": 7.053167211019451e-07,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32844868302345276,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3693.6,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 6.4772727272727275,
|
|
"grad_norm": 0.7901650590309718,
|
|
"learning_rate": 6.806688168231512e-07,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2393220216035843,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2554.8,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 6.486742424242424,
|
|
"grad_norm": 0.7593137030290243,
|
|
"learning_rate": 6.564518225163708e-07,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3162652254104614,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3039.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 6.496212121212121,
|
|
"grad_norm": 0.7796125423475732,
|
|
"learning_rate": 6.326662783319948e-07,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19908849895000458,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2406.8,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 6.505681818181818,
|
|
"grad_norm": 0.829505379995145,
|
|
"learning_rate": 6.093127147970945e-07,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23179075121879578,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2482.6,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 6.515151515151516,
|
|
"grad_norm": 0.8166919226758544,
|
|
"learning_rate": 5.86391652803584e-07,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3228103816509247,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2834.8,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 6.524621212121212,
|
|
"grad_norm": 0.8030939168818446,
|
|
"learning_rate": 5.63903603596605e-07,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538023591041565,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3067.4,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 6.534090909090909,
|
|
"grad_norm": 0.7435537262486897,
|
|
"learning_rate": 5.418490687631206e-07,
|
|
"loss": 0.3174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305688500404358,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3357.3,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 6.543560606060606,
|
|
"grad_norm": 0.7342979071967464,
|
|
"learning_rate": 5.202285402207263e-07,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299335777759552,
|
|
"step": 3455,
|
|
"valid_targets_mean": 2833.7,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 6.553030303030303,
|
|
"grad_norm": 0.7908516567890211,
|
|
"learning_rate": 4.990425002066857e-07,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23545055091381073,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2452.5,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 6.5625,
|
|
"grad_norm": 0.8206932168690125,
|
|
"learning_rate": 4.782914212671653e-07,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20241320133209229,
|
|
"step": 3465,
|
|
"valid_targets_mean": 2130.6,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 6.571969696969697,
|
|
"grad_norm": 0.8714180792604437,
|
|
"learning_rate": 4.579757662466966e-07,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27541032433509827,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2734.2,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 6.581439393939394,
|
|
"grad_norm": 0.7639327113937807,
|
|
"learning_rate": 4.3809598827786237e-07,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549261450767517,
|
|
"step": 3475,
|
|
"valid_targets_mean": 2769.2,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 6.590909090909091,
|
|
"grad_norm": 0.6762402756066686,
|
|
"learning_rate": 4.186525307711753e-07,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717151641845703,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3725.3,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 6.600378787878788,
|
|
"grad_norm": 0.7313538894071349,
|
|
"learning_rate": 3.996458274051929e-07,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37201523780822754,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5088.9,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.609848484848484,
|
|
"grad_norm": 1.240679514054856,
|
|
"learning_rate": 3.810763021168495e-07,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3235916793346405,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3312.9,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 6.619318181818182,
|
|
"grad_norm": 0.7576130148533162,
|
|
"learning_rate": 3.6294436909199536e-07,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654678225517273,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3074.9,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 6.628787878787879,
|
|
"grad_norm": 0.7810071573149348,
|
|
"learning_rate": 3.452504327561612e-07,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293660432100296,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2801.5,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 6.638257575757576,
|
|
"grad_norm": 0.666923157231194,
|
|
"learning_rate": 3.27994887765537e-07,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28967416286468506,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3877.8,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.6477272727272725,
|
|
"grad_norm": 0.7620824757912084,
|
|
"learning_rate": 3.111781189981655e-07,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23825384676456451,
|
|
"step": 3510,
|
|
"valid_targets_mean": 2430.0,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 6.65719696969697,
|
|
"grad_norm": 0.8648842153110048,
|
|
"learning_rate": 2.9480050154536257e-07,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24667057394981384,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2219.9,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.7767630444320176,
|
|
"learning_rate": 2.7886240070335024e-07,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703108787536621,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2921.0,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 6.676136363636363,
|
|
"grad_norm": 0.8180081419464671,
|
|
"learning_rate": 2.633641719651081e-07,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.306096613407135,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2831.0,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 6.6856060606060606,
|
|
"grad_norm": 0.7349534712967289,
|
|
"learning_rate": 2.483061610124415e-07,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31256765127182007,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3324.2,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 6.695075757575758,
|
|
"grad_norm": 0.7009481360273515,
|
|
"learning_rate": 2.336887037082747e-07,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21603810787200928,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3197.9,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 6.704545454545455,
|
|
"grad_norm": 0.7229549992031178,
|
|
"learning_rate": 2.1951212608916305e-07,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3526965379714966,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3836.4,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 6.714015151515151,
|
|
"grad_norm": 0.8195049847378149,
|
|
"learning_rate": 2.057767443580061e-07,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26782917976379395,
|
|
"step": 3545,
|
|
"valid_targets_mean": 2630.4,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 6.723484848484849,
|
|
"grad_norm": 0.818566101515422,
|
|
"learning_rate": 1.924828648770194e-07,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20337113738059998,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2104.8,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 6.732954545454545,
|
|
"grad_norm": 0.7598257755968266,
|
|
"learning_rate": 1.7963078416087576e-07,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3678698241710663,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3606.2,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 6.742424242424242,
|
|
"grad_norm": 0.7600096718363533,
|
|
"learning_rate": 1.6722078887010383e-07,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21424710750579834,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2658.1,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 6.7518939393939394,
|
|
"grad_norm": 0.7554164856616007,
|
|
"learning_rate": 1.552531558046999e-07,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3586035966873169,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3125.9,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 6.761363636363637,
|
|
"grad_norm": 0.7477705092469106,
|
|
"learning_rate": 1.4372815189794386e-07,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731787860393524,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3020.4,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 6.770833333333333,
|
|
"grad_norm": 0.5868156666534103,
|
|
"learning_rate": 1.326460342104441e-07,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3199750781059265,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4828.1,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 6.78030303030303,
|
|
"grad_norm": 0.7501048056591529,
|
|
"learning_rate": 1.220070499244175e-07,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.337718665599823,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3300.2,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 6.7897727272727275,
|
|
"grad_norm": 0.7674888336602804,
|
|
"learning_rate": 1.1181143633816061e-07,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3135637640953064,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2992.8,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 6.799242424242424,
|
|
"grad_norm": 0.7930081849697127,
|
|
"learning_rate": 1.0205942086076503e-07,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586725950241089,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2778.4,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 6.808712121212121,
|
|
"grad_norm": 0.5886459029143625,
|
|
"learning_rate": 9.275122100704581e-08,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39650607109069824,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5160.7,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 6.818181818181818,
|
|
"grad_norm": 0.8168866962833852,
|
|
"learning_rate": 8.388704439268314e-08,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3173607885837555,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2882.4,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 6.827651515151516,
|
|
"grad_norm": 0.7214209929896152,
|
|
"learning_rate": 7.546708872960162e-08,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30105432868003845,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3295.9,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 6.837121212121212,
|
|
"grad_norm": 0.7893081634660285,
|
|
"learning_rate": 6.74915418215516e-08,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699580192565918,
|
|
"step": 3610,
|
|
"valid_targets_mean": 2836.4,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 6.846590909090909,
|
|
"grad_norm": 0.8188056135667495,
|
|
"learning_rate": 5.996058155992135e-08,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25267067551612854,
|
|
"step": 3615,
|
|
"valid_targets_mean": 2529.6,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 6.856060606060606,
|
|
"grad_norm": 0.7474881489999372,
|
|
"learning_rate": 5.287437591977807e-08,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27609768509864807,
|
|
"step": 3620,
|
|
"valid_targets_mean": 2746.8,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 6.865530303030303,
|
|
"grad_norm": 0.7757070278566047,
|
|
"learning_rate": 4.623308295610862e-08,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925783693790436,
|
|
"step": 3625,
|
|
"valid_targets_mean": 2813.9,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 6.875,
|
|
"grad_norm": 0.7975184955132695,
|
|
"learning_rate": 4.0036850800300176e-08,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23089198768138885,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2370.7,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 6.884469696969697,
|
|
"grad_norm": 0.8411714315796196,
|
|
"learning_rate": 3.42858176568428e-08,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25411084294319153,
|
|
"step": 3635,
|
|
"valid_targets_mean": 2164.6,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 6.893939393939394,
|
|
"grad_norm": 0.795159156023418,
|
|
"learning_rate": 2.898011180023197e-08,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717554569244385,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2784.2,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 6.903409090909091,
|
|
"grad_norm": 0.8500138368939761,
|
|
"learning_rate": 2.411985157211971e-08,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22534826397895813,
|
|
"step": 3645,
|
|
"valid_targets_mean": 1964.1,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 6.912878787878788,
|
|
"grad_norm": 0.805386755317268,
|
|
"learning_rate": 1.9705145378672296e-08,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785588204860687,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2695.1,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 6.922348484848484,
|
|
"grad_norm": 0.7350304889792184,
|
|
"learning_rate": 1.5736091688147713e-08,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649744153022766,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3043.4,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 6.931818181818182,
|
|
"grad_norm": 0.7964728671190333,
|
|
"learning_rate": 1.2212779028706323e-08,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2548796534538269,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2388.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 6.941287878787879,
|
|
"grad_norm": 0.7748808468385654,
|
|
"learning_rate": 9.135285986427988e-09,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2775483727455139,
|
|
"step": 3665,
|
|
"valid_targets_mean": 2768.1,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 6.950757575757576,
|
|
"grad_norm": 0.7188500086541901,
|
|
"learning_rate": 6.503681203571255e-09,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3210909962654114,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3268.6,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 6.9602272727272725,
|
|
"grad_norm": 0.8201527538720523,
|
|
"learning_rate": 4.318023377027913e-09,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19639575481414795,
|
|
"step": 3675,
|
|
"valid_targets_mean": 1962.5,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 6.96969696969697,
|
|
"grad_norm": 0.7896466961816956,
|
|
"learning_rate": 2.5783612570240424e-09,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24593189358711243,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2611.4,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 6.979166666666667,
|
|
"grad_norm": 0.8033124661249537,
|
|
"learning_rate": 1.2847336460275473e-09,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680438160896301,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3199.4,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 6.988636363636363,
|
|
"grad_norm": 0.7378837145401517,
|
|
"learning_rate": 4.371693978866276e-10,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3379039168357849,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3146.9,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 6.9981060606060606,
|
|
"grad_norm": 0.7167227750770363,
|
|
"learning_rate": 3.568741718584789e-11,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20342959463596344,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2741.6,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21515175700187683,
|
|
"step": 3696,
|
|
"total_flos": 565338957152256.0,
|
|
"train_loss": 0.3516287574649373,
|
|
"train_runtime": 10041.4381,
|
|
"train_samples_per_second": 5.879,
|
|
"train_steps_per_second": 0.368,
|
|
"valid_targets_mean": 2865.6,
|
|
"valid_targets_min": 1016
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3696,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 565338957152256.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|