9541 lines
264 KiB
JSON
9541 lines
264 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4319,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008103727714748784,
|
|
"grad_norm": 14.502046563847436,
|
|
"learning_rate": 3.7037037037037036e-07,
|
|
"loss": 0.8801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9242562651634216,
|
|
"step": 5,
|
|
"valid_targets_mean": 4295.1,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 0.01620745542949757,
|
|
"grad_norm": 15.903096275441944,
|
|
"learning_rate": 8.333333333333333e-07,
|
|
"loss": 0.8843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.0109663009643555,
|
|
"step": 10,
|
|
"valid_targets_mean": 4119.3,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 0.024311183144246355,
|
|
"grad_norm": 12.890655417344467,
|
|
"learning_rate": 1.2962962962962962e-06,
|
|
"loss": 0.8416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8123115301132202,
|
|
"step": 15,
|
|
"valid_targets_mean": 3726.6,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 0.03241491085899514,
|
|
"grad_norm": 10.128730652201147,
|
|
"learning_rate": 1.7592592592592594e-06,
|
|
"loss": 0.785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.718390703201294,
|
|
"step": 20,
|
|
"valid_targets_mean": 3520.1,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 0.04051863857374392,
|
|
"grad_norm": 7.138044910532352,
|
|
"learning_rate": 2.222222222222222e-06,
|
|
"loss": 0.7489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7211443185806274,
|
|
"step": 25,
|
|
"valid_targets_mean": 4349.3,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 0.04862236628849271,
|
|
"grad_norm": 4.932655033986416,
|
|
"learning_rate": 2.6851851851851856e-06,
|
|
"loss": 0.7511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6623836755752563,
|
|
"step": 30,
|
|
"valid_targets_mean": 3442.4,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.05672609400324149,
|
|
"grad_norm": 3.188346911958201,
|
|
"learning_rate": 3.1481481481481483e-06,
|
|
"loss": 0.7389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7061078548431396,
|
|
"step": 35,
|
|
"valid_targets_mean": 3195.9,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 0.06482982171799027,
|
|
"grad_norm": 2.155975188008163,
|
|
"learning_rate": 3.6111111111111115e-06,
|
|
"loss": 0.6625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6780894994735718,
|
|
"step": 40,
|
|
"valid_targets_mean": 3058.6,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 0.07293354943273905,
|
|
"grad_norm": 1.5267785030853465,
|
|
"learning_rate": 4.074074074074074e-06,
|
|
"loss": 0.633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7597166299819946,
|
|
"step": 45,
|
|
"valid_targets_mean": 4936.4,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 0.08103727714748785,
|
|
"grad_norm": 1.4912835751224582,
|
|
"learning_rate": 4.537037037037038e-06,
|
|
"loss": 0.6283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6779534816741943,
|
|
"step": 50,
|
|
"valid_targets_mean": 2796.6,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 0.08914100486223663,
|
|
"grad_norm": 1.2109844973016437,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.619255542755127,
|
|
"step": 55,
|
|
"valid_targets_mean": 2768.4,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.09724473257698542,
|
|
"grad_norm": 0.7300524443363055,
|
|
"learning_rate": 5.462962962962963e-06,
|
|
"loss": 0.6097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5387172698974609,
|
|
"step": 60,
|
|
"valid_targets_mean": 5398.2,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 0.1053484602917342,
|
|
"grad_norm": 0.6939352585635662,
|
|
"learning_rate": 5.925925925925926e-06,
|
|
"loss": 0.5651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4976433515548706,
|
|
"step": 65,
|
|
"valid_targets_mean": 5193.5,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 0.11345218800648298,
|
|
"grad_norm": 0.8282020026122351,
|
|
"learning_rate": 6.3888888888888885e-06,
|
|
"loss": 0.5986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.562492847442627,
|
|
"step": 70,
|
|
"valid_targets_mean": 3213.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 0.12155591572123177,
|
|
"grad_norm": 0.8388747146684071,
|
|
"learning_rate": 6.851851851851853e-06,
|
|
"loss": 0.5874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6159340143203735,
|
|
"step": 75,
|
|
"valid_targets_mean": 3616.1,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 0.12965964343598055,
|
|
"grad_norm": 0.6349565573978161,
|
|
"learning_rate": 7.314814814814816e-06,
|
|
"loss": 0.5918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5378053784370422,
|
|
"step": 80,
|
|
"valid_targets_mean": 4808.6,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 0.13776337115072934,
|
|
"grad_norm": 0.6866539798766335,
|
|
"learning_rate": 7.77777777777778e-06,
|
|
"loss": 0.5544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5045327544212341,
|
|
"step": 85,
|
|
"valid_targets_mean": 4616.6,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 0.1458670988654781,
|
|
"grad_norm": 0.7135765247596682,
|
|
"learning_rate": 8.240740740740741e-06,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5626544952392578,
|
|
"step": 90,
|
|
"valid_targets_mean": 3737.9,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 0.1539708265802269,
|
|
"grad_norm": 0.8944356346358915,
|
|
"learning_rate": 8.703703703703705e-06,
|
|
"loss": 0.5711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5422185659408569,
|
|
"step": 95,
|
|
"valid_targets_mean": 3383.4,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 0.1620745542949757,
|
|
"grad_norm": 0.6752326811138456,
|
|
"learning_rate": 9.166666666666666e-06,
|
|
"loss": 0.5453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5315940380096436,
|
|
"step": 100,
|
|
"valid_targets_mean": 3757.1,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.17017828200972449,
|
|
"grad_norm": 0.7034188800227436,
|
|
"learning_rate": 9.62962962962963e-06,
|
|
"loss": 0.5076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5128780603408813,
|
|
"step": 105,
|
|
"valid_targets_mean": 3490.1,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 0.17828200972447325,
|
|
"grad_norm": 0.8563770301799251,
|
|
"learning_rate": 1.0092592592592594e-05,
|
|
"loss": 0.5609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6451968550682068,
|
|
"step": 110,
|
|
"valid_targets_mean": 3104.2,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.18638573743922204,
|
|
"grad_norm": 0.6977828625213393,
|
|
"learning_rate": 1.0555555555555557e-05,
|
|
"loss": 0.5463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5652469396591187,
|
|
"step": 115,
|
|
"valid_targets_mean": 3494.4,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 0.19448946515397084,
|
|
"grad_norm": 0.7024165934230638,
|
|
"learning_rate": 1.1018518518518519e-05,
|
|
"loss": 0.5138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48960810899734497,
|
|
"step": 120,
|
|
"valid_targets_mean": 3552.3,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 0.2025931928687196,
|
|
"grad_norm": 0.6711618444311361,
|
|
"learning_rate": 1.1481481481481482e-05,
|
|
"loss": 0.5164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48685699701309204,
|
|
"step": 125,
|
|
"valid_targets_mean": 4097.9,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 0.2106969205834684,
|
|
"grad_norm": 0.8895538573513374,
|
|
"learning_rate": 1.1944444444444444e-05,
|
|
"loss": 0.5224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5535306930541992,
|
|
"step": 130,
|
|
"valid_targets_mean": 2476.7,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 0.2188006482982172,
|
|
"grad_norm": 0.7147279273965873,
|
|
"learning_rate": 1.2407407407407408e-05,
|
|
"loss": 0.5609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5410614609718323,
|
|
"step": 135,
|
|
"valid_targets_mean": 3292.9,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 0.22690437601296595,
|
|
"grad_norm": 0.5760847796962758,
|
|
"learning_rate": 1.2870370370370371e-05,
|
|
"loss": 0.5127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4297313690185547,
|
|
"step": 140,
|
|
"valid_targets_mean": 4090.2,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 0.23500810372771475,
|
|
"grad_norm": 0.6252622714518827,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.5182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4308566451072693,
|
|
"step": 145,
|
|
"valid_targets_mean": 3886.9,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 0.24311183144246354,
|
|
"grad_norm": 0.664417459291334,
|
|
"learning_rate": 1.3796296296296297e-05,
|
|
"loss": 0.5026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49792343378067017,
|
|
"step": 150,
|
|
"valid_targets_mean": 3989.4,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 0.25121555915721233,
|
|
"grad_norm": 0.4487043325414751,
|
|
"learning_rate": 1.4259259259259259e-05,
|
|
"loss": 0.4752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36317574977874756,
|
|
"step": 155,
|
|
"valid_targets_mean": 6837.2,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 0.2593192868719611,
|
|
"grad_norm": 0.6029473626318651,
|
|
"learning_rate": 1.4722222222222224e-05,
|
|
"loss": 0.5177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44724512100219727,
|
|
"step": 160,
|
|
"valid_targets_mean": 4923.8,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 0.26742301458670986,
|
|
"grad_norm": 0.5768270103259813,
|
|
"learning_rate": 1.5185185185185187e-05,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4138815999031067,
|
|
"step": 165,
|
|
"valid_targets_mean": 4088.1,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 0.2755267423014587,
|
|
"grad_norm": 0.5493315381239489,
|
|
"learning_rate": 1.564814814814815e-05,
|
|
"loss": 0.4689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44827204942703247,
|
|
"step": 170,
|
|
"valid_targets_mean": 5288.2,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 0.28363047001620745,
|
|
"grad_norm": 0.7994248985246581,
|
|
"learning_rate": 1.6111111111111115e-05,
|
|
"loss": 0.4863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5198005437850952,
|
|
"step": 175,
|
|
"valid_targets_mean": 2657.2,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 0.2917341977309562,
|
|
"grad_norm": 0.9722413882474711,
|
|
"learning_rate": 1.6574074074074075e-05,
|
|
"loss": 0.513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5183806419372559,
|
|
"step": 180,
|
|
"valid_targets_mean": 3431.2,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.29983792544570503,
|
|
"grad_norm": 0.6589835217769006,
|
|
"learning_rate": 1.7037037037037038e-05,
|
|
"loss": 0.5393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5170410871505737,
|
|
"step": 185,
|
|
"valid_targets_mean": 5086.8,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 0.3079416531604538,
|
|
"grad_norm": 0.6967830216075351,
|
|
"learning_rate": 1.7500000000000002e-05,
|
|
"loss": 0.4479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47171273827552795,
|
|
"step": 190,
|
|
"valid_targets_mean": 4035.7,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 0.3160453808752026,
|
|
"grad_norm": 0.821386141264342,
|
|
"learning_rate": 1.7962962962962965e-05,
|
|
"loss": 0.5305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6754404902458191,
|
|
"step": 195,
|
|
"valid_targets_mean": 3246.0,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 0.3241491085899514,
|
|
"grad_norm": 0.7996690171267771,
|
|
"learning_rate": 1.8425925925925926e-05,
|
|
"loss": 0.4836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4826638996601105,
|
|
"step": 200,
|
|
"valid_targets_mean": 2741.5,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 0.33225283630470015,
|
|
"grad_norm": 0.7619254088595571,
|
|
"learning_rate": 1.888888888888889e-05,
|
|
"loss": 0.4903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47676026821136475,
|
|
"step": 205,
|
|
"valid_targets_mean": 3212.6,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 0.34035656401944897,
|
|
"grad_norm": 0.6571212058463909,
|
|
"learning_rate": 1.9351851851851853e-05,
|
|
"loss": 0.5098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5193724036216736,
|
|
"step": 210,
|
|
"valid_targets_mean": 4616.6,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 0.34846029173419774,
|
|
"grad_norm": 0.6304645656002126,
|
|
"learning_rate": 1.9814814814814816e-05,
|
|
"loss": 0.503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.446179062128067,
|
|
"step": 215,
|
|
"valid_targets_mean": 4198.4,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.3565640194489465,
|
|
"grad_norm": 0.797707714974618,
|
|
"learning_rate": 2.027777777777778e-05,
|
|
"loss": 0.4763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5235961675643921,
|
|
"step": 220,
|
|
"valid_targets_mean": 3196.6,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 0.3646677471636953,
|
|
"grad_norm": 0.8339761763253815,
|
|
"learning_rate": 2.074074074074074e-05,
|
|
"loss": 0.5363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5340523719787598,
|
|
"step": 225,
|
|
"valid_targets_mean": 3011.3,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 0.3727714748784441,
|
|
"grad_norm": 0.8061816435780602,
|
|
"learning_rate": 2.1203703703703703e-05,
|
|
"loss": 0.4665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3998042941093445,
|
|
"step": 230,
|
|
"valid_targets_mean": 3933.1,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 0.38087520259319285,
|
|
"grad_norm": 0.8327959127590774,
|
|
"learning_rate": 2.1666666666666667e-05,
|
|
"loss": 0.462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5590848922729492,
|
|
"step": 235,
|
|
"valid_targets_mean": 3193.1,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 0.3889789303079417,
|
|
"grad_norm": 0.7369049954720542,
|
|
"learning_rate": 2.212962962962963e-05,
|
|
"loss": 0.4519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47498127818107605,
|
|
"step": 240,
|
|
"valid_targets_mean": 3404.8,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 0.39708265802269044,
|
|
"grad_norm": 0.7763169284637017,
|
|
"learning_rate": 2.2592592592592594e-05,
|
|
"loss": 0.4707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39968666434288025,
|
|
"step": 245,
|
|
"valid_targets_mean": 3643.1,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 0.4051863857374392,
|
|
"grad_norm": 0.5745074030464737,
|
|
"learning_rate": 2.3055555555555554e-05,
|
|
"loss": 0.4828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4340120851993561,
|
|
"step": 250,
|
|
"valid_targets_mean": 4889.4,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 0.413290113452188,
|
|
"grad_norm": 0.6509176855544626,
|
|
"learning_rate": 2.3518518518518518e-05,
|
|
"loss": 0.48,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4259786605834961,
|
|
"step": 255,
|
|
"valid_targets_mean": 4724.8,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 0.4213938411669368,
|
|
"grad_norm": 0.6382937773930243,
|
|
"learning_rate": 2.3981481481481485e-05,
|
|
"loss": 0.4641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4811030626296997,
|
|
"step": 260,
|
|
"valid_targets_mean": 3824.5,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 0.42949756888168555,
|
|
"grad_norm": 0.7814960892271423,
|
|
"learning_rate": 2.444444444444445e-05,
|
|
"loss": 0.4826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4951825737953186,
|
|
"step": 265,
|
|
"valid_targets_mean": 2962.9,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 0.4376012965964344,
|
|
"grad_norm": 0.7860611727969448,
|
|
"learning_rate": 2.4907407407407412e-05,
|
|
"loss": 0.5235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.465817928314209,
|
|
"step": 270,
|
|
"valid_targets_mean": 4154.8,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 0.44570502431118314,
|
|
"grad_norm": 0.7225875405455014,
|
|
"learning_rate": 2.5370370370370375e-05,
|
|
"loss": 0.4314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4747081995010376,
|
|
"step": 275,
|
|
"valid_targets_mean": 3147.5,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 0.4538087520259319,
|
|
"grad_norm": 0.7579343166171137,
|
|
"learning_rate": 2.5833333333333336e-05,
|
|
"loss": 0.4712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4651714265346527,
|
|
"step": 280,
|
|
"valid_targets_mean": 2885.6,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.4619124797406807,
|
|
"grad_norm": 0.6458345103499519,
|
|
"learning_rate": 2.62962962962963e-05,
|
|
"loss": 0.479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44281134009361267,
|
|
"step": 285,
|
|
"valid_targets_mean": 4319.7,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 0.4700162074554295,
|
|
"grad_norm": 0.7007184493552016,
|
|
"learning_rate": 2.6759259259259263e-05,
|
|
"loss": 0.4632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43551719188690186,
|
|
"step": 290,
|
|
"valid_targets_mean": 3202.4,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 0.47811993517017826,
|
|
"grad_norm": 0.7307318580304619,
|
|
"learning_rate": 2.7222222222222226e-05,
|
|
"loss": 0.4552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4373762607574463,
|
|
"step": 295,
|
|
"valid_targets_mean": 3294.9,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 0.4862236628849271,
|
|
"grad_norm": 0.9011037479565293,
|
|
"learning_rate": 2.768518518518519e-05,
|
|
"loss": 0.4564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4616827368736267,
|
|
"step": 300,
|
|
"valid_targets_mean": 2662.6,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 0.49432739059967584,
|
|
"grad_norm": 0.7121661911160081,
|
|
"learning_rate": 2.814814814814815e-05,
|
|
"loss": 0.4694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42763668298721313,
|
|
"step": 305,
|
|
"valid_targets_mean": 3236.9,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 0.5024311183144247,
|
|
"grad_norm": 0.8096707896642864,
|
|
"learning_rate": 2.8611111111111113e-05,
|
|
"loss": 0.4287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4525720775127411,
|
|
"step": 310,
|
|
"valid_targets_mean": 2636.4,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 0.5105348460291734,
|
|
"grad_norm": 0.7261203808002379,
|
|
"learning_rate": 2.9074074074074077e-05,
|
|
"loss": 0.4664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48865166306495667,
|
|
"step": 315,
|
|
"valid_targets_mean": 3461.4,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 0.5186385737439222,
|
|
"grad_norm": 1.2575281945746308,
|
|
"learning_rate": 2.953703703703704e-05,
|
|
"loss": 0.4828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5157779455184937,
|
|
"step": 320,
|
|
"valid_targets_mean": 2379.4,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 0.526742301458671,
|
|
"grad_norm": 0.7251897026165784,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.4352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46817487478256226,
|
|
"step": 325,
|
|
"valid_targets_mean": 3586.9,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 0.5348460291734197,
|
|
"grad_norm": 0.7598243027449975,
|
|
"learning_rate": 3.0462962962962964e-05,
|
|
"loss": 0.4647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40748733282089233,
|
|
"step": 330,
|
|
"valid_targets_mean": 3351.0,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.5429497568881686,
|
|
"grad_norm": 0.5867804558692091,
|
|
"learning_rate": 3.092592592592593e-05,
|
|
"loss": 0.4548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3633122146129608,
|
|
"step": 335,
|
|
"valid_targets_mean": 4532.9,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 0.5510534846029174,
|
|
"grad_norm": 0.8308762981819495,
|
|
"learning_rate": 3.138888888888889e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44768840074539185,
|
|
"step": 340,
|
|
"valid_targets_mean": 2915.2,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 0.5591572123176661,
|
|
"grad_norm": 0.7168296495416179,
|
|
"learning_rate": 3.185185185185185e-05,
|
|
"loss": 0.4715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47569552063941956,
|
|
"step": 345,
|
|
"valid_targets_mean": 3553.9,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 0.5672609400324149,
|
|
"grad_norm": 0.7779706686981114,
|
|
"learning_rate": 3.231481481481482e-05,
|
|
"loss": 0.4571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45146211981773376,
|
|
"step": 350,
|
|
"valid_targets_mean": 2965.0,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 0.5753646677471637,
|
|
"grad_norm": 0.6521122222225881,
|
|
"learning_rate": 3.277777777777778e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.383737713098526,
|
|
"step": 355,
|
|
"valid_targets_mean": 3290.4,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 0.5834683954619124,
|
|
"grad_norm": 0.6903093659168319,
|
|
"learning_rate": 3.3240740740740746e-05,
|
|
"loss": 0.4344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44812503457069397,
|
|
"step": 360,
|
|
"valid_targets_mean": 3197.4,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 0.5915721231766613,
|
|
"grad_norm": 0.5893356617898746,
|
|
"learning_rate": 3.3703703703703706e-05,
|
|
"loss": 0.4698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43500688672065735,
|
|
"step": 365,
|
|
"valid_targets_mean": 4616.5,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 0.5996758508914101,
|
|
"grad_norm": 0.5704634541622681,
|
|
"learning_rate": 3.4166666666666666e-05,
|
|
"loss": 0.4545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4581429362297058,
|
|
"step": 370,
|
|
"valid_targets_mean": 5007.1,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 0.6077795786061588,
|
|
"grad_norm": 0.6250080462027718,
|
|
"learning_rate": 3.462962962962963e-05,
|
|
"loss": 0.4421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4598006308078766,
|
|
"step": 375,
|
|
"valid_targets_mean": 4201.1,
|
|
"valid_targets_min": 1594
|
|
},
|
|
{
|
|
"epoch": 0.6158833063209076,
|
|
"grad_norm": 0.604145580512504,
|
|
"learning_rate": 3.509259259259259e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38162094354629517,
|
|
"step": 380,
|
|
"valid_targets_mean": 4180.4,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 0.6239870340356564,
|
|
"grad_norm": 0.8135079481344287,
|
|
"learning_rate": 3.555555555555555e-05,
|
|
"loss": 0.5118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5114529132843018,
|
|
"step": 385,
|
|
"valid_targets_mean": 2778.4,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 0.6320907617504052,
|
|
"grad_norm": 0.7156111178744403,
|
|
"learning_rate": 3.601851851851852e-05,
|
|
"loss": 0.4418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42450952529907227,
|
|
"step": 390,
|
|
"valid_targets_mean": 2836.8,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 0.640194489465154,
|
|
"grad_norm": 0.6233850159032864,
|
|
"learning_rate": 3.648148148148149e-05,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4477499723434448,
|
|
"step": 395,
|
|
"valid_targets_mean": 4234.6,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 0.6482982171799028,
|
|
"grad_norm": 0.6873205556027221,
|
|
"learning_rate": 3.694444444444445e-05,
|
|
"loss": 0.4436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4863933324813843,
|
|
"step": 400,
|
|
"valid_targets_mean": 3356.6,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 0.6564019448946515,
|
|
"grad_norm": 0.5459953172678582,
|
|
"learning_rate": 3.7407407407407414e-05,
|
|
"loss": 0.4143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35290199518203735,
|
|
"step": 405,
|
|
"valid_targets_mean": 4598.9,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 0.6645056726094003,
|
|
"grad_norm": 0.6910108927145312,
|
|
"learning_rate": 3.7870370370370374e-05,
|
|
"loss": 0.4879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44417983293533325,
|
|
"step": 410,
|
|
"valid_targets_mean": 3119.1,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 0.6726094003241491,
|
|
"grad_norm": 0.5251638853973931,
|
|
"learning_rate": 3.833333333333334e-05,
|
|
"loss": 0.4003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3857458829879761,
|
|
"step": 415,
|
|
"valid_targets_mean": 5829.8,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 0.6807131280388979,
|
|
"grad_norm": 0.5492363264332838,
|
|
"learning_rate": 3.87962962962963e-05,
|
|
"loss": 0.4234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3834828734397888,
|
|
"step": 420,
|
|
"valid_targets_mean": 4159.3,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.6888168557536467,
|
|
"grad_norm": 0.6657955711902417,
|
|
"learning_rate": 3.925925925925926e-05,
|
|
"loss": 0.4585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4726736545562744,
|
|
"step": 425,
|
|
"valid_targets_mean": 4370.1,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 0.6969205834683955,
|
|
"grad_norm": 0.8215679499444706,
|
|
"learning_rate": 3.972222222222223e-05,
|
|
"loss": 0.4492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5097543597221375,
|
|
"step": 430,
|
|
"valid_targets_mean": 2555.1,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 0.7050243111831442,
|
|
"grad_norm": 0.6324092514283729,
|
|
"learning_rate": 3.99999738705324e-05,
|
|
"loss": 0.4479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3866470158100128,
|
|
"step": 435,
|
|
"valid_targets_mean": 3941.9,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 0.713128038897893,
|
|
"grad_norm": 0.7808262105006745,
|
|
"learning_rate": 3.999967991480598e-05,
|
|
"loss": 0.4242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4729665517807007,
|
|
"step": 440,
|
|
"valid_targets_mean": 2932.7,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 0.7212317666126418,
|
|
"grad_norm": 0.6797144276661182,
|
|
"learning_rate": 3.99990593463352e-05,
|
|
"loss": 0.4634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47761768102645874,
|
|
"step": 445,
|
|
"valid_targets_mean": 3771.3,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 0.7293354943273906,
|
|
"grad_norm": 0.7894421925623699,
|
|
"learning_rate": 3.9998112175254504e-05,
|
|
"loss": 0.4495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48762065172195435,
|
|
"step": 450,
|
|
"valid_targets_mean": 2831.8,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 0.7374392220421394,
|
|
"grad_norm": 0.7071798523471358,
|
|
"learning_rate": 3.9996838417032044e-05,
|
|
"loss": 0.4311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5425790548324585,
|
|
"step": 455,
|
|
"valid_targets_mean": 3630.4,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 0.7455429497568882,
|
|
"grad_norm": 0.85816434814954,
|
|
"learning_rate": 3.999523809246943e-05,
|
|
"loss": 0.4367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45270124077796936,
|
|
"step": 460,
|
|
"valid_targets_mean": 2890.2,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 0.7536466774716369,
|
|
"grad_norm": 0.7183863410015342,
|
|
"learning_rate": 3.9993311227701415e-05,
|
|
"loss": 0.4237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.481193482875824,
|
|
"step": 465,
|
|
"valid_targets_mean": 2988.1,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 0.7617504051863857,
|
|
"grad_norm": 0.5698856112206491,
|
|
"learning_rate": 3.9991057854195424e-05,
|
|
"loss": 0.4704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.550667405128479,
|
|
"step": 470,
|
|
"valid_targets_mean": 5871.3,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 0.7698541329011345,
|
|
"grad_norm": 0.6288923533755414,
|
|
"learning_rate": 3.998847800875107e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4150170087814331,
|
|
"step": 475,
|
|
"valid_targets_mean": 3744.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 0.7779578606158833,
|
|
"grad_norm": 0.7841484146071308,
|
|
"learning_rate": 3.9985571733499544e-05,
|
|
"loss": 0.4228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4479774832725525,
|
|
"step": 480,
|
|
"valid_targets_mean": 2472.2,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 0.7860615883306321,
|
|
"grad_norm": 0.700045967636308,
|
|
"learning_rate": 3.998233907590294e-05,
|
|
"loss": 0.4584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44580456614494324,
|
|
"step": 485,
|
|
"valid_targets_mean": 3343.3,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 0.7941653160453809,
|
|
"grad_norm": 0.629997635377666,
|
|
"learning_rate": 3.9978780088753464e-05,
|
|
"loss": 0.4529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4441825747489929,
|
|
"step": 490,
|
|
"valid_targets_mean": 3981.2,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 0.8022690437601296,
|
|
"grad_norm": 0.6817904477118991,
|
|
"learning_rate": 3.997489483017256e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4390064477920532,
|
|
"step": 495,
|
|
"valid_targets_mean": 3689.6,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 0.8103727714748784,
|
|
"grad_norm": 0.6185939925084256,
|
|
"learning_rate": 3.997068336361e-05,
|
|
"loss": 0.4502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3633172810077667,
|
|
"step": 500,
|
|
"valid_targets_mean": 4270.8,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 0.8184764991896273,
|
|
"grad_norm": 0.879292203617941,
|
|
"learning_rate": 3.996614575784282e-05,
|
|
"loss": 0.4163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4245051145553589,
|
|
"step": 505,
|
|
"valid_targets_mean": 4214.6,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 0.826580226904376,
|
|
"grad_norm": 0.7885150453655395,
|
|
"learning_rate": 3.99612820869742e-05,
|
|
"loss": 0.4717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4266160726547241,
|
|
"step": 510,
|
|
"valid_targets_mean": 2683.9,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 0.8346839546191248,
|
|
"grad_norm": 0.6732147232946274,
|
|
"learning_rate": 3.9956092430432254e-05,
|
|
"loss": 0.437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4329238533973694,
|
|
"step": 515,
|
|
"valid_targets_mean": 3728.2,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.8427876823338736,
|
|
"grad_norm": 0.8663697967068255,
|
|
"learning_rate": 3.9950576872968735e-05,
|
|
"loss": 0.4436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44332367181777954,
|
|
"step": 520,
|
|
"valid_targets_mean": 4120.1,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 0.8508914100486223,
|
|
"grad_norm": 0.6675209752863196,
|
|
"learning_rate": 3.994473550465765e-05,
|
|
"loss": 0.4373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42500463128089905,
|
|
"step": 525,
|
|
"valid_targets_mean": 3894.8,
|
|
"valid_targets_min": 1595
|
|
},
|
|
{
|
|
"epoch": 0.8589951377633711,
|
|
"grad_norm": 1.156331824829645,
|
|
"learning_rate": 3.993856842089379e-05,
|
|
"loss": 0.4993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4912262558937073,
|
|
"step": 530,
|
|
"valid_targets_mean": 2224.8,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.86709886547812,
|
|
"grad_norm": 0.594392464554485,
|
|
"learning_rate": 3.99320757223912e-05,
|
|
"loss": 0.4307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4009823203086853,
|
|
"step": 535,
|
|
"valid_targets_mean": 4163.1,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 0.8752025931928687,
|
|
"grad_norm": 0.5042294566986941,
|
|
"learning_rate": 3.992525751518144e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3967186212539673,
|
|
"step": 540,
|
|
"valid_targets_mean": 6045.8,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 0.8833063209076175,
|
|
"grad_norm": 0.7508835619309017,
|
|
"learning_rate": 3.991811391061199e-05,
|
|
"loss": 0.394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41827213764190674,
|
|
"step": 545,
|
|
"valid_targets_mean": 3118.6,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 0.8914100486223663,
|
|
"grad_norm": 2.109010732539915,
|
|
"learning_rate": 3.9910645025344324e-05,
|
|
"loss": 0.4496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4581921696662903,
|
|
"step": 550,
|
|
"valid_targets_mean": 4448.7,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 0.899513776337115,
|
|
"grad_norm": 0.505861640073071,
|
|
"learning_rate": 3.990285098135204e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4023759365081787,
|
|
"step": 555,
|
|
"valid_targets_mean": 5656.4,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 0.9076175040518638,
|
|
"grad_norm": 0.7084642299995411,
|
|
"learning_rate": 3.98947319059189e-05,
|
|
"loss": 0.4338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4089127779006958,
|
|
"step": 560,
|
|
"valid_targets_mean": 3820.6,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 0.9157212317666127,
|
|
"grad_norm": 0.7838903874181324,
|
|
"learning_rate": 3.988628793163669e-05,
|
|
"loss": 0.4763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47754400968551636,
|
|
"step": 565,
|
|
"valid_targets_mean": 3355.3,
|
|
"valid_targets_min": 1983
|
|
},
|
|
{
|
|
"epoch": 0.9238249594813615,
|
|
"grad_norm": 0.7697692626341919,
|
|
"learning_rate": 3.987751919640309e-05,
|
|
"loss": 0.4241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41041046380996704,
|
|
"step": 570,
|
|
"valid_targets_mean": 3666.2,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 0.9319286871961102,
|
|
"grad_norm": 0.5892131127706576,
|
|
"learning_rate": 3.9868425843419445e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4179239571094513,
|
|
"step": 575,
|
|
"valid_targets_mean": 4579.3,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 0.940032414910859,
|
|
"grad_norm": 0.5400962924292961,
|
|
"learning_rate": 3.985900802118836e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4245756268501282,
|
|
"step": 580,
|
|
"valid_targets_mean": 5453.4,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 0.9481361426256077,
|
|
"grad_norm": 0.6919745305835788,
|
|
"learning_rate": 3.984926588351135e-05,
|
|
"loss": 0.4518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45021432638168335,
|
|
"step": 585,
|
|
"valid_targets_mean": 3102.4,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 0.9562398703403565,
|
|
"grad_norm": 0.6591393873889547,
|
|
"learning_rate": 3.983919958948627e-05,
|
|
"loss": 0.4142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42304936051368713,
|
|
"step": 590,
|
|
"valid_targets_mean": 3353.1,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 0.9643435980551054,
|
|
"grad_norm": 0.5632951141018055,
|
|
"learning_rate": 3.982880930350474e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4031622111797333,
|
|
"step": 595,
|
|
"valid_targets_mean": 4799.3,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 0.9724473257698542,
|
|
"grad_norm": 0.5232136419625988,
|
|
"learning_rate": 3.981809519524948e-05,
|
|
"loss": 0.4119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3731626272201538,
|
|
"step": 600,
|
|
"valid_targets_mean": 4803.6,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 0.9805510534846029,
|
|
"grad_norm": 0.6326091273919606,
|
|
"learning_rate": 3.98070574396915e-05,
|
|
"loss": 0.431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42506226897239685,
|
|
"step": 605,
|
|
"valid_targets_mean": 4227.1,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.9886547811993517,
|
|
"grad_norm": 0.6045895230930198,
|
|
"learning_rate": 3.979569621708727e-05,
|
|
"loss": 0.4239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5133014917373657,
|
|
"step": 610,
|
|
"valid_targets_mean": 4420.0,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 0.9967585089141004,
|
|
"grad_norm": 0.7123425243387163,
|
|
"learning_rate": 3.978401171297576e-05,
|
|
"loss": 0.4082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4153461456298828,
|
|
"step": 615,
|
|
"valid_targets_mean": 2826.6,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 1.0048622366288493,
|
|
"grad_norm": 0.539167883549107,
|
|
"learning_rate": 3.9772004118175424e-05,
|
|
"loss": 0.4051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4070894122123718,
|
|
"step": 620,
|
|
"valid_targets_mean": 4499.4,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 1.012965964343598,
|
|
"grad_norm": 0.6626488535519265,
|
|
"learning_rate": 3.975967362878106e-05,
|
|
"loss": 0.4298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5363422632217407,
|
|
"step": 625,
|
|
"valid_targets_mean": 4372.2,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 1.0210696920583469,
|
|
"grad_norm": 0.7049923386137342,
|
|
"learning_rate": 3.9747020446160665e-05,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.411579430103302,
|
|
"step": 630,
|
|
"valid_targets_mean": 3333.8,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 1.0291734197730957,
|
|
"grad_norm": 0.6429529234872674,
|
|
"learning_rate": 3.973404477695207e-05,
|
|
"loss": 0.3909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4016868472099304,
|
|
"step": 635,
|
|
"valid_targets_mean": 3914.5,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.0372771474878444,
|
|
"grad_norm": 0.8430734597321315,
|
|
"learning_rate": 3.972074683305961e-05,
|
|
"loss": 0.3793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4546150863170624,
|
|
"step": 640,
|
|
"valid_targets_mean": 3274.8,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 1.0453808752025933,
|
|
"grad_norm": 0.691765509979604,
|
|
"learning_rate": 3.970712683165066e-05,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3804226815700531,
|
|
"step": 645,
|
|
"valid_targets_mean": 3015.3,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 1.053484602917342,
|
|
"grad_norm": 0.7121115958430826,
|
|
"learning_rate": 3.969318499515208e-05,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4142545163631439,
|
|
"step": 650,
|
|
"valid_targets_mean": 3504.2,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.0615883306320908,
|
|
"grad_norm": 0.5999484561204884,
|
|
"learning_rate": 3.967892155124659e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3854263424873352,
|
|
"step": 655,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 1.0696920583468394,
|
|
"grad_norm": 0.5713234178165169,
|
|
"learning_rate": 3.9664336732869044e-05,
|
|
"loss": 0.3996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3343764841556549,
|
|
"step": 660,
|
|
"valid_targets_mean": 3993.2,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 1.0777957860615883,
|
|
"grad_norm": 0.7329951448315434,
|
|
"learning_rate": 3.964943077820263e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44141700863838196,
|
|
"step": 665,
|
|
"valid_targets_mean": 3044.4,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 1.0858995137763372,
|
|
"grad_norm": 0.7502131949551032,
|
|
"learning_rate": 3.963420393067499e-05,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4431774616241455,
|
|
"step": 670,
|
|
"valid_targets_mean": 2909.8,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 1.0940032414910859,
|
|
"grad_norm": 0.5182332836674972,
|
|
"learning_rate": 3.961865643895422e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863534986972809,
|
|
"step": 675,
|
|
"valid_targets_mean": 4810.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 1.1021069692058347,
|
|
"grad_norm": 0.7191407703553409,
|
|
"learning_rate": 3.960278855694484e-05,
|
|
"loss": 0.3964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42751169204711914,
|
|
"step": 680,
|
|
"valid_targets_mean": 5616.1,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 1.1102106969205834,
|
|
"grad_norm": 0.5066897158392785,
|
|
"learning_rate": 3.958660054378361e-05,
|
|
"loss": 0.4463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33914482593536377,
|
|
"step": 685,
|
|
"valid_targets_mean": 5099.1,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 1.1183144246353323,
|
|
"grad_norm": 0.5236343651746419,
|
|
"learning_rate": 3.9570092663835334e-05,
|
|
"loss": 0.415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3773620128631592,
|
|
"step": 690,
|
|
"valid_targets_mean": 4685.4,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.1264181523500811,
|
|
"grad_norm": 0.7039460112163713,
|
|
"learning_rate": 3.9553265186688525e-05,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44046705961227417,
|
|
"step": 695,
|
|
"valid_targets_mean": 3954.5,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 1.1345218800648298,
|
|
"grad_norm": 0.6654606056629773,
|
|
"learning_rate": 3.953611838715101e-05,
|
|
"loss": 0.4225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4078805446624756,
|
|
"step": 700,
|
|
"valid_targets_mean": 3752.2,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 1.1426256077795787,
|
|
"grad_norm": 0.6514465887784622,
|
|
"learning_rate": 3.951865254524542e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3984151780605316,
|
|
"step": 705,
|
|
"valid_targets_mean": 3862.8,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 1.1507293354943273,
|
|
"grad_norm": 0.6471325396020866,
|
|
"learning_rate": 3.950086794620467e-05,
|
|
"loss": 0.4328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43052127957344055,
|
|
"step": 710,
|
|
"valid_targets_mean": 3554.1,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 1.1588330632090762,
|
|
"grad_norm": 0.5873275500250088,
|
|
"learning_rate": 3.948276488046723e-05,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3730786144733429,
|
|
"step": 715,
|
|
"valid_targets_mean": 3719.2,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 1.1669367909238249,
|
|
"grad_norm": 0.467522004257344,
|
|
"learning_rate": 3.946434364367243e-05,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3182247281074524,
|
|
"step": 720,
|
|
"valid_targets_mean": 5996.4,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 1.1750405186385737,
|
|
"grad_norm": 0.6791400275737529,
|
|
"learning_rate": 3.9445604536655625e-05,
|
|
"loss": 0.4678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5305315256118774,
|
|
"step": 725,
|
|
"valid_targets_mean": 5634.3,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 1.1831442463533226,
|
|
"grad_norm": 0.5174903212149605,
|
|
"learning_rate": 3.9426547865443264e-05,
|
|
"loss": 0.4005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33829182386398315,
|
|
"step": 730,
|
|
"valid_targets_mean": 4885.6,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 1.1912479740680713,
|
|
"grad_norm": 0.7597194711385206,
|
|
"learning_rate": 3.940717394124792e-05,
|
|
"loss": 0.4487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44391706585884094,
|
|
"step": 735,
|
|
"valid_targets_mean": 2776.8,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 1.1993517017828201,
|
|
"grad_norm": 0.7881689255335056,
|
|
"learning_rate": 3.938748308046318e-05,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44584935903549194,
|
|
"step": 740,
|
|
"valid_targets_mean": 3193.6,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 1.2074554294975688,
|
|
"grad_norm": 0.8381756076442425,
|
|
"learning_rate": 3.9367475604658485e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4248051643371582,
|
|
"step": 745,
|
|
"valid_targets_mean": 2367.8,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 1.2155591572123177,
|
|
"grad_norm": 0.7159039861695103,
|
|
"learning_rate": 3.9347151840573915e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47999000549316406,
|
|
"step": 750,
|
|
"valid_targets_mean": 3596.5,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 1.2236628849270665,
|
|
"grad_norm": 0.6398486566542297,
|
|
"learning_rate": 3.932651212011479e-05,
|
|
"loss": 0.4168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5003429055213928,
|
|
"step": 755,
|
|
"valid_targets_mean": 5000.0,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 1.2317666126418152,
|
|
"grad_norm": 0.624969106257508,
|
|
"learning_rate": 3.930555678034628e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4231489598751068,
|
|
"step": 760,
|
|
"valid_targets_mean": 4428.4,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 1.239870340356564,
|
|
"grad_norm": 0.7089448788566617,
|
|
"learning_rate": 3.9284286163487924e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4825202226638794,
|
|
"step": 765,
|
|
"valid_targets_mean": 3411.8,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 1.2479740680713127,
|
|
"grad_norm": 0.759421400318165,
|
|
"learning_rate": 3.9262700616908006e-05,
|
|
"loss": 0.4374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4094354510307312,
|
|
"step": 770,
|
|
"valid_targets_mean": 2685.1,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 1.2560777957860616,
|
|
"grad_norm": 0.5529348501565219,
|
|
"learning_rate": 3.924080049311791e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37100690603256226,
|
|
"step": 775,
|
|
"valid_targets_mean": 4895.1,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 1.2641815235008105,
|
|
"grad_norm": 0.619539071217271,
|
|
"learning_rate": 3.921858614976632e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4086681604385376,
|
|
"step": 780,
|
|
"valid_targets_mean": 3785.5,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 1.2722852512155591,
|
|
"grad_norm": 0.5238357820823653,
|
|
"learning_rate": 3.919605794963343e-05,
|
|
"loss": 0.4185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39525216817855835,
|
|
"step": 785,
|
|
"valid_targets_mean": 5541.9,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 1.280388978930308,
|
|
"grad_norm": 0.5092409315061598,
|
|
"learning_rate": 3.9173216260624994e-05,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4608556628227234,
|
|
"step": 790,
|
|
"valid_targets_mean": 6425.1,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 1.2884927066450567,
|
|
"grad_norm": 0.6410692988138462,
|
|
"learning_rate": 3.915006145576632e-05,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.344308078289032,
|
|
"step": 795,
|
|
"valid_targets_mean": 3473.1,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 1.2965964343598055,
|
|
"grad_norm": 0.7056554788066899,
|
|
"learning_rate": 3.912659391319617e-05,
|
|
"loss": 0.4397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.483992338180542,
|
|
"step": 800,
|
|
"valid_targets_mean": 3135.4,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 1.3047001620745542,
|
|
"grad_norm": 0.4873176150043294,
|
|
"learning_rate": 3.910281401616061e-05,
|
|
"loss": 0.3886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34384751319885254,
|
|
"step": 805,
|
|
"valid_targets_mean": 5550.6,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 1.312803889789303,
|
|
"grad_norm": 0.6927361702376568,
|
|
"learning_rate": 3.907872215300672e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5015307664871216,
|
|
"step": 810,
|
|
"valid_targets_mean": 4136.8,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 1.320907617504052,
|
|
"grad_norm": 0.7312277221405057,
|
|
"learning_rate": 3.905431871717628e-05,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3949721157550812,
|
|
"step": 815,
|
|
"valid_targets_mean": 3217.5,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 1.3290113452188006,
|
|
"grad_norm": 0.6697071515516008,
|
|
"learning_rate": 3.902960410719931e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37480708956718445,
|
|
"step": 820,
|
|
"valid_targets_mean": 4123.7,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 1.3371150729335495,
|
|
"grad_norm": 0.672036926562655,
|
|
"learning_rate": 3.900457872668763e-05,
|
|
"loss": 0.4257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42396080493927,
|
|
"step": 825,
|
|
"valid_targets_mean": 3633.0,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 1.3452188006482984,
|
|
"grad_norm": 0.7642734408208844,
|
|
"learning_rate": 3.897924298432816e-05,
|
|
"loss": 0.4193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41950348019599915,
|
|
"step": 830,
|
|
"valid_targets_mean": 3189.9,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 1.353322528363047,
|
|
"grad_norm": 0.6463330876952285,
|
|
"learning_rate": 3.895359729387636e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39035725593566895,
|
|
"step": 835,
|
|
"valid_targets_mean": 3329.4,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 1.3614262560777957,
|
|
"grad_norm": 0.6294863371097716,
|
|
"learning_rate": 3.892764207414939e-05,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3504693806171417,
|
|
"step": 840,
|
|
"valid_targets_mean": 3639.1,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 1.3695299837925445,
|
|
"grad_norm": 0.7969453453498634,
|
|
"learning_rate": 3.890137774901931e-05,
|
|
"loss": 0.4373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46982237696647644,
|
|
"step": 845,
|
|
"valid_targets_mean": 3004.3,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 1.3776337115072934,
|
|
"grad_norm": 0.721881420599743,
|
|
"learning_rate": 3.887480474740616e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4777338206768036,
|
|
"step": 850,
|
|
"valid_targets_mean": 3069.9,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 1.385737439222042,
|
|
"grad_norm": 0.6847253726579977,
|
|
"learning_rate": 3.8847923503270915e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3960109353065491,
|
|
"step": 855,
|
|
"valid_targets_mean": 3656.4,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 1.393841166936791,
|
|
"grad_norm": 0.6216929584617436,
|
|
"learning_rate": 3.8820734455608464e-05,
|
|
"loss": 0.4082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4312170147895813,
|
|
"step": 860,
|
|
"valid_targets_mean": 4023.9,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 1.4019448946515398,
|
|
"grad_norm": 0.5892956812304465,
|
|
"learning_rate": 3.879323804844038e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3535316586494446,
|
|
"step": 865,
|
|
"valid_targets_mean": 4127.8,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 1.4100486223662885,
|
|
"grad_norm": 0.8082279554048201,
|
|
"learning_rate": 3.876543473080771e-05,
|
|
"loss": 0.4261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4516857862472534,
|
|
"step": 870,
|
|
"valid_targets_mean": 3422.7,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 1.4181523500810373,
|
|
"grad_norm": 0.48789635530901226,
|
|
"learning_rate": 3.87373249567636e-05,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3681475818157196,
|
|
"step": 875,
|
|
"valid_targets_mean": 5620.6,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 1.426256077795786,
|
|
"grad_norm": 0.6612612675219925,
|
|
"learning_rate": 3.870890918536592e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38521233201026917,
|
|
"step": 880,
|
|
"valid_targets_mean": 3554.9,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.4343598055105349,
|
|
"grad_norm": 0.6937842376936233,
|
|
"learning_rate": 3.8680187880669765e-05,
|
|
"loss": 0.4346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4324935972690582,
|
|
"step": 885,
|
|
"valid_targets_mean": 3092.2,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 1.4424635332252835,
|
|
"grad_norm": 0.6157750782438423,
|
|
"learning_rate": 3.865116151171983e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40046823024749756,
|
|
"step": 890,
|
|
"valid_targets_mean": 3759.9,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 1.4505672609400324,
|
|
"grad_norm": 0.7167528050751656,
|
|
"learning_rate": 3.8621830552542826e-05,
|
|
"loss": 0.4149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5026776194572449,
|
|
"step": 895,
|
|
"valid_targets_mean": 3752.6,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 1.4586709886547813,
|
|
"grad_norm": 0.8431157727162455,
|
|
"learning_rate": 3.859219548213965e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35948169231414795,
|
|
"step": 900,
|
|
"valid_targets_mean": 4141.4,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 1.46677471636953,
|
|
"grad_norm": 0.5786833063220671,
|
|
"learning_rate": 3.856225678447764e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39766621589660645,
|
|
"step": 905,
|
|
"valid_targets_mean": 4228.1,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 1.4748784440842788,
|
|
"grad_norm": 0.7049559260095754,
|
|
"learning_rate": 3.853201494848263e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41903460025787354,
|
|
"step": 910,
|
|
"valid_targets_mean": 2839.4,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 1.4829821717990275,
|
|
"grad_norm": 0.5730202377371769,
|
|
"learning_rate": 3.8501470468030984e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40747907757759094,
|
|
"step": 915,
|
|
"valid_targets_mean": 3931.9,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 1.4910858995137763,
|
|
"grad_norm": 0.43206946086028963,
|
|
"learning_rate": 3.84706238419415e-05,
|
|
"loss": 0.3631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669350504875183,
|
|
"step": 920,
|
|
"valid_targets_mean": 5546.7,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 1.499189627228525,
|
|
"grad_norm": 0.6471623110865716,
|
|
"learning_rate": 3.8439475573967315e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4179629683494568,
|
|
"step": 925,
|
|
"valid_targets_mean": 4247.6,
|
|
"valid_targets_min": 1447
|
|
},
|
|
{
|
|
"epoch": 1.5072933549432739,
|
|
"grad_norm": 0.6122343718916604,
|
|
"learning_rate": 3.840802617278763e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4195345640182495,
|
|
"step": 930,
|
|
"valid_targets_mean": 3743.9,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 1.5153970826580228,
|
|
"grad_norm": 0.5128272228930184,
|
|
"learning_rate": 3.837627615199942e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3458319306373596,
|
|
"step": 935,
|
|
"valid_targets_mean": 4861.8,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 1.5235008103727714,
|
|
"grad_norm": 0.5372139985938604,
|
|
"learning_rate": 3.834422603010906e-05,
|
|
"loss": 0.3887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34437286853790283,
|
|
"step": 940,
|
|
"valid_targets_mean": 5274.9,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 1.5316045380875203,
|
|
"grad_norm": 0.7168389521153298,
|
|
"learning_rate": 3.831187633052384e-05,
|
|
"loss": 0.4173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5015881657600403,
|
|
"step": 945,
|
|
"valid_targets_mean": 4582.9,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 1.5397082658022692,
|
|
"grad_norm": 0.5084505658863171,
|
|
"learning_rate": 3.8279227581543424e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3953602612018585,
|
|
"step": 950,
|
|
"valid_targets_mean": 4978.7,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 1.5478119935170178,
|
|
"grad_norm": 0.5907435331885859,
|
|
"learning_rate": 3.824628031635122e-05,
|
|
"loss": 0.4198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40457284450531006,
|
|
"step": 955,
|
|
"valid_targets_mean": 3979.6,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 1.5559157212317665,
|
|
"grad_norm": 0.6571353061058341,
|
|
"learning_rate": 3.8213035073005665e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3425450325012207,
|
|
"step": 960,
|
|
"valid_targets_mean": 4567.8,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 1.5640194489465153,
|
|
"grad_norm": 0.679939959714484,
|
|
"learning_rate": 3.817949239443147e-05,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37692564725875854,
|
|
"step": 965,
|
|
"valid_targets_mean": 2661.4,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 1.5721231766612642,
|
|
"grad_norm": 0.530755056682054,
|
|
"learning_rate": 3.814565282841071e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42367827892303467,
|
|
"step": 970,
|
|
"valid_targets_mean": 5225.1,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 1.5802269043760129,
|
|
"grad_norm": 0.7466237984825811,
|
|
"learning_rate": 3.81115169275739e-05,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5087313055992126,
|
|
"step": 975,
|
|
"valid_targets_mean": 2849.9,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 1.5883306320907618,
|
|
"grad_norm": 0.5840496257255798,
|
|
"learning_rate": 3.8077085249390997e-05,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3712337613105774,
|
|
"step": 980,
|
|
"valid_targets_mean": 3896.5,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 1.5964343598055106,
|
|
"grad_norm": 0.677800468030808,
|
|
"learning_rate": 3.8042358356162215e-05,
|
|
"loss": 0.4315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4169023036956787,
|
|
"step": 985,
|
|
"valid_targets_mean": 3102.1,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 1.6045380875202593,
|
|
"grad_norm": 0.5126684483285767,
|
|
"learning_rate": 3.8007336815008945e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32481974363327026,
|
|
"step": 990,
|
|
"valid_targets_mean": 4557.1,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 1.6126418152350082,
|
|
"grad_norm": 0.5972646097264587,
|
|
"learning_rate": 3.797202119786442e-05,
|
|
"loss": 0.4141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3950786590576172,
|
|
"step": 995,
|
|
"valid_targets_mean": 4294.4,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 1.620745542949757,
|
|
"grad_norm": 0.5937224848479558,
|
|
"learning_rate": 3.79364120814644e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3952077031135559,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3905.6,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 1.6288492706645057,
|
|
"grad_norm": 0.6321038959683297,
|
|
"learning_rate": 3.790051004733775e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42207908630371094,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3878.1,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 1.6369529983792543,
|
|
"grad_norm": 1.1186970348539031,
|
|
"learning_rate": 3.786431568179697e-05,
|
|
"loss": 0.3937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3650881052017212,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5200.1,
|
|
"valid_targets_min": 2239
|
|
},
|
|
{
|
|
"epoch": 1.6450567260940032,
|
|
"grad_norm": 0.5774237494154663,
|
|
"learning_rate": 3.7827829575928536e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3719528913497925,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4792.2,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 1.653160453808752,
|
|
"grad_norm": 0.6601144475343061,
|
|
"learning_rate": 3.779105232558337e-05,
|
|
"loss": 0.4043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4179123044013977,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3453.9,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 1.6612641815235007,
|
|
"grad_norm": 0.503079426746597,
|
|
"learning_rate": 3.775398453136701e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31408727169036865,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5430.4,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 1.6693679092382496,
|
|
"grad_norm": 0.7753835008841552,
|
|
"learning_rate": 3.771662679862984e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39880800247192383,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2442.4,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 1.6774716369529985,
|
|
"grad_norm": 0.6991030543306379,
|
|
"learning_rate": 3.767897973745721e-05,
|
|
"loss": 0.4159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44633948802948,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3597.4,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 1.6855753646677472,
|
|
"grad_norm": 0.7679002110960852,
|
|
"learning_rate": 3.764104396265946e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3935224711894989,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3113.6,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 1.6936790923824958,
|
|
"grad_norm": 0.48586847500539443,
|
|
"learning_rate": 3.760282009376187e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3813055157661438,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4965.1,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 1.7017828200972447,
|
|
"grad_norm": 0.9069360496102079,
|
|
"learning_rate": 3.756430875499458e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44932669401168823,
|
|
"step": 1050,
|
|
"valid_targets_mean": 2457.4,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 1.7098865478119936,
|
|
"grad_norm": 0.5147049425616307,
|
|
"learning_rate": 3.7525510575282354e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3647802174091339,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5582.6,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 1.7179902755267422,
|
|
"grad_norm": 0.5324879798136473,
|
|
"learning_rate": 3.748642618823432e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3285607099533081,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5071.8,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 1.726094003241491,
|
|
"grad_norm": 0.647787323939788,
|
|
"learning_rate": 3.744705623213365e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4008070230484009,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3614.9,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 1.73419773095624,
|
|
"grad_norm": 0.6619832349327134,
|
|
"learning_rate": 3.74074013499271e-05,
|
|
"loss": 0.4134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42244502902030945,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3305.0,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 1.7423014586709886,
|
|
"grad_norm": 0.6542150803144281,
|
|
"learning_rate": 3.736746218921453e-05,
|
|
"loss": 0.4391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42766737937927246,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3408.0,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 1.7504051863857373,
|
|
"grad_norm": 0.7258838869557971,
|
|
"learning_rate": 3.732723940223832e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39612990617752075,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2531.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 1.7585089141004864,
|
|
"grad_norm": 0.5910566163663715,
|
|
"learning_rate": 3.7286733645872726e-05,
|
|
"loss": 0.3818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4122849404811859,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4625.3,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 1.766612641815235,
|
|
"grad_norm": 0.6433905775726179,
|
|
"learning_rate": 3.724594558161315e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4537995755672455,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3713.8,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 1.7747163695299837,
|
|
"grad_norm": 0.726822537416244,
|
|
"learning_rate": 3.720487587556534e-05,
|
|
"loss": 0.4445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46592673659324646,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3423.6,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 1.7828200972447326,
|
|
"grad_norm": 0.5792115707093757,
|
|
"learning_rate": 3.716352519843448e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3367713689804077,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4104.1,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 1.7909238249594814,
|
|
"grad_norm": 0.6039687716465919,
|
|
"learning_rate": 3.712189422551431e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34517428278923035,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3525.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.79902755267423,
|
|
"grad_norm": 0.718725524451317,
|
|
"learning_rate": 3.7079983636676005e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39871928095817566,
|
|
"step": 1110,
|
|
"valid_targets_mean": 3444.6,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 1.807131280388979,
|
|
"grad_norm": 0.6537149777525709,
|
|
"learning_rate": 3.703779411635714e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3639248013496399,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3092.6,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.8152350081037278,
|
|
"grad_norm": 0.5629855946423489,
|
|
"learning_rate": 3.6995326353550515e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245788514614105,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4038.6,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 1.8233387358184765,
|
|
"grad_norm": 0.5734716036860011,
|
|
"learning_rate": 3.695258104179284e-05,
|
|
"loss": 0.3876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39152956008911133,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4071.9,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 1.8314424635332252,
|
|
"grad_norm": 0.5854495000674845,
|
|
"learning_rate": 3.690955887915347e-05,
|
|
"loss": 0.4394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4314863085746765,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4155.2,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 1.839546191247974,
|
|
"grad_norm": 0.5835121083788867,
|
|
"learning_rate": 3.686626056822298e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3812408745288849,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3790.1,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 1.847649918962723,
|
|
"grad_norm": 0.5390903203193496,
|
|
"learning_rate": 3.682268681610169e-05,
|
|
"loss": 0.4031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3393847942352295,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4877.7,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 1.8557536466774716,
|
|
"grad_norm": 0.6666436255178859,
|
|
"learning_rate": 3.677883833438814e-05,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3733408451080322,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2816.2,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 1.8638573743922204,
|
|
"grad_norm": 0.6744209523213708,
|
|
"learning_rate": 3.6734715839167427e-05,
|
|
"loss": 0.3776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41044723987579346,
|
|
"step": 1150,
|
|
"valid_targets_mean": 2894.9,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 1.8719611021069693,
|
|
"grad_norm": 0.5776259559281988,
|
|
"learning_rate": 3.669032005099954e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31816449761390686,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3865.9,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 1.880064829821718,
|
|
"grad_norm": 0.5069812438471043,
|
|
"learning_rate": 3.664565169490761e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34008556604385376,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5045.1,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 1.8881685575364666,
|
|
"grad_norm": 0.6393979867095283,
|
|
"learning_rate": 3.660071150036602e-05,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36201924085617065,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3738.8,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 1.8962722852512157,
|
|
"grad_norm": 0.6385627539570938,
|
|
"learning_rate": 3.655550020128852e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4077168107032776,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3565.6,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 1.9043760129659644,
|
|
"grad_norm": 0.5922135114548367,
|
|
"learning_rate": 3.651001853601627e-05,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.405472993850708,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3930.4,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 1.912479740680713,
|
|
"grad_norm": 0.589394070370728,
|
|
"learning_rate": 3.646426724730572e-05,
|
|
"loss": 0.3919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3850012421607971,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3923.1,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 1.920583468395462,
|
|
"grad_norm": 0.4849462911152238,
|
|
"learning_rate": 3.641824708231653e-05,
|
|
"loss": 0.3806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3893633186817169,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5844.6,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 1.9286871961102108,
|
|
"grad_norm": 0.4868660520441822,
|
|
"learning_rate": 3.6371958792599346e-05,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4328196048736572,
|
|
"step": 1190,
|
|
"valid_targets_mean": 5689.4,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 1.9367909238249594,
|
|
"grad_norm": 0.7830835596596479,
|
|
"learning_rate": 3.632540313408353e-05,
|
|
"loss": 0.3763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37044936418533325,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3209.5,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 1.9448946515397083,
|
|
"grad_norm": 0.5564325863706703,
|
|
"learning_rate": 3.627858086706483e-05,
|
|
"loss": 0.4226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36431488394737244,
|
|
"step": 1200,
|
|
"valid_targets_mean": 4813.8,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 1.9529983792544572,
|
|
"grad_norm": 0.5822242628899343,
|
|
"learning_rate": 3.6231492756192924e-05,
|
|
"loss": 0.3924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37201085686683655,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3925.2,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 1.9611021069692058,
|
|
"grad_norm": 0.5299007245341597,
|
|
"learning_rate": 3.618413957045899e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31111475825309753,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4279.1,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 1.9692058346839545,
|
|
"grad_norm": 0.7156083555424475,
|
|
"learning_rate": 3.6136522083183096e-05,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4434228539466858,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2801.8,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 1.9773095623987034,
|
|
"grad_norm": 0.7191486420410436,
|
|
"learning_rate": 3.608864107200159e-05,
|
|
"loss": 0.394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40111202001571655,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3650.6,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 1.9854132901134522,
|
|
"grad_norm": 0.7177268568233205,
|
|
"learning_rate": 3.604049731885442e-05,
|
|
"loss": 0.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4274698793888092,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2859.1,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 1.993517017828201,
|
|
"grad_norm": 0.5936165125904502,
|
|
"learning_rate": 3.599209160997234e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4391655921936035,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4539.0,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 2.0016207455429496,
|
|
"grad_norm": 0.6607150994416292,
|
|
"learning_rate": 3.594342473586406e-05,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3753243684768677,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3325.9,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 2.0097244732576987,
|
|
"grad_norm": 0.6229919055152918,
|
|
"learning_rate": 3.589449749130337e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3249698877334595,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3667.4,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 2.0178282009724473,
|
|
"grad_norm": 0.596009922929333,
|
|
"learning_rate": 3.5845310675316134e-05,
|
|
"loss": 0.3692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35430464148521423,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4640.9,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 2.025931928687196,
|
|
"grad_norm": 0.6380265961179253,
|
|
"learning_rate": 3.5795865091167266e-05,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33436498045921326,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3602.1,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 2.034035656401945,
|
|
"grad_norm": 0.6162634046057834,
|
|
"learning_rate": 3.574616154634758e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.321361780166626,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3951.0,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 2.0421393841166937,
|
|
"grad_norm": 0.8549853836990146,
|
|
"learning_rate": 3.569620085256062e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34706395864486694,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2600.3,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 2.0502431118314424,
|
|
"grad_norm": 0.6157850695159539,
|
|
"learning_rate": 3.564598382570943e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31989988684654236,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3471.0,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 2.0583468395461915,
|
|
"grad_norm": 0.6912760968032348,
|
|
"learning_rate": 3.559551128588316e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3643125295639038,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3251.2,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 2.06645056726094,
|
|
"grad_norm": 0.7610727310356797,
|
|
"learning_rate": 3.5544784057343746e-05,
|
|
"loss": 0.3989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44861117005348206,
|
|
"step": 1275,
|
|
"valid_targets_mean": 2874.9,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 2.0745542949756888,
|
|
"grad_norm": 0.5567253168047179,
|
|
"learning_rate": 3.549380296851242e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34195390343666077,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4699.7,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 2.0826580226904374,
|
|
"grad_norm": 0.6193215633518243,
|
|
"learning_rate": 3.544256885195616e-05,
|
|
"loss": 0.3697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36366885900497437,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4513.9,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.0907617504051865,
|
|
"grad_norm": 0.6104236788483114,
|
|
"learning_rate": 3.539108254437414e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3442176580429077,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4325.1,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 2.098865478119935,
|
|
"grad_norm": 1.1540653788876254,
|
|
"learning_rate": 3.5339344886584015e-05,
|
|
"loss": 0.3837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47065746784210205,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4431.3,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 2.106969205834684,
|
|
"grad_norm": 0.6313032622328161,
|
|
"learning_rate": 3.5287356723508245e-05,
|
|
"loss": 0.3774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3796147108078003,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3835.1,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 2.115072933549433,
|
|
"grad_norm": 0.5995277124296124,
|
|
"learning_rate": 3.523511890416023e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3411373496055603,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4134.9,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 2.1231766612641816,
|
|
"grad_norm": 0.6323877139662327,
|
|
"learning_rate": 3.518263228163051e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3897821605205536,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3811.7,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 2.1312803889789302,
|
|
"grad_norm": 0.6142230133720461,
|
|
"learning_rate": 3.51298977130728e-05,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35401493310928345,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4104.4,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 2.139384116693679,
|
|
"grad_norm": 0.575159991305633,
|
|
"learning_rate": 3.507691605968997e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3774334788322449,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4597.1,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 2.147487844408428,
|
|
"grad_norm": 0.650651417151311,
|
|
"learning_rate": 3.5023688186720047e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3409254550933838,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3917.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 2.1555915721231766,
|
|
"grad_norm": 0.520190676285313,
|
|
"learning_rate": 3.497021496342203e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37429600954055786,
|
|
"step": 1330,
|
|
"valid_targets_mean": 5373.3,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 2.1636952998379253,
|
|
"grad_norm": 0.8766333160570894,
|
|
"learning_rate": 3.491649726306168e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45493656396865845,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3073.1,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 2.1717990275526744,
|
|
"grad_norm": 0.48924493035671224,
|
|
"learning_rate": 3.486253596289734e-05,
|
|
"loss": 0.3724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3554176092147827,
|
|
"step": 1340,
|
|
"valid_targets_mean": 8016.4,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 2.179902755267423,
|
|
"grad_norm": 0.6152012628911511,
|
|
"learning_rate": 3.4808331944165524e-05,
|
|
"loss": 0.3655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33981600403785706,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3537.2,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 2.1880064829821717,
|
|
"grad_norm": 0.5302271963442517,
|
|
"learning_rate": 3.4753886092066546e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537047564983368,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5067.1,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 2.1961102106969204,
|
|
"grad_norm": 0.6367347476094577,
|
|
"learning_rate": 3.469919929575012e-05,
|
|
"loss": 0.3537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.347379207611084,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3530.1,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 2.2042139384116695,
|
|
"grad_norm": 7.616842383691646,
|
|
"learning_rate": 3.464427244830076e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3965144455432892,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4835.8,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 2.212317666126418,
|
|
"grad_norm": 0.5772928971804826,
|
|
"learning_rate": 3.458910644672324e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3225036859512329,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4137.2,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 2.2204213938411668,
|
|
"grad_norm": 0.7652072059945616,
|
|
"learning_rate": 3.453370219192794e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3325500786304474,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3532.5,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 2.228525121555916,
|
|
"grad_norm": 0.6273654869200413,
|
|
"learning_rate": 3.4478060588716116e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3371928632259369,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4380.4,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 2.2366288492706645,
|
|
"grad_norm": 0.5330490634477026,
|
|
"learning_rate": 3.442218254576516e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30799007415771484,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4405.8,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 2.244732576985413,
|
|
"grad_norm": 0.6034321724192573,
|
|
"learning_rate": 3.436606897561372e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3362310528755188,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3742.8,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 2.2528363047001623,
|
|
"grad_norm": 0.6490329309441155,
|
|
"learning_rate": 3.430972079464682e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34692591428756714,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3377.1,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 2.260940032414911,
|
|
"grad_norm": 0.6561577805947444,
|
|
"learning_rate": 3.425313892308089e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47548946738243103,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4475.6,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 2.2690437601296596,
|
|
"grad_norm": 0.7213768996297372,
|
|
"learning_rate": 3.419632428494873e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45764851570129395,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3668.8,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 2.2771474878444082,
|
|
"grad_norm": 0.676480480135692,
|
|
"learning_rate": 3.413927780808443e-05,
|
|
"loss": 0.3544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36912989616394043,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3886.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 2.2852512155591573,
|
|
"grad_norm": 0.6377817982477058,
|
|
"learning_rate": 3.4082000424108235e-05,
|
|
"loss": 0.3659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43174880743026733,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4233.6,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 2.293354943273906,
|
|
"grad_norm": 0.5519348419998006,
|
|
"learning_rate": 3.402449306841128e-05,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31168171763420105,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4307.8,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 2.3014586709886546,
|
|
"grad_norm": 0.7505030716064821,
|
|
"learning_rate": 3.396675668014035e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37064194679260254,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3508.5,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 2.3095623987034037,
|
|
"grad_norm": 0.6153586073602998,
|
|
"learning_rate": 3.390879220218258e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.324023574590683,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3714.1,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 2.3176661264181524,
|
|
"grad_norm": 0.6047177695229424,
|
|
"learning_rate": 3.385060058114995e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36816906929016113,
|
|
"step": 1430,
|
|
"valid_targets_mean": 4465.9,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 2.325769854132901,
|
|
"grad_norm": 0.554001360948358,
|
|
"learning_rate": 3.379218276736393e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32747846841812134,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4328.4,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 2.3338735818476497,
|
|
"grad_norm": 0.7300247951181067,
|
|
"learning_rate": 3.373353971483992e-05,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39081335067749023,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3143.4,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 2.341977309562399,
|
|
"grad_norm": 0.7887680824193282,
|
|
"learning_rate": 3.367467238127165e-05,
|
|
"loss": 0.353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4153595268726349,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2644.9,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 2.3500810372771475,
|
|
"grad_norm": 0.6128981493620611,
|
|
"learning_rate": 3.361558172801558e-05,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3145619034767151,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3406.1,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 2.358184764991896,
|
|
"grad_norm": 0.7057939890319916,
|
|
"learning_rate": 3.355626872007518e-05,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43307170271873474,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3228.0,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 2.366288492706645,
|
|
"grad_norm": 1.245387675691084,
|
|
"learning_rate": 3.3496734326085154e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5461128950119019,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3818.5,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 2.374392220421394,
|
|
"grad_norm": 0.7974141939772306,
|
|
"learning_rate": 3.3436979518295674e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39469295740127563,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2919.4,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 2.3824959481361425,
|
|
"grad_norm": 0.7108670533735841,
|
|
"learning_rate": 3.337700527255643e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3202032744884491,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4618.8,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 2.3905996758508916,
|
|
"grad_norm": 0.7412244458586731,
|
|
"learning_rate": 3.331681256830074e-05,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3399648666381836,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3429.4,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 2.3987034035656403,
|
|
"grad_norm": 0.5383118278414649,
|
|
"learning_rate": 3.325640238852958e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3444640636444092,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5254.6,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 2.406807131280389,
|
|
"grad_norm": 0.5928053691323958,
|
|
"learning_rate": 3.319577571979544e-05,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40096884965896606,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4633.4,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 2.4149108589951376,
|
|
"grad_norm": 0.5049795409180116,
|
|
"learning_rate": 3.313493355218632e-05,
|
|
"loss": 0.3872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43719810247421265,
|
|
"step": 1490,
|
|
"valid_targets_mean": 6272.6,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 2.4230145867098867,
|
|
"grad_norm": 0.8353029407587053,
|
|
"learning_rate": 3.307387687930947e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41381019353866577,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2182.9,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 2.4311183144246353,
|
|
"grad_norm": 0.8126079203284331,
|
|
"learning_rate": 3.301260669827524e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40541964769363403,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2986.6,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 2.439222042139384,
|
|
"grad_norm": 0.641139311735153,
|
|
"learning_rate": 3.295112400968073e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36821722984313965,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3650.5,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 2.447325769854133,
|
|
"grad_norm": 0.720372072556044,
|
|
"learning_rate": 3.2889429817593494e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3299311697483063,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2849.4,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 2.4554294975688817,
|
|
"grad_norm": 0.6615710856738938,
|
|
"learning_rate": 3.2827525129535135e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4220331311225891,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3493.5,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 2.4635332252836304,
|
|
"grad_norm": 0.6267811677605671,
|
|
"learning_rate": 3.276541095646482e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4255092740058899,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4085.3,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.471636952998379,
|
|
"grad_norm": 0.6283412448611168,
|
|
"learning_rate": 3.2703088312762825e-05,
|
|
"loss": 0.3746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37167081236839294,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3667.0,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 2.479740680713128,
|
|
"grad_norm": 0.6150481887647228,
|
|
"learning_rate": 3.2640558216213914e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3558899164199829,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3783.0,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.487844408427877,
|
|
"grad_norm": 0.7059041076787986,
|
|
"learning_rate": 3.2577821687990764e-05,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39561814069747925,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3271.9,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 2.4959481361426255,
|
|
"grad_norm": 0.6347284403899237,
|
|
"learning_rate": 3.2514879752637236e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.359174907207489,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3839.1,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 2.5040518638573745,
|
|
"grad_norm": 0.695347992645094,
|
|
"learning_rate": 3.2451733438051705e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4211786687374115,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3237.3,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 2.512155591572123,
|
|
"grad_norm": 0.6572998579365059,
|
|
"learning_rate": 3.238838377547023e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31920304894447327,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3640.3,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 2.520259319286872,
|
|
"grad_norm": 0.6183228297803092,
|
|
"learning_rate": 3.2324831799449724e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3397529721260071,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3801.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 2.528363047001621,
|
|
"grad_norm": 0.5922882925500382,
|
|
"learning_rate": 3.226107854785106e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40487492084503174,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4336.1,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 2.5364667747163696,
|
|
"grad_norm": 0.5505805672615378,
|
|
"learning_rate": 3.2197125061822135e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30663758516311646,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5709.7,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 2.5445705024311183,
|
|
"grad_norm": 0.6592707151602514,
|
|
"learning_rate": 3.213297238578082e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3352968096733093,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3024.6,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 2.5526742301458674,
|
|
"grad_norm": 0.6837888385824673,
|
|
"learning_rate": 3.206862156739799e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3527699410915375,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3217.2,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 2.560777957860616,
|
|
"grad_norm": 0.5714278693915779,
|
|
"learning_rate": 3.2004073657580314e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3621138334274292,
|
|
"step": 1580,
|
|
"valid_targets_mean": 5215.3,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 2.5688816855753647,
|
|
"grad_norm": 0.632687459517687,
|
|
"learning_rate": 3.193932971045316e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.378584623336792,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3656.8,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 2.5769854132901133,
|
|
"grad_norm": 0.5154328163710282,
|
|
"learning_rate": 3.187439078334338e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569686770439148,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4438.7,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 2.585089141004862,
|
|
"grad_norm": 0.8485853331997644,
|
|
"learning_rate": 3.180925793676199e-05,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4228053689002991,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2364.7,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 2.593192868719611,
|
|
"grad_norm": 0.6385223754745202,
|
|
"learning_rate": 3.1743932234386905e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33268439769744873,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3510.6,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 2.6012965964343597,
|
|
"grad_norm": 0.6203925952713126,
|
|
"learning_rate": 3.167841474304555e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35235142707824707,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4186.2,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 2.6094003241491084,
|
|
"grad_norm": 0.6407113023621984,
|
|
"learning_rate": 3.161270653269743e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37132495641708374,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3792.4,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 2.6175040518638575,
|
|
"grad_norm": 0.787376412545527,
|
|
"learning_rate": 3.154680867641666e-05,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3557274639606476,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2341.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 2.625607779578606,
|
|
"grad_norm": 0.7020441242654781,
|
|
"learning_rate": 3.1480722250374454e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3731013536453247,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3057.1,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 2.633711507293355,
|
|
"grad_norm": 0.6408600597666677,
|
|
"learning_rate": 3.1414448333821526e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3454047441482544,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3993.4,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 2.641815235008104,
|
|
"grad_norm": 0.4902024972171765,
|
|
"learning_rate": 3.1347988009070496e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34175369143486023,
|
|
"step": 1630,
|
|
"valid_targets_mean": 6449.6,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 2.6499189627228525,
|
|
"grad_norm": 0.6120518005314733,
|
|
"learning_rate": 3.1281342361478184e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40884333848953247,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4059.9,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 2.658022690437601,
|
|
"grad_norm": 0.6771620012648307,
|
|
"learning_rate": 3.121451247942789e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38433632254600525,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4274.3,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 2.6661264181523503,
|
|
"grad_norm": 0.5457628190492216,
|
|
"learning_rate": 3.1147499454311654e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3678765892982483,
|
|
"step": 1645,
|
|
"valid_targets_mean": 5149.6,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 2.674230145867099,
|
|
"grad_norm": 0.627604755543937,
|
|
"learning_rate": 3.1080304380512386e-05,
|
|
"loss": 0.3641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4083409309387207,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4076.5,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 2.6823338735818476,
|
|
"grad_norm": 1.1030374170315973,
|
|
"learning_rate": 3.101292835538602e-05,
|
|
"loss": 0.3886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4205278158187866,
|
|
"step": 1655,
|
|
"valid_targets_mean": 2480.2,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 2.6904376012965967,
|
|
"grad_norm": 0.7339764425274315,
|
|
"learning_rate": 3.0945372479243605e-05,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35320383310317993,
|
|
"step": 1660,
|
|
"valid_targets_mean": 2801.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 2.6985413290113454,
|
|
"grad_norm": 0.6863535808248586,
|
|
"learning_rate": 3.087763785533328e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4404126703739166,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3782.9,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 2.706645056726094,
|
|
"grad_norm": 0.7168705430931214,
|
|
"learning_rate": 3.0809725589822325e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114071190357208,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2838.5,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 2.7147487844408427,
|
|
"grad_norm": 0.6556363386210516,
|
|
"learning_rate": 3.074163679177907e-05,
|
|
"loss": 0.3592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36680227518081665,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3425.0,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 2.7228525121555913,
|
|
"grad_norm": 0.7037719475710038,
|
|
"learning_rate": 3.067337257315477e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3956286311149597,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3108.4,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 2.7309562398703404,
|
|
"grad_norm": 0.5316241938449401,
|
|
"learning_rate": 3.0604934048765444e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28850430250167847,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4587.1,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 2.739059967585089,
|
|
"grad_norm": 0.7055812576851976,
|
|
"learning_rate": 3.05363223362737e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38997015357017517,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3222.5,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 2.7471636952998377,
|
|
"grad_norm": 0.664731751727158,
|
|
"learning_rate": 3.0467538556170463e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32559171319007874,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2939.7,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 2.755267423014587,
|
|
"grad_norm": 0.5783986029196682,
|
|
"learning_rate": 3.0398583831756655e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3600800633430481,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4368.5,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.7633711507293355,
|
|
"grad_norm": 0.611412124205423,
|
|
"learning_rate": 3.03294592891249e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33210447430610657,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4879.0,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 2.771474878444084,
|
|
"grad_norm": 0.557474285200355,
|
|
"learning_rate": 3.0260166057141086e-05,
|
|
"loss": 0.3625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32921281456947327,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4460.2,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 2.7795786061588332,
|
|
"grad_norm": 0.5043150540097012,
|
|
"learning_rate": 3.0190705267425956e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847195863723755,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4976.2,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 2.787682333873582,
|
|
"grad_norm": 0.5706697755674613,
|
|
"learning_rate": 3.0121078054336633e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3246619701385498,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4126.2,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 2.7957860615883305,
|
|
"grad_norm": 0.6778117045066955,
|
|
"learning_rate": 3.005128555494806e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36269962787628174,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3297.2,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 2.8038897893030796,
|
|
"grad_norm": 0.5794849455469647,
|
|
"learning_rate": 2.998132890903448e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.330091655254364,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3850.0,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 2.8119935170178283,
|
|
"grad_norm": 0.6007475248043017,
|
|
"learning_rate": 2.9911209259050763e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35253655910491943,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4133.7,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 2.820097244732577,
|
|
"grad_norm": 0.5841323161644045,
|
|
"learning_rate": 2.984092775011382e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3201521635055542,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4134.8,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 2.828200972447326,
|
|
"grad_norm": 0.7336317690567647,
|
|
"learning_rate": 2.9770485529983834e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44161343574523926,
|
|
"step": 1745,
|
|
"valid_targets_mean": 2970.2,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 2.8363047001620747,
|
|
"grad_norm": 0.6635532459963542,
|
|
"learning_rate": 2.9699883749045564e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3493614196777344,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3285.0,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 2.8444084278768234,
|
|
"grad_norm": 0.5027187622458136,
|
|
"learning_rate": 2.962912356028953e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3354187309741974,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4720.4,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 2.852512155591572,
|
|
"grad_norm": 0.6901547177308334,
|
|
"learning_rate": 2.95582061192932e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3903937339782715,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3206.8,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 2.8606158833063207,
|
|
"grad_norm": 0.5708803489808794,
|
|
"learning_rate": 2.9487132584202115e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3380119800567627,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3977.0,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 2.8687196110210698,
|
|
"grad_norm": 0.5085623418043083,
|
|
"learning_rate": 2.9415904115710964e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34908628463745117,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4955.4,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 2.8768233387358184,
|
|
"grad_norm": 0.6051208343777774,
|
|
"learning_rate": 2.9344521877044633e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4075632691383362,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4224.0,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 2.884927066450567,
|
|
"grad_norm": 0.5855062840025109,
|
|
"learning_rate": 2.927298703393924e-05,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3283981680870056,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4081.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 2.893030794165316,
|
|
"grad_norm": 0.6371935770846757,
|
|
"learning_rate": 2.9201300754623046e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3392450511455536,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3568.0,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 2.901134521880065,
|
|
"grad_norm": 0.4278974307253599,
|
|
"learning_rate": 2.9129464209797404e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865206003189087,
|
|
"step": 1790,
|
|
"valid_targets_mean": 6328.2,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 2.9092382495948135,
|
|
"grad_norm": 0.5505126981477363,
|
|
"learning_rate": 2.9057478572617644e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.337314248085022,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4826.6,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 2.9173419773095626,
|
|
"grad_norm": 0.7805624719946108,
|
|
"learning_rate": 2.898534501867391e-05,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4011368751525879,
|
|
"step": 1800,
|
|
"valid_targets_mean": 2397.6,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 2.9254457050243112,
|
|
"grad_norm": 0.5482087600129742,
|
|
"learning_rate": 2.8913064725971947e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3068529963493347,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5494.9,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 2.93354943273906,
|
|
"grad_norm": 0.9936321479400723,
|
|
"learning_rate": 2.8840638874913894e-05,
|
|
"loss": 0.3868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40905821323394775,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3718.8,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 2.941653160453809,
|
|
"grad_norm": 0.5736879552202689,
|
|
"learning_rate": 2.8768068648278976e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39087069034576416,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4475.4,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 2.9497568881685576,
|
|
"grad_norm": 0.6756877628799013,
|
|
"learning_rate": 2.8695355231204206e-05,
|
|
"loss": 0.3601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3940260112285614,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3155.6,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 2.9578606158833063,
|
|
"grad_norm": 0.5794570379139724,
|
|
"learning_rate": 2.862249981116502e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015279173851013,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4283.2,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 2.965964343598055,
|
|
"grad_norm": 0.6037397342964902,
|
|
"learning_rate": 2.854950357795589e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3510607182979584,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3907.7,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 2.974068071312804,
|
|
"grad_norm": 0.6184387967781734,
|
|
"learning_rate": 2.847636772367091e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3586122989654541,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3820.1,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 2.9821717990275527,
|
|
"grad_norm": 0.6530052932759616,
|
|
"learning_rate": 2.8403093442684287e-05,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4493202865123749,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3684.6,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.9902755267423013,
|
|
"grad_norm": 0.5322411565499487,
|
|
"learning_rate": 2.8329681931630877e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34222137928009033,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4850.8,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 2.99837925445705,
|
|
"grad_norm": 0.5810539949314576,
|
|
"learning_rate": 2.825613438938663e-05,
|
|
"loss": 0.3602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3474220931529999,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4159.3,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 3.006482982171799,
|
|
"grad_norm": 0.5876530626626576,
|
|
"learning_rate": 2.8182452017048983e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3032858371734619,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3758.8,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 3.0145867098865478,
|
|
"grad_norm": 0.6532906368770814,
|
|
"learning_rate": 2.81086360179173e-05,
|
|
"loss": 0.3592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3261423707008362,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3757.5,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 3.0226904376012964,
|
|
"grad_norm": 0.6768457925442797,
|
|
"learning_rate": 2.8034687597473164e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33774706721305847,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3208.4,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 3.0307941653160455,
|
|
"grad_norm": 0.6026553451116088,
|
|
"learning_rate": 2.796060796336074e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3198626637458801,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4000.1,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 3.038897893030794,
|
|
"grad_norm": 0.6424620557357676,
|
|
"learning_rate": 2.7886398325367018e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2810208797454834,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3380.9,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.047001620745543,
|
|
"grad_norm": 0.653905616156312,
|
|
"learning_rate": 2.7812059895402064e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29623761773109436,
|
|
"step": 1880,
|
|
"valid_targets_mean": 3166.4,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 3.055105348460292,
|
|
"grad_norm": 0.6290299885338436,
|
|
"learning_rate": 2.773759388747925e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2997327446937561,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4197.2,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 3.0632090761750406,
|
|
"grad_norm": 0.6799966773570919,
|
|
"learning_rate": 2.7663001517695386e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31888049840927124,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3355.9,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 3.0713128038897892,
|
|
"grad_norm": 0.6150196310708935,
|
|
"learning_rate": 2.7588284004210907e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811214327812195,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4364.9,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 3.079416531604538,
|
|
"grad_norm": 0.6290637964866943,
|
|
"learning_rate": 2.7513442567229936e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3333356976509094,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4037.9,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 3.087520259319287,
|
|
"grad_norm": 0.6693041785931817,
|
|
"learning_rate": 2.7438478428980407e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3725960850715637,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5818.6,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 3.0956239870340356,
|
|
"grad_norm": 0.7196188613672898,
|
|
"learning_rate": 2.7363392813694047e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31832918524742126,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3378.1,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 3.1037277147487843,
|
|
"grad_norm": 0.5818347922259423,
|
|
"learning_rate": 2.7288186947586426e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3344227075576782,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4411.8,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 3.1118314424635334,
|
|
"grad_norm": 0.8446124163677976,
|
|
"learning_rate": 2.7212862058836925e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33817988634109497,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2214.9,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 3.119935170178282,
|
|
"grad_norm": 0.5779953469992928,
|
|
"learning_rate": 2.713741937756865e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.341831237077713,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4742.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 3.1280388978930307,
|
|
"grad_norm": 0.6294273422868075,
|
|
"learning_rate": 2.7061860135828384e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27781611680984497,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3733.9,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 3.1361426256077793,
|
|
"grad_norm": 0.6062050863467073,
|
|
"learning_rate": 2.6986185567566442e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080408573150635,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4087.9,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 3.1442463533225284,
|
|
"grad_norm": 0.6952279897706104,
|
|
"learning_rate": 2.6910396908616527e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3233116567134857,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2762.6,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 3.152350081037277,
|
|
"grad_norm": 0.6787695116443276,
|
|
"learning_rate": 2.6834495396675526e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3256149888038635,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3157.2,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 3.1604538087520258,
|
|
"grad_norm": 1.1495419111797855,
|
|
"learning_rate": 2.6758482271283347e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3622073531150818,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4490.8,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 3.168557536466775,
|
|
"grad_norm": 0.5168346141282057,
|
|
"learning_rate": 2.668235877380263e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26513683795928955,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4616.9,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 3.1766612641815235,
|
|
"grad_norm": 0.5043452872043703,
|
|
"learning_rate": 2.660612614739849e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28028619289398193,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5325.6,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 3.184764991896272,
|
|
"grad_norm": 0.6906179138947607,
|
|
"learning_rate": 2.652978563701822e-05,
|
|
"loss": 0.3754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3693622946739197,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3594.6,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 3.1928687196110213,
|
|
"grad_norm": 0.49400430685078656,
|
|
"learning_rate": 2.645333848937095e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28254038095474243,
|
|
"step": 1970,
|
|
"valid_targets_mean": 6402.3,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 3.20097244732577,
|
|
"grad_norm": 0.637834024266905,
|
|
"learning_rate": 2.6376785952907292e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2891494035720825,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3953.7,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 3.2090761750405186,
|
|
"grad_norm": 0.7558584709666064,
|
|
"learning_rate": 2.630012927779896e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3705684542655945,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3169.7,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 3.217179902755267,
|
|
"grad_norm": 0.7032258231926862,
|
|
"learning_rate": 2.6223369715918338e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3272814154624939,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3970.0,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 3.2252836304700163,
|
|
"grad_norm": 0.5503263411221311,
|
|
"learning_rate": 2.614650852081805e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31049543619155884,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4652.0,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.233387358184765,
|
|
"grad_norm": 0.6450482534708121,
|
|
"learning_rate": 2.606954694771047e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3217643201351166,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4368.6,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 3.2414910858995136,
|
|
"grad_norm": 0.6920112935131606,
|
|
"learning_rate": 2.5992486253447258e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3920653462409973,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3602.1,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 3.2495948136142627,
|
|
"grad_norm": 0.5584795903285783,
|
|
"learning_rate": 2.5915327696498787e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3155994415283203,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4913.4,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 3.2576985413290114,
|
|
"grad_norm": 0.60509966416426,
|
|
"learning_rate": 2.583807253693362e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28235840797424316,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4147.4,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 3.26580226904376,
|
|
"grad_norm": 0.6062613453180128,
|
|
"learning_rate": 2.576072203639794e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3725880980491638,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4285.9,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 3.2739059967585087,
|
|
"grad_norm": 0.5738457995722336,
|
|
"learning_rate": 2.5683277458094926e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3272360563278198,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4682.2,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 3.282009724473258,
|
|
"grad_norm": 0.8579473802204056,
|
|
"learning_rate": 2.560574006676413e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3547009229660034,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2234.7,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 3.2901134521880064,
|
|
"grad_norm": 0.6933220501836592,
|
|
"learning_rate": 2.5528111128660826e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33494657278060913,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3647.6,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 3.298217179902755,
|
|
"grad_norm": 0.7250423855085911,
|
|
"learning_rate": 2.545039191153533e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30072858929634094,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2830.2,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 3.306320907617504,
|
|
"grad_norm": 0.5630006709655807,
|
|
"learning_rate": 2.53725836846123e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2971174716949463,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4642.2,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.314424635332253,
|
|
"grad_norm": 0.6837754571304738,
|
|
"learning_rate": 2.5294687718569994e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29883715510368347,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3184.2,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 3.3225283630470015,
|
|
"grad_norm": 0.6214125630303267,
|
|
"learning_rate": 2.5216705285519525e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303884357213974,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4170.8,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 3.3306320907617506,
|
|
"grad_norm": 0.7065926173313246,
|
|
"learning_rate": 2.5138637658984116e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4543423056602478,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4195.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 3.3387358184764993,
|
|
"grad_norm": 0.7035147409734731,
|
|
"learning_rate": 2.5060486113878244e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000071942806244,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3148.4,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 3.346839546191248,
|
|
"grad_norm": 0.5664150065637116,
|
|
"learning_rate": 2.4982251926486873e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577757239341736,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3929.9,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 3.354943273905997,
|
|
"grad_norm": 0.6440217382239727,
|
|
"learning_rate": 2.490393637444458e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098256587982178,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3736.5,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 3.3630470016207457,
|
|
"grad_norm": 0.601624816332684,
|
|
"learning_rate": 2.482554073671471e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29705819487571716,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3976.4,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 3.3711507293354943,
|
|
"grad_norm": 0.6332684428153963,
|
|
"learning_rate": 2.4747066293568452e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34846431016921997,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3731.8,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.379254457050243,
|
|
"grad_norm": 0.622824711033024,
|
|
"learning_rate": 2.4668514326564e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172885775566101,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4841.5,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 3.387358184764992,
|
|
"grad_norm": 0.5756571432151757,
|
|
"learning_rate": 2.4589886118525556e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28297609090805054,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4441.5,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 3.3954619124797407,
|
|
"grad_norm": 0.6063759657293902,
|
|
"learning_rate": 2.4511182953522405e-05,
|
|
"loss": 0.3615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3448454737663269,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4130.9,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 3.4035656401944894,
|
|
"grad_norm": 0.6837849754358483,
|
|
"learning_rate": 2.4432406116847954e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4419548809528351,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3970.8,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.411669367909238,
|
|
"grad_norm": 0.7285501548199385,
|
|
"learning_rate": 2.435355689499874e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3801971971988678,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3257.1,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 3.419773095623987,
|
|
"grad_norm": 0.6734064818131312,
|
|
"learning_rate": 2.4274636575653398e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3767085075378418,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4296.2,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 3.427876823338736,
|
|
"grad_norm": 0.4899912083889619,
|
|
"learning_rate": 2.4195646447651663e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573740482330322,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5421.6,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 3.4359805510534844,
|
|
"grad_norm": 0.8302010528432883,
|
|
"learning_rate": 2.411658780097331e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3501180410385132,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5060.1,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 3.4440842787682335,
|
|
"grad_norm": 0.8078858491515395,
|
|
"learning_rate": 2.4037461926717075e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3591434955596924,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2513.8,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 3.452188006482982,
|
|
"grad_norm": 0.7684258628541496,
|
|
"learning_rate": 2.395827011707959e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30334824323654175,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3054.1,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 3.460291734197731,
|
|
"grad_norm": 0.605594191987749,
|
|
"learning_rate": 2.3879013665334258e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900565564632416,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4041.1,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 3.46839546191248,
|
|
"grad_norm": 0.8309685861439673,
|
|
"learning_rate": 2.3799693865810163e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37296062707901,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4586.9,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 3.4764991896272286,
|
|
"grad_norm": 0.6551158353626602,
|
|
"learning_rate": 2.37203120138709e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29635459184646606,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3419.6,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 3.4846029173419772,
|
|
"grad_norm": 0.6087540304186911,
|
|
"learning_rate": 2.3640869405893446e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3888489603996277,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4710.4,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 3.492706645056726,
|
|
"grad_norm": 0.6512082633788822,
|
|
"learning_rate": 2.3561367339246976e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33419734239578247,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4247.2,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 3.500810372771475,
|
|
"grad_norm": 0.6006332493640769,
|
|
"learning_rate": 2.3481807112271678e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28265705704689026,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4532.8,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 3.5089141004862237,
|
|
"grad_norm": 0.6341470250528332,
|
|
"learning_rate": 2.3402190024257543e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3464913070201874,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3982.2,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 3.5170178282009723,
|
|
"grad_norm": 0.49166553268855945,
|
|
"learning_rate": 2.3322517375423165e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147142827510834,
|
|
"step": 2170,
|
|
"valid_targets_mean": 5943.0,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 3.525121555915721,
|
|
"grad_norm": 0.6416235508438507,
|
|
"learning_rate": 2.3242790466894494e-05,
|
|
"loss": 0.3379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3126268684864044,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3601.8,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.53322528363047,
|
|
"grad_norm": 0.7830076468989171,
|
|
"learning_rate": 2.316301060068359e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30202680826187134,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4094.8,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 3.5413290113452187,
|
|
"grad_norm": 0.5855369398506851,
|
|
"learning_rate": 2.3083179079667347e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29343757033348083,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4782.4,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 3.5494327390599674,
|
|
"grad_norm": 0.7597815442570822,
|
|
"learning_rate": 2.300329720756625e-05,
|
|
"loss": 0.3539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3538168966770172,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2795.9,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.5575364667747165,
|
|
"grad_norm": 0.6412366872198969,
|
|
"learning_rate": 2.2923366288923045e-05,
|
|
"loss": 0.3495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34232521057128906,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3828.8,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 3.565640194489465,
|
|
"grad_norm": 0.8572308035882495,
|
|
"learning_rate": 2.2843387629081453e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34046727418899536,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2780.4,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.5737439222042138,
|
|
"grad_norm": 0.8499753167098539,
|
|
"learning_rate": 2.2763362534164854e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33723342418670654,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4185.2,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 3.581847649918963,
|
|
"grad_norm": 0.6092208444164786,
|
|
"learning_rate": 2.268329231105498e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3268812596797943,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4369.1,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 3.5899513776337115,
|
|
"grad_norm": 0.6149682556778238,
|
|
"learning_rate": 2.2603178267370504e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967323958873749,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3937.6,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 3.59805510534846,
|
|
"grad_norm": 0.6136305917408219,
|
|
"learning_rate": 2.2523021711445746e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32520681619644165,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4224.8,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 3.6061588330632093,
|
|
"grad_norm": 0.7678255502273676,
|
|
"learning_rate": 2.2442823952309308e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37268686294555664,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2761.6,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 3.614262560777958,
|
|
"grad_norm": 0.6825494201612562,
|
|
"learning_rate": 2.2362586299662642e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30497026443481445,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3246.8,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 3.6223662884927066,
|
|
"grad_norm": 0.7635839021679189,
|
|
"learning_rate": 2.228231006385873e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30430150032043457,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3143.2,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 3.6304700162074557,
|
|
"grad_norm": 0.7017950535136555,
|
|
"learning_rate": 2.2201996555880633e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31477123498916626,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3130.1,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.6385737439222043,
|
|
"grad_norm": 0.6932946086334288,
|
|
"learning_rate": 2.2121647087320105e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31043219566345215,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3812.2,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 3.646677471636953,
|
|
"grad_norm": 0.7548489669502686,
|
|
"learning_rate": 2.204126297035617e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33928608894348145,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3369.1,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 3.6547811993517016,
|
|
"grad_norm": 0.8476792319211851,
|
|
"learning_rate": 2.196084551773368e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299949049949646,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3624.9,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 3.6628849270664503,
|
|
"grad_norm": 0.7748299471252531,
|
|
"learning_rate": 2.1880396042741906e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34626305103302,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3092.2,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 3.6709886547811994,
|
|
"grad_norm": 0.6871604467560325,
|
|
"learning_rate": 2.179991585919307e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.348859965801239,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3594.6,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 3.679092382495948,
|
|
"grad_norm": 0.5432953919737468,
|
|
"learning_rate": 2.1719406281400873e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26746731996536255,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4830.1,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 3.6871961102106967,
|
|
"grad_norm": 0.5290458938160579,
|
|
"learning_rate": 2.163886862415908e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29459887742996216,
|
|
"step": 2275,
|
|
"valid_targets_mean": 5012.5,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 3.695299837925446,
|
|
"grad_norm": 0.7557537719911694,
|
|
"learning_rate": 2.155830420272e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3687734007835388,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3272.9,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 3.7034035656401945,
|
|
"grad_norm": 0.6329174750743987,
|
|
"learning_rate": 2.1477714332773022e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31322354078292847,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4787.0,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 3.711507293354943,
|
|
"grad_norm": 0.6980753042204155,
|
|
"learning_rate": 2.139710033042314e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3370036482810974,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3192.8,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 3.719611021069692,
|
|
"grad_norm": 0.5353150533110514,
|
|
"learning_rate": 2.1316463512169453e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963425815105438,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5346.8,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 3.727714748784441,
|
|
"grad_norm": 0.727354106763134,
|
|
"learning_rate": 2.1235805194883665e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3349874019622803,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3280.2,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 3.7358184764991895,
|
|
"grad_norm": 0.5340590099482381,
|
|
"learning_rate": 2.115512669578857e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3433595299720764,
|
|
"step": 2305,
|
|
"valid_targets_mean": 5857.9,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 3.7439222042139386,
|
|
"grad_norm": 0.6620119615821005,
|
|
"learning_rate": 2.107442933243656e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3657473623752594,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3610.8,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 3.7520259319286873,
|
|
"grad_norm": 0.7166286016202882,
|
|
"learning_rate": 2.099371442268809e-05,
|
|
"loss": 0.3495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3325074017047882,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3517.5,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 3.760129659643436,
|
|
"grad_norm": 0.4555518809686943,
|
|
"learning_rate": 2.0912983284690157e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2193748652935028,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5561.4,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 3.768233387358185,
|
|
"grad_norm": 0.8753777799659089,
|
|
"learning_rate": 2.0832237236854794e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3095594644546509,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3388.4,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 3.7763371150729337,
|
|
"grad_norm": 0.6884922798779087,
|
|
"learning_rate": 2.0751477597837528e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.335110604763031,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3354.2,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 3.7844408427876823,
|
|
"grad_norm": 0.6332533539670777,
|
|
"learning_rate": 2.0670705686515822e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30649179220199585,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3949.0,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 3.792544570502431,
|
|
"grad_norm": 0.7717847306436155,
|
|
"learning_rate": 2.0589922821967566e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37374433875083923,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3164.6,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 3.8006482982171796,
|
|
"grad_norm": 0.6474397322416555,
|
|
"learning_rate": 2.0509130323449545e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30513161420822144,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3070.5,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 3.8087520259319287,
|
|
"grad_norm": 0.5480983935201096,
|
|
"learning_rate": 2.0428329510375838e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291774183511734,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5193.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 3.8168557536466774,
|
|
"grad_norm": 0.7833094014207244,
|
|
"learning_rate": 2.0347521702296333e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3558168113231659,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2686.7,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 3.824959481361426,
|
|
"grad_norm": 0.7621050761785041,
|
|
"learning_rate": 2.026670821887516e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29817432165145874,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2564.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 3.833063209076175,
|
|
"grad_norm": 0.7337736559637801,
|
|
"learning_rate": 2.0185890379869115e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34583503007888794,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3290.3,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 3.841166936790924,
|
|
"grad_norm": 0.6579044742309087,
|
|
"learning_rate": 2.0105069505106126e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30498212575912476,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3680.5,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 3.8492706645056725,
|
|
"grad_norm": 0.6122672425508787,
|
|
"learning_rate": 2.00242469144637e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32759958505630493,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4098.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 3.8573743922204216,
|
|
"grad_norm": 0.6522969245487142,
|
|
"learning_rate": 1.994342392784738e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4506216049194336,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4865.1,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 3.86547811993517,
|
|
"grad_norm": 0.8104573831758627,
|
|
"learning_rate": 1.9862601865169154e-05,
|
|
"loss": 0.3602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3532731533050537,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3282.5,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 3.873581847649919,
|
|
"grad_norm": 0.6575316647039278,
|
|
"learning_rate": 1.9781782046325938e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3369067311286926,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3650.9,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 3.881685575364668,
|
|
"grad_norm": 1.0376247260450713,
|
|
"learning_rate": 1.9700965791177986e-05,
|
|
"loss": 0.3506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30998164415359497,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4215.6,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 3.8897893030794166,
|
|
"grad_norm": 0.7122315974730362,
|
|
"learning_rate": 1.9620154419527372e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3429866135120392,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3437.1,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 3.8978930307941653,
|
|
"grad_norm": 0.5662001888178322,
|
|
"learning_rate": 1.953934925109641e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822979986667633,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4019.1,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 3.9059967585089144,
|
|
"grad_norm": 0.8419287234640135,
|
|
"learning_rate": 1.945855160550611e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3056894838809967,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4442.9,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 3.914100486223663,
|
|
"grad_norm": 0.6801232516837518,
|
|
"learning_rate": 1.937776280225463e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32119885087013245,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3394.3,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 3.9222042139384117,
|
|
"grad_norm": 0.6615050494532062,
|
|
"learning_rate": 1.929698416069571e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3271116018295288,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3491.4,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 3.9303079416531603,
|
|
"grad_norm": 0.6480829850992813,
|
|
"learning_rate": 1.9216217000017182e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3169797658920288,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3797.2,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 3.938411669367909,
|
|
"grad_norm": 0.6608189143051717,
|
|
"learning_rate": 1.9135462639219325e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4551173448562622,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4511.9,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 3.946515397082658,
|
|
"grad_norm": 0.6194102241113435,
|
|
"learning_rate": 1.905472239709343e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2990500330924988,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4128.2,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 3.9546191247974067,
|
|
"grad_norm": 0.5192772414872362,
|
|
"learning_rate": 1.89739975922002e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26180487871170044,
|
|
"step": 2440,
|
|
"valid_targets_mean": 5093.2,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 3.9627228525121554,
|
|
"grad_norm": 0.5316589878706719,
|
|
"learning_rate": 1.889328954284823e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29911136627197266,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5077.1,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 3.9708265802269045,
|
|
"grad_norm": 0.6003750397681551,
|
|
"learning_rate": 1.8812599567072496e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3472042977809906,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4503.7,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 3.978930307941653,
|
|
"grad_norm": 0.6559987437310445,
|
|
"learning_rate": 1.873192898261281e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3218577802181244,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3607.2,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 3.987034035656402,
|
|
"grad_norm": 0.6531204473773664,
|
|
"learning_rate": 1.8651279106892317e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3570401966571808,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3725.1,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 3.995137763371151,
|
|
"grad_norm": 0.6130067904924766,
|
|
"learning_rate": 1.8570651256995933e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30347344279289246,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4397.4,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 4.003241491085899,
|
|
"grad_norm": 0.8400473811046529,
|
|
"learning_rate": 1.849004674964891e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37068480253219604,
|
|
"step": 2470,
|
|
"valid_targets_mean": 2330.9,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 4.011345218800648,
|
|
"grad_norm": 0.8209932869507489,
|
|
"learning_rate": 1.840946690119528e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3333631455898285,
|
|
"step": 2475,
|
|
"valid_targets_mean": 2441.2,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 4.019448946515397,
|
|
"grad_norm": 0.6924400519323498,
|
|
"learning_rate": 1.8328913027576373e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25744473934173584,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3704.8,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 4.0275526742301455,
|
|
"grad_norm": 0.627014797979199,
|
|
"learning_rate": 1.824838644430934e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638548016548157,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4240.4,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 4.035656401944895,
|
|
"grad_norm": 0.6411695887560652,
|
|
"learning_rate": 1.8167888466465652e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325685977935791,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4314.9,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 4.043760129659644,
|
|
"grad_norm": 0.7808679492122392,
|
|
"learning_rate": 1.8087420408649596e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3526899814605713,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2995.1,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 4.051863857374392,
|
|
"grad_norm": 0.6650808996351736,
|
|
"learning_rate": 1.8006983584976877e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25968295335769653,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3737.1,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 4.059967585089141,
|
|
"grad_norm": 0.6620959381335446,
|
|
"learning_rate": 1.7926579309053098e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275592565536499,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3844.2,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 4.06807131280389,
|
|
"grad_norm": 0.611981072438135,
|
|
"learning_rate": 1.7846208893952346e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791903614997864,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4565.8,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 4.076175040518638,
|
|
"grad_norm": 0.6412358606975395,
|
|
"learning_rate": 1.7765873652195713e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3951537609100342,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4688.7,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 4.084278768233387,
|
|
"grad_norm": 0.7188217457261792,
|
|
"learning_rate": 1.7685574895729886e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31020399928092957,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3320.2,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 4.0923824959481365,
|
|
"grad_norm": 0.6980680753937775,
|
|
"learning_rate": 1.7605313935905722e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31744128465652466,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4232.6,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.100486223662885,
|
|
"grad_norm": 0.7692631551657655,
|
|
"learning_rate": 1.7525092083456795e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30562955141067505,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2824.4,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 4.108589951377634,
|
|
"grad_norm": 0.8164264535125219,
|
|
"learning_rate": 1.744491064847805e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31630781292915344,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2976.9,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 4.116693679092383,
|
|
"grad_norm": 0.7838788568786066,
|
|
"learning_rate": 1.7364770940404375e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2557229995727539,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3894.8,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 4.124797406807131,
|
|
"grad_norm": 0.6054279363053395,
|
|
"learning_rate": 1.7284674267989213e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26003241539001465,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4236.4,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 4.13290113452188,
|
|
"grad_norm": 0.6753456498802832,
|
|
"learning_rate": 1.72046219392832e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3229011297225952,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3918.2,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 4.1410048622366284,
|
|
"grad_norm": 0.9066318534345271,
|
|
"learning_rate": 1.712461526161279e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3370896577835083,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3921.2,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 4.1491085899513775,
|
|
"grad_norm": 0.6943027908207531,
|
|
"learning_rate": 1.7044655541558934e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36387622356414795,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3734.2,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 4.157212317666127,
|
|
"grad_norm": 0.6280793763976201,
|
|
"learning_rate": 1.69647440849357e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29390856623649597,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4059.8,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 4.165316045380875,
|
|
"grad_norm": 0.8652306023704479,
|
|
"learning_rate": 1.6884882196768985e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.351234495639801,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2524.9,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 4.173419773095624,
|
|
"grad_norm": 0.6507876691877541,
|
|
"learning_rate": 1.680507118127518e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786210775375366,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4142.7,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 4.181523500810373,
|
|
"grad_norm": 0.4778265253290313,
|
|
"learning_rate": 1.6725312341839895e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3144574463367462,
|
|
"step": 2580,
|
|
"valid_targets_mean": 7203.1,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 4.189627228525121,
|
|
"grad_norm": 0.7392662761852493,
|
|
"learning_rate": 1.664560698099664e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28497248888015747,
|
|
"step": 2585,
|
|
"valid_targets_mean": 2962.3,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 4.19773095623987,
|
|
"grad_norm": 0.6112256034306288,
|
|
"learning_rate": 1.6565956400405586e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852174639701843,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4563.4,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 4.2058346839546195,
|
|
"grad_norm": 0.796163466102447,
|
|
"learning_rate": 1.6486361900832284e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3211726248264313,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3132.4,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 4.213938411669368,
|
|
"grad_norm": 1.3208522637127655,
|
|
"learning_rate": 1.6406824782126428e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30766162276268005,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3026.2,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 4.222042139384117,
|
|
"grad_norm": 0.8272038623649756,
|
|
"learning_rate": 1.632734634320064e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30653703212738037,
|
|
"step": 2605,
|
|
"valid_targets_mean": 2404.6,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 4.230145867098866,
|
|
"grad_norm": 0.7401433175255077,
|
|
"learning_rate": 1.6247927882009256e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266494482755661,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3482.1,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 4.238249594813614,
|
|
"grad_norm": 0.7195940311079241,
|
|
"learning_rate": 1.6168570695527096e-05,
|
|
"loss": 0.368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40879541635513306,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4103.9,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 4.246353322528363,
|
|
"grad_norm": 0.6953987582503965,
|
|
"learning_rate": 1.6089276079728334e-05,
|
|
"loss": 0.3174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2825435698032379,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3658.1,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 4.254457050243111,
|
|
"grad_norm": 0.7884023109909745,
|
|
"learning_rate": 1.6010045329565294e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29981517791748047,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2593.6,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 4.2625607779578605,
|
|
"grad_norm": 0.878488278755131,
|
|
"learning_rate": 1.5930879738947328e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3167850971221924,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2434.6,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 4.27066450567261,
|
|
"grad_norm": 0.6394432218196988,
|
|
"learning_rate": 1.585178060071966e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784843146800995,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4280.8,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 4.278768233387358,
|
|
"grad_norm": 0.5920424009782541,
|
|
"learning_rate": 1.5772749206642296e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522963285446167,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4178.9,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 4.286871961102107,
|
|
"grad_norm": 0.5768126619167596,
|
|
"learning_rate": 1.5693786847368918e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2204509675502777,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4170.9,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 4.294975688816856,
|
|
"grad_norm": 0.6628529323864057,
|
|
"learning_rate": 1.5614894812425806e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273931622505188,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3944.0,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 4.303079416531604,
|
|
"grad_norm": 0.6807384959247684,
|
|
"learning_rate": 1.5536074390190786e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29452162981033325,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4216.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 4.311183144246353,
|
|
"grad_norm": 0.7186908503646409,
|
|
"learning_rate": 1.5457326867872177e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.305240273475647,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3643.4,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 4.319286871961102,
|
|
"grad_norm": 0.6108649365109375,
|
|
"learning_rate": 1.5378653531487784e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870018780231476,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4265.2,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 4.327390599675851,
|
|
"grad_norm": 0.5974723937021044,
|
|
"learning_rate": 1.5300055665843875e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622985243797302,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4460.7,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 4.3354943273906,
|
|
"grad_norm": 0.813553355020424,
|
|
"learning_rate": 1.5221534554514225e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31859850883483887,
|
|
"step": 2675,
|
|
"valid_targets_mean": 2785.9,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 4.343598055105349,
|
|
"grad_norm": 0.6516710233193221,
|
|
"learning_rate": 1.5143091479819146e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2503086030483246,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3712.2,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 4.351701782820097,
|
|
"grad_norm": 0.6614846672676474,
|
|
"learning_rate": 1.5064727722804531e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952650785446167,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3959.1,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 4.359805510534846,
|
|
"grad_norm": 0.596297231951026,
|
|
"learning_rate": 1.4986444563220948e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3439939022064209,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4940.2,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 4.367909238249595,
|
|
"grad_norm": 0.5643409274980202,
|
|
"learning_rate": 1.4908243279502741e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3852804899215698,
|
|
"step": 2695,
|
|
"valid_targets_mean": 5347.9,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 4.376012965964343,
|
|
"grad_norm": 0.7304625760670628,
|
|
"learning_rate": 1.4830125148747138e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31127190589904785,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4680.9,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 4.3841166936790925,
|
|
"grad_norm": 0.6640526144672068,
|
|
"learning_rate": 1.475209144669341e-05,
|
|
"loss": 0.3174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26242735981941223,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3676.2,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 4.392220421393841,
|
|
"grad_norm": 0.8292112236286381,
|
|
"learning_rate": 1.4674143447702036e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29638242721557617,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2692.6,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 4.40032414910859,
|
|
"grad_norm": 0.6417348703212333,
|
|
"learning_rate": 1.4596282424733877e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2617226839065552,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3868.3,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 4.408427876823339,
|
|
"grad_norm": 0.7926194067640823,
|
|
"learning_rate": 1.4518509649329406e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919907569885254,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2726.9,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 4.416531604538087,
|
|
"grad_norm": 0.7677690933979908,
|
|
"learning_rate": 1.4440826391587926e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30967211723327637,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3164.4,
|
|
"valid_targets_min": 1702
|
|
},
|
|
{
|
|
"epoch": 4.424635332252836,
|
|
"grad_norm": 0.7635518361684928,
|
|
"learning_rate": 1.4363233920146855e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31172192096710205,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3045.9,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 4.432739059967585,
|
|
"grad_norm": 0.5954227022352406,
|
|
"learning_rate": 1.4285733502160955e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26830658316612244,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4789.5,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 4.4408427876823335,
|
|
"grad_norm": 0.5961589587008379,
|
|
"learning_rate": 1.4208326403281702e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103683888912201,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4827.6,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.448946515397083,
|
|
"grad_norm": 0.5974178836414327,
|
|
"learning_rate": 1.4131013887636576e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3044317364692688,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4834.2,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 4.457050243111832,
|
|
"grad_norm": 0.5924507972312091,
|
|
"learning_rate": 1.4053797217808432e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23466259241104126,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4736.9,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 4.46515397082658,
|
|
"grad_norm": 0.7050432170850669,
|
|
"learning_rate": 1.3976677654814866e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3333389461040497,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3746.6,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 4.473257698541329,
|
|
"grad_norm": 0.625082671183627,
|
|
"learning_rate": 1.3899656458087647e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26414743065834045,
|
|
"step": 2760,
|
|
"valid_targets_mean": 4043.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 4.481361426256078,
|
|
"grad_norm": 0.6545938419305839,
|
|
"learning_rate": 1.3822734885452136e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26352810859680176,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3948.2,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 4.489465153970826,
|
|
"grad_norm": 0.5856504955272438,
|
|
"learning_rate": 1.3745914193106715e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534196972846985,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4745.9,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 4.4975688816855754,
|
|
"grad_norm": 0.6637431157492306,
|
|
"learning_rate": 1.366919563560233e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28753918409347534,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4114.1,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 4.5056726094003245,
|
|
"grad_norm": 0.5874971907386854,
|
|
"learning_rate": 1.3592580465821956e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26772981882095337,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4026.8,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 4.513776337115073,
|
|
"grad_norm": 0.7407905359047069,
|
|
"learning_rate": 1.3516069934960174e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074095845222473,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3382.6,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 4.521880064829822,
|
|
"grad_norm": 0.6700473537828823,
|
|
"learning_rate": 1.3439665292502695e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3244781494140625,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3940.2,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 4.52998379254457,
|
|
"grad_norm": 1.055189769478214,
|
|
"learning_rate": 1.3363367786205985e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33718210458755493,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3863.8,
|
|
"valid_targets_min": 1728
|
|
},
|
|
{
|
|
"epoch": 4.538087520259319,
|
|
"grad_norm": 0.7112409148556906,
|
|
"learning_rate": 1.3287178662076893e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041728138923645,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3700.9,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 4.546191247974068,
|
|
"grad_norm": 0.7156076125028735,
|
|
"learning_rate": 1.3211099164352261e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3130621016025543,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3578.9,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 4.5542949756888165,
|
|
"grad_norm": 0.8719843408287105,
|
|
"learning_rate": 1.3135130535478655e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32647475600242615,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2229.1,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 4.562398703403566,
|
|
"grad_norm": 0.6029727322953958,
|
|
"learning_rate": 1.3059274016092057e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911962568759918,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4290.5,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 4.570502431118315,
|
|
"grad_norm": 0.5961063625812331,
|
|
"learning_rate": 1.2983530844997585e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26468124985694885,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4589.2,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 4.578606158833063,
|
|
"grad_norm": 0.5660573404796354,
|
|
"learning_rate": 1.2907902259149287e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578660249710083,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4920.9,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 4.586709886547812,
|
|
"grad_norm": 0.7280963642347,
|
|
"learning_rate": 1.2832389493629928e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32471543550491333,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3581.1,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 4.594813614262561,
|
|
"grad_norm": 0.715412432811769,
|
|
"learning_rate": 1.275699378163083e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28921806812286377,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3497.8,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 4.602917341977309,
|
|
"grad_norm": 0.6532672996527726,
|
|
"learning_rate": 1.2681716354431704e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32215386629104614,
|
|
"step": 2840,
|
|
"valid_targets_mean": 4033.8,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 4.611021069692058,
|
|
"grad_norm": 0.8260191084875083,
|
|
"learning_rate": 1.2606558441380587e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32344701886177063,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3283.2,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 4.6191247974068075,
|
|
"grad_norm": 0.6939779814125713,
|
|
"learning_rate": 1.2531521269873736e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27716442942619324,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3567.2,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 4.627228525121556,
|
|
"grad_norm": 0.792117184497524,
|
|
"learning_rate": 1.245660606533559e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006155490875244,
|
|
"step": 2855,
|
|
"valid_targets_mean": 2872.9,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 4.635332252836305,
|
|
"grad_norm": 1.4604402166637813,
|
|
"learning_rate": 1.2381814051198751e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2872532606124878,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3158.4,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 4.643435980551054,
|
|
"grad_norm": 0.7254348350390687,
|
|
"learning_rate": 1.2307146448884021e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911936044692993,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3529.8,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 4.651539708265802,
|
|
"grad_norm": 0.6614033286563541,
|
|
"learning_rate": 1.2232604477780445e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721143066883087,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3971.7,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 4.659643435980551,
|
|
"grad_norm": 0.7213983514513054,
|
|
"learning_rate": 1.2158189355225382e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28996139764785767,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3356.2,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 4.667747163695299,
|
|
"grad_norm": 0.5944666004073663,
|
|
"learning_rate": 1.2083902296484659e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28627628087997437,
|
|
"step": 2880,
|
|
"valid_targets_mean": 5012.4,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 4.6758508914100485,
|
|
"grad_norm": 0.6862135372991642,
|
|
"learning_rate": 1.2009744514732698e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3100573420524597,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3498.6,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 4.683954619124798,
|
|
"grad_norm": 0.5823076416457025,
|
|
"learning_rate": 1.1935717221032707e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30757367610931396,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4603.4,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 4.692058346839546,
|
|
"grad_norm": 0.6566892716189943,
|
|
"learning_rate": 1.1861821624316916e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3225197196006775,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3725.3,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 4.700162074554295,
|
|
"grad_norm": 0.6976236384286916,
|
|
"learning_rate": 1.1788058931366822e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4578794240951538,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4013.0,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 4.708265802269044,
|
|
"grad_norm": 0.823649732616322,
|
|
"learning_rate": 1.1714430346793479e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31944727897644043,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2667.8,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 4.716369529983792,
|
|
"grad_norm": 0.6038110363670156,
|
|
"learning_rate": 1.1640937073017837e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808373272418976,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4206.1,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 4.724473257698541,
|
|
"grad_norm": 0.5706371680859499,
|
|
"learning_rate": 1.1567580310251097e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29434657096862793,
|
|
"step": 2915,
|
|
"valid_targets_mean": 5014.4,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 4.73257698541329,
|
|
"grad_norm": 0.7418205606965962,
|
|
"learning_rate": 1.1494361256475105e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3157218098640442,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3176.5,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 4.740680713128039,
|
|
"grad_norm": 0.5164826345529684,
|
|
"learning_rate": 1.1421281107422804e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2267780900001526,
|
|
"step": 2925,
|
|
"valid_targets_mean": 5747.8,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 4.748784440842788,
|
|
"grad_norm": 0.7545936872796103,
|
|
"learning_rate": 1.1348341056558709e-05,
|
|
"loss": 0.3157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719404697418213,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3002.6,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 4.756888168557537,
|
|
"grad_norm": 0.6208311308778571,
|
|
"learning_rate": 1.1275542295059384e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3292774558067322,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4970.6,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 4.764991896272285,
|
|
"grad_norm": 0.6019183667184134,
|
|
"learning_rate": 1.1202886011794023e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3418167233467102,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4616.4,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 4.773095623987034,
|
|
"grad_norm": 0.85052165270995,
|
|
"learning_rate": 1.1130373393305004e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802833914756775,
|
|
"step": 2945,
|
|
"valid_targets_mean": 4332.2,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 4.781199351701783,
|
|
"grad_norm": 0.7917179246807688,
|
|
"learning_rate": 1.1058005623788564e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3122570514678955,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3426.0,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 4.789303079416531,
|
|
"grad_norm": 0.7139017191012962,
|
|
"learning_rate": 1.0985783885075407e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289265513420105,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3680.8,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 4.7974068071312805,
|
|
"grad_norm": 0.8032957029552218,
|
|
"learning_rate": 1.0913709356611411e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865556478500366,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2568.1,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 4.805510534846029,
|
|
"grad_norm": 0.7258218597446954,
|
|
"learning_rate": 1.0841783215438406e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27492740750312805,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3073.8,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 4.813614262560778,
|
|
"grad_norm": 0.9231645223463597,
|
|
"learning_rate": 1.07700066361749e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736729085445404,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4219.8,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 4.821717990275527,
|
|
"grad_norm": 0.7204809432667069,
|
|
"learning_rate": 1.0698380790996921e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4039513170719147,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3791.6,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 4.829821717990275,
|
|
"grad_norm": 0.7355419119699267,
|
|
"learning_rate": 1.0626906849618903e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32579725980758667,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3853.4,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 4.837925445705024,
|
|
"grad_norm": 0.6872894810537605,
|
|
"learning_rate": 1.0555585979274513e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627268433570862,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3120.8,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 4.846029173419773,
|
|
"grad_norm": 0.6320865135995328,
|
|
"learning_rate": 1.0484419344697667e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33812516927719116,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4165.2,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 4.854132901134522,
|
|
"grad_norm": 0.6525207154113098,
|
|
"learning_rate": 1.0413408108103445e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672692537307739,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3958.0,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 4.862236628849271,
|
|
"grad_norm": 0.7324504070792344,
|
|
"learning_rate": 1.0342553429169163e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.308619886636734,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3301.4,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 4.87034035656402,
|
|
"grad_norm": 0.7729266863011056,
|
|
"learning_rate": 1.0271856465015388e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953605055809021,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3670.2,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 4.878444084278768,
|
|
"grad_norm": 0.6049298397937694,
|
|
"learning_rate": 1.0201318370187065e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28800350427627563,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4787.6,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.886547811993517,
|
|
"grad_norm": 0.7118664172339165,
|
|
"learning_rate": 1.0130940296634683e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31415632367134094,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3257.8,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 4.894651539708266,
|
|
"grad_norm": 0.5531509637033187,
|
|
"learning_rate": 1.0060723393695411e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28131812810897827,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4997.1,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 4.902755267423014,
|
|
"grad_norm": 0.5338020186024223,
|
|
"learning_rate": 9.990668808074378e-06,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28649789094924927,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5955.0,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 4.9108589951377635,
|
|
"grad_norm": 0.7028420105154684,
|
|
"learning_rate": 9.920777683825906e-06,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2879384756088257,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3591.8,
|
|
"valid_targets_min": 1963
|
|
},
|
|
{
|
|
"epoch": 4.918962722852513,
|
|
"grad_norm": 0.7304701576538732,
|
|
"learning_rate": 9.851051162334871e-06,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3374594449996948,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3193.5,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 4.927066450567261,
|
|
"grad_norm": 0.5865752240512088,
|
|
"learning_rate": 9.781490382298018e-06,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32436108589172363,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4995.1,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 4.93517017828201,
|
|
"grad_norm": 0.8489788664414895,
|
|
"learning_rate": 9.712096479705382e-06,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3596591055393219,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2581.2,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.943273905996758,
|
|
"grad_norm": 0.6007400071384102,
|
|
"learning_rate": 9.642870587821761e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724454700946808,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4420.8,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 4.951377633711507,
|
|
"grad_norm": 0.7265588407960974,
|
|
"learning_rate": 9.573813837168166e-06,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30277544260025024,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3065.9,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 4.959481361426256,
|
|
"grad_norm": 0.7253607975993981,
|
|
"learning_rate": 9.504927355503399e-06,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33545592427253723,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3197.4,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 4.9675850891410045,
|
|
"grad_norm": 0.6715975787612942,
|
|
"learning_rate": 9.436212267805591e-06,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28574198484420776,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3559.9,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 4.975688816855754,
|
|
"grad_norm": 0.6147947824290071,
|
|
"learning_rate": 9.367669696253885e-06,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3622044324874878,
|
|
"step": 3070,
|
|
"valid_targets_mean": 5026.7,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 4.983792544570503,
|
|
"grad_norm": 0.9266146599676338,
|
|
"learning_rate": 9.299300760210059e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25320684909820557,
|
|
"step": 3075,
|
|
"valid_targets_mean": 5378.4,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 4.991896272285251,
|
|
"grad_norm": 0.886524861463812,
|
|
"learning_rate": 9.231106576200268e-06,
|
|
"loss": 0.3495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33286648988723755,
|
|
"step": 3080,
|
|
"valid_targets_mean": 2349.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5637786613200365,
|
|
"learning_rate": 9.163088257896825e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30712249875068665,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5376.3,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 5.008103727714749,
|
|
"grad_norm": 0.679283516267655,
|
|
"learning_rate": 9.095246916099978e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30201444029808044,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3832.2,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 5.016207455429497,
|
|
"grad_norm": 0.7170611382488913,
|
|
"learning_rate": 9.027583658719812e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2814018130302429,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3344.4,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 5.024311183144246,
|
|
"grad_norm": 0.6078632668782452,
|
|
"learning_rate": 8.960099590758104e-06,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3396990895271301,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5253.6,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 5.0324149108589955,
|
|
"grad_norm": 0.7835574782392883,
|
|
"learning_rate": 8.892795814290342e-06,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2960517406463623,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3160.6,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 5.040518638573744,
|
|
"grad_norm": 0.7378400262878008,
|
|
"learning_rate": 8.825673428447668e-06,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2975686192512512,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3756.3,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 5.048622366288493,
|
|
"grad_norm": 0.6887905386158489,
|
|
"learning_rate": 8.758733529398945e-06,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563658654689789,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3989.8,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.056726094003242,
|
|
"grad_norm": 0.6618439575404061,
|
|
"learning_rate": 8.691977210332892e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25922781229019165,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4233.7,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 5.06482982171799,
|
|
"grad_norm": 0.5650176047225448,
|
|
"learning_rate": 8.625405561440172e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2414149045944214,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4405.9,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 5.072933549432739,
|
|
"grad_norm": 0.7330383005392311,
|
|
"learning_rate": 8.559019669895648e-06,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28208595514297485,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3542.4,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 5.081037277147487,
|
|
"grad_norm": 0.7265672842651938,
|
|
"learning_rate": 8.492820619840563e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256570041179657,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4502.6,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 5.0891410048622365,
|
|
"grad_norm": 0.8957657482015896,
|
|
"learning_rate": 8.426809492364907e-06,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3049822449684143,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2393.8,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 5.097244732576986,
|
|
"grad_norm": 0.6466368422052554,
|
|
"learning_rate": 8.360987365489698e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25956565141677856,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4452.4,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 5.105348460291734,
|
|
"grad_norm": 0.9077370475698178,
|
|
"learning_rate": 8.295355314149413e-06,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3483518362045288,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2507.1,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 5.113452188006483,
|
|
"grad_norm": 0.8110339444850555,
|
|
"learning_rate": 8.229914410174435e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3136909008026123,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2973.8,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 5.121555915721232,
|
|
"grad_norm": 0.6807259870266567,
|
|
"learning_rate": 8.16466572227352e-06,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30755871534347534,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3892.3,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 5.12965964343598,
|
|
"grad_norm": 0.658342938443184,
|
|
"learning_rate": 8.099610316016373e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30165326595306396,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4359.5,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 5.137763371150729,
|
|
"grad_norm": 0.9237059062611359,
|
|
"learning_rate": 8.03474925381625e-06,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3684241771697998,
|
|
"step": 3170,
|
|
"valid_targets_mean": 2736.1,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 5.145867098865478,
|
|
"grad_norm": 0.738400061019705,
|
|
"learning_rate": 7.97008359491257e-06,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29892492294311523,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3671.7,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 5.153970826580227,
|
|
"grad_norm": 0.653087724753744,
|
|
"learning_rate": 7.905614395353649e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32275858521461487,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4359.4,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.162074554294976,
|
|
"grad_norm": 0.7687704188424886,
|
|
"learning_rate": 7.841342707979442e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669990062713623,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3254.8,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 5.170178282009725,
|
|
"grad_norm": 0.6145652140558867,
|
|
"learning_rate": 7.77726958240437e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30221909284591675,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4819.4,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 5.178282009724473,
|
|
"grad_norm": 0.7821087325095586,
|
|
"learning_rate": 7.713396065000133e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28741025924682617,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3316.5,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 5.186385737439222,
|
|
"grad_norm": 0.5251864451472283,
|
|
"learning_rate": 7.649723198878676e-06,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25311702489852905,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5434.4,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 5.194489465153971,
|
|
"grad_norm": 0.7146340534365199,
|
|
"learning_rate": 7.586252023875125e-06,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779831886291504,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3548.5,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 5.2025931928687195,
|
|
"grad_norm": 0.6421920316889562,
|
|
"learning_rate": 7.522983576530791e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2444455623626709,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3775.6,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 5.210696920583469,
|
|
"grad_norm": 0.6085840752147157,
|
|
"learning_rate": 7.459918890076272e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286603718996048,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5191.4,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 5.218800648298217,
|
|
"grad_norm": 0.682252527910381,
|
|
"learning_rate": 7.397058994414563e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283713698387146,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4987.8,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 5.226904376012966,
|
|
"grad_norm": 0.761041867774423,
|
|
"learning_rate": 7.3344049161042495e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30440282821655273,
|
|
"step": 3225,
|
|
"valid_targets_mean": 3551.4,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 5.235008103727715,
|
|
"grad_norm": 0.7457730095664243,
|
|
"learning_rate": 7.271957678342738e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805323898792267,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3305.1,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 5.243111831442463,
|
|
"grad_norm": 0.9269764600924907,
|
|
"learning_rate": 7.209718300949519e-06,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30809998512268066,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3278.8,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 5.251215559157212,
|
|
"grad_norm": 0.8608783945025844,
|
|
"learning_rate": 7.14768780034957e-06,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784419357776642,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2483.4,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.259319286871961,
|
|
"grad_norm": 0.6552635503057336,
|
|
"learning_rate": 7.085867189556697e-06,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24957261979579926,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4670.8,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 5.26742301458671,
|
|
"grad_norm": 0.7567556315587354,
|
|
"learning_rate": 7.024257478157015e-06,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25506865978240967,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2933.9,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 5.275526742301459,
|
|
"grad_norm": 0.6500169004006787,
|
|
"learning_rate": 6.96285967229249e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285958856344223,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4448.9,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 5.283630470016208,
|
|
"grad_norm": 0.8005728973226811,
|
|
"learning_rate": 6.901674774644449e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28087589144706726,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3049.1,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 5.291734197730956,
|
|
"grad_norm": 0.7675907087887824,
|
|
"learning_rate": 6.840703784417262e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28006839752197266,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3114.2,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 5.299837925445705,
|
|
"grad_norm": 0.6700312471419494,
|
|
"learning_rate": 6.779947697321974e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276754766702652,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4571.8,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 5.307941653160454,
|
|
"grad_norm": 0.7128592910951062,
|
|
"learning_rate": 6.719407505560094e-06,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31054553389549255,
|
|
"step": 3275,
|
|
"valid_targets_mean": 4182.4,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 5.316045380875202,
|
|
"grad_norm": 0.6512636677298553,
|
|
"learning_rate": 6.659084197807348e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29994767904281616,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4672.8,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 5.3241491085899515,
|
|
"grad_norm": 0.7606762515888708,
|
|
"learning_rate": 6.598978759197554e-06,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889411151409149,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3447.2,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 5.332252836304701,
|
|
"grad_norm": 0.7089462941061058,
|
|
"learning_rate": 6.539092171306541e-06,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591246962547302,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3852.4,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 5.340356564019449,
|
|
"grad_norm": 0.793316654437433,
|
|
"learning_rate": 6.479425412136093e-06,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2555570602416992,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2712.8,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 5.348460291734198,
|
|
"grad_norm": 0.8133606076526989,
|
|
"learning_rate": 6.419979456098016e-06,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27769023180007935,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2869.2,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 5.356564019448946,
|
|
"grad_norm": 0.6768085806436803,
|
|
"learning_rate": 6.360755273998174e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2373725026845932,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3990.9,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 5.364667747163695,
|
|
"grad_norm": 0.8302998958250895,
|
|
"learning_rate": 6.301753833020691e-06,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31967538595199585,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2772.0,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.372771474878444,
|
|
"grad_norm": 0.5943028036049409,
|
|
"learning_rate": 6.242976096712112e-06,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29758501052856445,
|
|
"step": 3315,
|
|
"valid_targets_mean": 5840.2,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 5.3808752025931925,
|
|
"grad_norm": 0.7157345315365573,
|
|
"learning_rate": 6.18442302496568e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25914475321769714,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3808.9,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 5.388978930307942,
|
|
"grad_norm": 0.7786866822617113,
|
|
"learning_rate": 6.1260955740056835e-06,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894706130027771,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3148.8,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 5.397082658022691,
|
|
"grad_norm": 0.76294737241064,
|
|
"learning_rate": 6.067994696371797e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689441442489624,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3199.1,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 5.405186385737439,
|
|
"grad_norm": 0.7624414240294302,
|
|
"learning_rate": 6.010121340903574e-06,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649106979370117,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3114.9,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 5.413290113452188,
|
|
"grad_norm": 0.701323992069574,
|
|
"learning_rate": 5.952476452724898e-06,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3416491448879242,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4082.7,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 5.421393841166937,
|
|
"grad_norm": 0.742330773570171,
|
|
"learning_rate": 5.895060973228606e-06,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3097589612007141,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3322.2,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 5.429497568881685,
|
|
"grad_norm": 0.5877195535700861,
|
|
"learning_rate": 5.837875840061064e-06,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22878901660442352,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4539.7,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 5.437601296596434,
|
|
"grad_norm": 0.6347647319563191,
|
|
"learning_rate": 5.780921987106878e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23854267597198486,
|
|
"step": 3355,
|
|
"valid_targets_mean": 4752.8,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 5.4457050243111835,
|
|
"grad_norm": 0.8298002116959828,
|
|
"learning_rate": 5.724200344473651e-06,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27335381507873535,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2939.4,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 5.453808752025932,
|
|
"grad_norm": 0.8280056823907535,
|
|
"learning_rate": 5.66771183847677e-06,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28138309717178345,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3459.2,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 5.461912479740681,
|
|
"grad_norm": 0.7652229448911942,
|
|
"learning_rate": 5.611457391624309e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577538788318634,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3450.9,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 5.47001620745543,
|
|
"grad_norm": 0.7271629799488672,
|
|
"learning_rate": 5.555437922601918e-06,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700653672218323,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3632.2,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 5.478119935170178,
|
|
"grad_norm": 0.8700764884307809,
|
|
"learning_rate": 5.499654346257879e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594822645187378,
|
|
"step": 3380,
|
|
"valid_targets_mean": 4346.2,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 5.486223662884927,
|
|
"grad_norm": 0.770381419374314,
|
|
"learning_rate": 5.444107573588116e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3069581985473633,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4150.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 5.4943273905996755,
|
|
"grad_norm": 0.6696438750203964,
|
|
"learning_rate": 5.388798511721329e-06,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23633962869644165,
|
|
"step": 3390,
|
|
"valid_targets_mean": 4223.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 5.5024311183144246,
|
|
"grad_norm": 0.6855425861490273,
|
|
"learning_rate": 5.333728063904213e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24334245920181274,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3528.2,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 5.510534846029174,
|
|
"grad_norm": 0.786560444677125,
|
|
"learning_rate": 5.278897129486656e-06,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29019924998283386,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3245.4,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 5.518638573743922,
|
|
"grad_norm": 0.7172670134554514,
|
|
"learning_rate": 5.224306603907095e-06,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28209179639816284,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4107.5,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.526742301458671,
|
|
"grad_norm": 0.7350968379600731,
|
|
"learning_rate": 5.169957378677859e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29147547483444214,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3760.5,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 5.53484602917342,
|
|
"grad_norm": 0.7517239670767295,
|
|
"learning_rate": 5.11585034137064e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884262800216675,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3442.4,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 5.542949756888168,
|
|
"grad_norm": 0.9054713950721245,
|
|
"learning_rate": 5.061986375601977e-06,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.366168349981308,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2817.2,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 5.551053484602917,
|
|
"grad_norm": 0.6501242905430781,
|
|
"learning_rate": 5.0083663610188215e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24292755126953125,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4625.0,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 5.5591572123176665,
|
|
"grad_norm": 1.4505012508960171,
|
|
"learning_rate": 4.954991173284207e-06,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867453694343567,
|
|
"step": 3430,
|
|
"valid_targets_mean": 3474.4,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 5.567260940032415,
|
|
"grad_norm": 0.8611869362587048,
|
|
"learning_rate": 4.901861684062899e-06,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303717702627182,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2582.6,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 5.575364667747164,
|
|
"grad_norm": 0.6318517106242182,
|
|
"learning_rate": 4.848978761007206e-06,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35377246141433716,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5696.1,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 5.583468395461912,
|
|
"grad_norm": 0.7580961353653848,
|
|
"learning_rate": 4.796343267742782e-06,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27109336853027344,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3441.2,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 5.591572123176661,
|
|
"grad_norm": 0.8569431910573655,
|
|
"learning_rate": 4.743956063854529e-06,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3265492916107178,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3613.4,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 5.59967585089141,
|
|
"grad_norm": 0.6785717564457475,
|
|
"learning_rate": 4.691818004872557e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23201988637447357,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3884.2,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 5.607779578606159,
|
|
"grad_norm": 1.5751988259842302,
|
|
"learning_rate": 4.639929942258217e-06,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24123671650886536,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3035.5,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 5.6158833063209075,
|
|
"grad_norm": 0.671534242940153,
|
|
"learning_rate": 4.588292723390204e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23632174730300903,
|
|
"step": 3465,
|
|
"valid_targets_mean": 4311.9,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 5.623987034035657,
|
|
"grad_norm": 0.6599509783390952,
|
|
"learning_rate": 4.536907191550694e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659260928630829,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3626.4,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 5.632090761750405,
|
|
"grad_norm": 0.8703140687361607,
|
|
"learning_rate": 4.4857741859116024e-06,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3029107451438904,
|
|
"step": 3475,
|
|
"valid_targets_mean": 2693.6,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 5.640194489465154,
|
|
"grad_norm": 0.7936006609606302,
|
|
"learning_rate": 4.434894541520862e-06,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25295132398605347,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2842.1,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 5.648298217179903,
|
|
"grad_norm": 0.7231262588490622,
|
|
"learning_rate": 4.3842690892887795e-06,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111575245857239,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3719.8,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 5.656401944894651,
|
|
"grad_norm": 0.6158823413371348,
|
|
"learning_rate": 4.333898655974484e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26768383383750916,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4432.0,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 5.6645056726094,
|
|
"grad_norm": 0.8979021634703408,
|
|
"learning_rate": 4.283784064172405e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32060831785202026,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3322.2,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 5.672609400324149,
|
|
"grad_norm": 0.5686930143372391,
|
|
"learning_rate": 4.233926132298867e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24507595598697662,
|
|
"step": 3500,
|
|
"valid_targets_mean": 6302.6,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 5.680713128038898,
|
|
"grad_norm": 0.4277289698869093,
|
|
"learning_rate": 4.1843256745787e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2321561872959137,
|
|
"step": 3505,
|
|
"valid_targets_mean": 8880.2,
|
|
"valid_targets_min": 2792
|
|
},
|
|
{
|
|
"epoch": 5.688816855753647,
|
|
"grad_norm": 0.8297040218872506,
|
|
"learning_rate": 4.134983501031942e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2722494900226593,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3655.5,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 5.696920583468396,
|
|
"grad_norm": 0.7829749027309345,
|
|
"learning_rate": 4.085900417460633e-06,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4337892532348633,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3732.4,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 5.705024311183144,
|
|
"grad_norm": 0.8126084942082932,
|
|
"learning_rate": 4.037077225435628e-06,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28513067960739136,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2776.5,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 5.713128038897893,
|
|
"grad_norm": 0.8953819330769611,
|
|
"learning_rate": 3.988514722283523e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3385658860206604,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2753.9,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 5.721231766612641,
|
|
"grad_norm": 0.6417576970627987,
|
|
"learning_rate": 3.940213701073636e-06,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2606130838394165,
|
|
"step": 3530,
|
|
"valid_targets_mean": 4978.7,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 5.72933549432739,
|
|
"grad_norm": 0.594036263261497,
|
|
"learning_rate": 3.892174950605039e-06,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24494385719299316,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4648.3,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 5.7374392220421395,
|
|
"grad_norm": 0.674852622493003,
|
|
"learning_rate": 3.844399255393705e-06,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30853283405303955,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5255.0,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 5.745542949756889,
|
|
"grad_norm": 0.801058537021668,
|
|
"learning_rate": 3.7968873956596563e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3078470826148987,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3194.4,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 5.753646677471637,
|
|
"grad_norm": 0.7048342023609044,
|
|
"learning_rate": 3.749640147314264e-06,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24763602018356323,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3711.5,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 5.761750405186386,
|
|
"grad_norm": 0.7693710636718659,
|
|
"learning_rate": 3.7026582819475443e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23135964572429657,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3138.4,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.769854132901134,
|
|
"grad_norm": 0.6632199872131253,
|
|
"learning_rate": 3.6559425668155733e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27239274978637695,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4193.6,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 5.777957860615883,
|
|
"grad_norm": 0.718545305799666,
|
|
"learning_rate": 3.6094937648279647e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2218991219997406,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5719.2,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 5.786061588330632,
|
|
"grad_norm": 0.7244639386188173,
|
|
"learning_rate": 3.563312634535383e-06,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30564481019973755,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3467.5,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 5.7941653160453805,
|
|
"grad_norm": 0.5617300881909284,
|
|
"learning_rate": 3.517399930117196e-06,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33648714423179626,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5446.6,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 5.80226904376013,
|
|
"grad_norm": 0.6597389907883395,
|
|
"learning_rate": 3.4717564013691087e-06,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604525685310364,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3790.9,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 5.810372771474879,
|
|
"grad_norm": 0.7745748020298397,
|
|
"learning_rate": 3.4263827936909744e-06,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584492266178131,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3191.6,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 5.818476499189627,
|
|
"grad_norm": 0.6810368699743038,
|
|
"learning_rate": 3.38127984807457e-06,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27961236238479614,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3924.9,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 5.826580226904376,
|
|
"grad_norm": 0.6841769486834594,
|
|
"learning_rate": 3.3364483010915237e-06,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27105721831321716,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4534.7,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 5.834683954619125,
|
|
"grad_norm": 0.6233870726744284,
|
|
"learning_rate": 3.2918888848812913e-06,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30562299489974976,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4270.6,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 5.842787682333873,
|
|
"grad_norm": 0.8764854670185418,
|
|
"learning_rate": 3.2476023271391698e-06,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34314560890197754,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2687.0,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 5.8508914100486225,
|
|
"grad_norm": 0.7598308829904862,
|
|
"learning_rate": 3.2035893511044524e-06,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2514714300632477,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4512.1,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 5.858995137763371,
|
|
"grad_norm": 0.685998691912319,
|
|
"learning_rate": 3.159850675548577e-06,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35390257835388184,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4823.4,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 5.86709886547812,
|
|
"grad_norm": 0.7124267141614449,
|
|
"learning_rate": 3.116387014763429e-06,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29345035552978516,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3753.6,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 5.875202593192869,
|
|
"grad_norm": 0.6137149594151944,
|
|
"learning_rate": 3.073199078549638e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540719509124756,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5363.9,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 5.883306320907618,
|
|
"grad_norm": 0.6867497382371595,
|
|
"learning_rate": 3.0302875722050064e-06,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29915332794189453,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3797.4,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 5.891410048622366,
|
|
"grad_norm": 0.6615681011902053,
|
|
"learning_rate": 2.987653196513003e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24344134330749512,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4025.9,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 5.899513776337115,
|
|
"grad_norm": 0.7630726256680385,
|
|
"learning_rate": 2.9452966477312815e-06,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2798713743686676,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3439.8,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 5.9076175040518635,
|
|
"grad_norm": 0.7157631234965041,
|
|
"learning_rate": 2.9032186175803545e-06,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654950022697449,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3439.6,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 5.915721231766613,
|
|
"grad_norm": 0.907754147422762,
|
|
"learning_rate": 2.8614197932322585e-06,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3148188591003418,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2466.9,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 5.923824959481362,
|
|
"grad_norm": 0.6769846106042888,
|
|
"learning_rate": 2.819900857299358e-06,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29736822843551636,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4191.1,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 5.93192868719611,
|
|
"grad_norm": 0.7096793380763345,
|
|
"learning_rate": 2.778662487823187e-06,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283025860786438,
|
|
"step": 3660,
|
|
"valid_targets_mean": 4360.5,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 5.940032414910859,
|
|
"grad_norm": 0.616843225159254,
|
|
"learning_rate": 2.7377053582633652e-06,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597149610519409,
|
|
"step": 3665,
|
|
"valid_targets_mean": 5154.5,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 5.948136142625608,
|
|
"grad_norm": 0.6764261015597512,
|
|
"learning_rate": 2.6970301374866337e-06,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24780400097370148,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3628.2,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 5.956239870340356,
|
|
"grad_norm": 0.8480710671748108,
|
|
"learning_rate": 2.656637489755889e-06,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29920557141304016,
|
|
"step": 3675,
|
|
"valid_targets_mean": 2949.8,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 5.964343598055105,
|
|
"grad_norm": 0.6732019095661717,
|
|
"learning_rate": 2.616528074719371e-06,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520577609539032,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4207.6,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 5.9724473257698545,
|
|
"grad_norm": 0.8349743498637383,
|
|
"learning_rate": 2.576702547399863e-06,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27137356996536255,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2445.9,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 5.980551053484603,
|
|
"grad_norm": 0.7106954338129656,
|
|
"learning_rate": 2.53716155818402e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.336696982383728,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3873.4,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 5.988654781199352,
|
|
"grad_norm": 0.58917163595744,
|
|
"learning_rate": 2.49790575281172e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684072256088257,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5289.0,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 5.9967585089141,
|
|
"grad_norm": 0.6606555154321343,
|
|
"learning_rate": 2.4589357723655405e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25777506828308105,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4293.9,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 6.004862236628849,
|
|
"grad_norm": 0.7027728413429737,
|
|
"learning_rate": 2.4202522532602846e-06,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2268795222043991,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3399.1,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 6.012965964343598,
|
|
"grad_norm": 0.8011004209420672,
|
|
"learning_rate": 2.381855827232571e-06,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30303099751472473,
|
|
"step": 3710,
|
|
"valid_targets_mean": 2753.8,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 6.021069692058346,
|
|
"grad_norm": 0.7622833107404628,
|
|
"learning_rate": 2.343747121330544e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26210296154022217,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3469.6,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 6.0291734197730955,
|
|
"grad_norm": 0.7214806571727383,
|
|
"learning_rate": 2.3059267579036183e-06,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521173655986786,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3284.3,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 6.037277147487845,
|
|
"grad_norm": 0.6902303351345986,
|
|
"learning_rate": 2.268395354592312e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26502299308776855,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4875.6,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 6.045380875202593,
|
|
"grad_norm": 0.7867477709532921,
|
|
"learning_rate": 2.2311535243181637e-06,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001806139945984,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3108.7,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 6.053484602917342,
|
|
"grad_norm": 0.8415320330661212,
|
|
"learning_rate": 2.1942018752737227e-06,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877800464630127,
|
|
"step": 3735,
|
|
"valid_targets_mean": 2779.1,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 6.061588330632091,
|
|
"grad_norm": 0.8195152807284035,
|
|
"learning_rate": 2.1575410109126293e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24368160963058472,
|
|
"step": 3740,
|
|
"valid_targets_mean": 2967.5,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 6.069692058346839,
|
|
"grad_norm": 0.6479699003172938,
|
|
"learning_rate": 2.121171529939734e-06,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2409634292125702,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4277.7,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 6.077795786061588,
|
|
"grad_norm": 0.552108099887294,
|
|
"learning_rate": 2.085094026301349e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244204431772232,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5570.9,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 6.085899513776337,
|
|
"grad_norm": 0.694523046518939,
|
|
"learning_rate": 2.0493090891755262e-06,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28496766090393066,
|
|
"step": 3755,
|
|
"valid_targets_mean": 4118.6,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 6.094003241491086,
|
|
"grad_norm": 0.7428603071136939,
|
|
"learning_rate": 2.013817302962444e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2954471707344055,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4566.4,
|
|
"valid_targets_min": 1867
|
|
},
|
|
{
|
|
"epoch": 6.102106969205835,
|
|
"grad_norm": 0.6670308940945041,
|
|
"learning_rate": 1.9786192472748643e-06,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2413603663444519,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3763.2,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 6.110210696920584,
|
|
"grad_norm": 0.6184485991768255,
|
|
"learning_rate": 1.9437154969286577e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21730905771255493,
|
|
"step": 3770,
|
|
"valid_targets_mean": 4136.2,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 6.118314424635332,
|
|
"grad_norm": 0.8396579659658009,
|
|
"learning_rate": 1.9091066219334365e-06,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3086123466491699,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3251.0,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 6.126418152350081,
|
|
"grad_norm": 0.7996562551138828,
|
|
"learning_rate": 1.8747931874832325e-06,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31810393929481506,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3095.3,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 6.13452188006483,
|
|
"grad_norm": 0.7835295163411242,
|
|
"learning_rate": 1.8407757539472548e-06,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26433265209198,
|
|
"step": 3785,
|
|
"valid_targets_mean": 2967.4,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 6.1426256077795784,
|
|
"grad_norm": 0.671422897302015,
|
|
"learning_rate": 1.8070548768607744e-06,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25956496596336365,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4248.0,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 6.1507293354943275,
|
|
"grad_norm": 0.7640652471884006,
|
|
"learning_rate": 1.773631106915996e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866196632385254,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3140.5,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 6.158833063209076,
|
|
"grad_norm": 0.7901497100315298,
|
|
"learning_rate": 1.740504989953129e-06,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26567843556404114,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2979.9,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 6.166936790923825,
|
|
"grad_norm": 0.7775483944222201,
|
|
"learning_rate": 1.707677066951432e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601214349269867,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3538.1,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 6.175040518638574,
|
|
"grad_norm": 0.6915414702499607,
|
|
"learning_rate": 1.6751478740203776e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2392570823431015,
|
|
"step": 3810,
|
|
"valid_targets_mean": 4333.6,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 6.183144246353322,
|
|
"grad_norm": 0.7120258292651832,
|
|
"learning_rate": 1.6429179423909248e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25904911756515503,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4281.4,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 6.191247974068071,
|
|
"grad_norm": 0.6073873394357148,
|
|
"learning_rate": 1.6109877984068089e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905542850494385,
|
|
"step": 3820,
|
|
"valid_targets_mean": 5658.9,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 6.19935170178282,
|
|
"grad_norm": 0.6940476997510958,
|
|
"learning_rate": 1.5793579635159883e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291831910610199,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4014.4,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 6.207455429497569,
|
|
"grad_norm": 0.6769492704139075,
|
|
"learning_rate": 1.5480289542620686e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24061062932014465,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4078.7,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 6.215559157212318,
|
|
"grad_norm": 0.8350367658236924,
|
|
"learning_rate": 1.517001282275936e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500787377357483,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3799.4,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 6.223662884927067,
|
|
"grad_norm": 0.651123871111124,
|
|
"learning_rate": 1.486275454267354e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3093678951263428,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5790.4,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 6.231766612641815,
|
|
"grad_norm": 0.5701257680611536,
|
|
"learning_rate": 1.4558519720166975e-06,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544712722301483,
|
|
"step": 3845,
|
|
"valid_targets_mean": 5241.6,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 6.239870340356564,
|
|
"grad_norm": 0.7056219502775309,
|
|
"learning_rate": 1.4257313323667798e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31050872802734375,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4014.6,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 6.247974068071313,
|
|
"grad_norm": 0.7262226243616173,
|
|
"learning_rate": 1.3959140272146998e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27195703983306885,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3768.8,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 6.256077795786061,
|
|
"grad_norm": 0.6357702807995875,
|
|
"learning_rate": 1.366400543503854e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24853156507015228,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4364.2,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 6.2641815235008105,
|
|
"grad_norm": 0.766510336947776,
|
|
"learning_rate": 1.3371913632159506e-06,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552717328071594,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4211.0,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 6.272285251215559,
|
|
"grad_norm": 0.814344839562171,
|
|
"learning_rate": 1.3082869633631413e-06,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299882709980011,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2909.2,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 6.280388978930308,
|
|
"grad_norm": 0.7162700671892775,
|
|
"learning_rate": 1.2796878159802595e-06,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3379451632499695,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4513.4,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 6.288492706645057,
|
|
"grad_norm": 0.7166231616916816,
|
|
"learning_rate": 1.2513943881170754e-06,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2593461871147156,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3996.0,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 6.296596434359805,
|
|
"grad_norm": 0.7960125938291805,
|
|
"learning_rate": 1.2234071418306903e-06,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560000419616699,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2943.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.304700162074554,
|
|
"grad_norm": 0.7909822999764351,
|
|
"learning_rate": 1.1957265341779855e-06,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831363379955292,
|
|
"step": 3890,
|
|
"valid_targets_mean": 2929.0,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 6.312803889789303,
|
|
"grad_norm": 0.8078944578627083,
|
|
"learning_rate": 1.1683530172081592e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28948408365249634,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3166.2,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 6.3209076175040515,
|
|
"grad_norm": 0.6503889422727751,
|
|
"learning_rate": 1.1412870379553387e-06,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851935029029846,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5278.4,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 6.329011345218801,
|
|
"grad_norm": 0.7095808115759376,
|
|
"learning_rate": 1.1145290384312846e-06,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966817617416382,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3949.7,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 6.33711507293355,
|
|
"grad_norm": 0.5713953001792478,
|
|
"learning_rate": 1.0880794556181762e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26881951093673706,
|
|
"step": 3910,
|
|
"valid_targets_mean": 5394.6,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 6.345218800648298,
|
|
"grad_norm": 0.8057022743278727,
|
|
"learning_rate": 1.0619387214614662e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28803229331970215,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3252.2,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 6.353322528363047,
|
|
"grad_norm": 0.656199813723134,
|
|
"learning_rate": 1.0361072628628354e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23762132227420807,
|
|
"step": 3920,
|
|
"valid_targets_mean": 4280.4,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 6.361426256077796,
|
|
"grad_norm": 0.5444634341534144,
|
|
"learning_rate": 1.0105855016732113e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526979148387909,
|
|
"step": 3925,
|
|
"valid_targets_mean": 6552.8,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 6.369529983792544,
|
|
"grad_norm": 0.7191276204333831,
|
|
"learning_rate": 9.853738546858893e-07,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3756384253501892,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4363.0,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 6.377633711507293,
|
|
"grad_norm": 0.6072164246303409,
|
|
"learning_rate": 9.604727336297203e-07,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21978111565113068,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4838.1,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 6.3857374392220425,
|
|
"grad_norm": 0.8535219826727345,
|
|
"learning_rate": 9.358825451623832e-07,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3109622001647949,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2759.0,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 6.393841166936791,
|
|
"grad_norm": 0.8018109420542197,
|
|
"learning_rate": 9.116036908637582e-07,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27665650844573975,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3128.8,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 6.40194489465154,
|
|
"grad_norm": 0.6118808079483848,
|
|
"learning_rate": 8.876365672293441e-07,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24101707339286804,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5033.9,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 6.410048622366288,
|
|
"grad_norm": 0.6738977504802829,
|
|
"learning_rate": 8.639815656638162e-07,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23450031876564026,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4632.9,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 6.418152350081037,
|
|
"grad_norm": 0.6578666448021531,
|
|
"learning_rate": 8.406390724745961e-07,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25716516375541687,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4409.2,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 6.426256077795786,
|
|
"grad_norm": 0.7504549153226855,
|
|
"learning_rate": 8.176094688655789e-07,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24591027200222015,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3898.1,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 6.434359805510534,
|
|
"grad_norm": 0.716758853434586,
|
|
"learning_rate": 7.948931309308872e-07,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2400793880224228,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3711.0,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 6.4424635332252835,
|
|
"grad_norm": 0.6187434162303004,
|
|
"learning_rate": 7.724904296487246e-07,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.240831196308136,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4534.2,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 6.450567260940033,
|
|
"grad_norm": 0.7413645604300104,
|
|
"learning_rate": 7.504017308753386e-07,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26031699776649475,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3497.0,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 6.458670988654781,
|
|
"grad_norm": 0.6834988461746635,
|
|
"learning_rate": 7.286273953390278e-07,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3183531165122986,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4240.6,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 6.46677471636953,
|
|
"grad_norm": 0.7125067985322497,
|
|
"learning_rate": 7.071677786342568e-07,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23387478291988373,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4085.0,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 6.474878444084279,
|
|
"grad_norm": 0.5676104505322674,
|
|
"learning_rate": 6.860232312158554e-07,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.215545192360878,
|
|
"step": 3995,
|
|
"valid_targets_mean": 5310.1,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 6.482982171799027,
|
|
"grad_norm": 0.8287830712031203,
|
|
"learning_rate": 6.651940983932737e-07,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28875142335891724,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2980.1,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 6.491085899513776,
|
|
"grad_norm": 0.567888858150834,
|
|
"learning_rate": 6.44680720324975e-07,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2143402099609375,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4987.5,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 6.4991896272285254,
|
|
"grad_norm": 0.8657516748787141,
|
|
"learning_rate": 6.244834320128501e-07,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2904426157474518,
|
|
"step": 4010,
|
|
"valid_targets_mean": 2812.2,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 6.507293354943274,
|
|
"grad_norm": 0.6802979979107128,
|
|
"learning_rate": 6.0460256329677e-07,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2284238040447235,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3829.1,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 6.515397082658023,
|
|
"grad_norm": 0.6616469553258651,
|
|
"learning_rate": 5.850384388491814e-07,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26627442240715027,
|
|
"step": 4020,
|
|
"valid_targets_mean": 4338.4,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 6.523500810372772,
|
|
"grad_norm": 0.7578702676971412,
|
|
"learning_rate": 5.657913781698221e-07,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26661694049835205,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3835.7,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 6.53160453808752,
|
|
"grad_norm": 0.8009653624376153,
|
|
"learning_rate": 5.468616955804873e-07,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027913570404053,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3376.9,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 6.539708265802269,
|
|
"grad_norm": 0.7130215275679384,
|
|
"learning_rate": 5.282497002198983e-07,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25583645701408386,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3905.9,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 6.547811993517017,
|
|
"grad_norm": 0.7385392471080525,
|
|
"learning_rate": 5.099556960386686e-07,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25842177867889404,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3477.1,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 6.5559157212317665,
|
|
"grad_norm": 0.974897501639828,
|
|
"learning_rate": 4.919799817943238e-07,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3050478398799896,
|
|
"step": 4045,
|
|
"valid_targets_mean": 2837.5,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 6.564019448946516,
|
|
"grad_norm": 0.6798897469940042,
|
|
"learning_rate": 4.7432285104642703e-07,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23295563459396362,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4162.9,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 6.572123176661265,
|
|
"grad_norm": 0.7307056993453411,
|
|
"learning_rate": 4.569845921517968e-07,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25635001063346863,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3471.1,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 6.580226904376013,
|
|
"grad_norm": 0.6997105951135152,
|
|
"learning_rate": 4.399654882597726e-07,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000991642475128,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4791.9,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 6.588330632090762,
|
|
"grad_norm": 0.7720123442423956,
|
|
"learning_rate": 4.232658173076232e-07,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2398601472377777,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3633.2,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 6.59643435980551,
|
|
"grad_norm": 0.667849292616193,
|
|
"learning_rate": 4.068858520159724e-07,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535744905471802,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4069.8,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 6.604538087520259,
|
|
"grad_norm": 0.5638956199207986,
|
|
"learning_rate": 3.9082585988437617e-07,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24108228087425232,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5918.1,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 6.612641815235008,
|
|
"grad_norm": 0.6112457385056609,
|
|
"learning_rate": 3.7508610318693684e-07,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21963872015476227,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4737.4,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 6.620745542949757,
|
|
"grad_norm": 1.0780947655547286,
|
|
"learning_rate": 3.596668389680247e-07,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27150118350982666,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3611.4,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 6.628849270664506,
|
|
"grad_norm": 0.6071034680621801,
|
|
"learning_rate": 3.445683190380833e-07,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29520419239997864,
|
|
"step": 4090,
|
|
"valid_targets_mean": 6065.9,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 6.636952998379255,
|
|
"grad_norm": 0.6734164739619142,
|
|
"learning_rate": 3.297907899695019e-07,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27870097756385803,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3903.8,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 6.645056726094003,
|
|
"grad_norm": 0.840649714564737,
|
|
"learning_rate": 3.1533449309262056e-07,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556115686893463,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3202.4,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 6.653160453808752,
|
|
"grad_norm": 0.8751889443011073,
|
|
"learning_rate": 3.0119966449174474e-07,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3118496537208557,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4041.4,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 6.661264181523501,
|
|
"grad_norm": 0.7587472130380253,
|
|
"learning_rate": 2.8738653500133494e-07,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707531452178955,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4125.2,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 6.669367909238249,
|
|
"grad_norm": 0.8004779128159383,
|
|
"learning_rate": 2.738953302022096e-07,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31876587867736816,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3345.1,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 6.6774716369529985,
|
|
"grad_norm": 0.6332081406992072,
|
|
"learning_rate": 2.6072627041785925e-07,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23436513543128967,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4620.5,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 6.685575364667747,
|
|
"grad_norm": 0.5678494967037826,
|
|
"learning_rate": 2.478795707108672e-07,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23043310642242432,
|
|
"step": 4125,
|
|
"valid_targets_mean": 4959.5,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 6.693679092382496,
|
|
"grad_norm": 0.630783160958136,
|
|
"learning_rate": 2.3535544087938345e-07,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515913248062134,
|
|
"step": 4130,
|
|
"valid_targets_mean": 4601.9,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 6.701782820097245,
|
|
"grad_norm": 1.0053058271111657,
|
|
"learning_rate": 2.2315408545370288e-07,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27254045009613037,
|
|
"step": 4135,
|
|
"valid_targets_mean": 2783.1,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 6.709886547811994,
|
|
"grad_norm": 0.6409152830997872,
|
|
"learning_rate": 2.1127570369292361e-07,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27788475155830383,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5011.4,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 6.717990275526742,
|
|
"grad_norm": 0.6392402407017796,
|
|
"learning_rate": 1.9972048958168954e-07,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23322021961212158,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4512.9,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 6.726094003241491,
|
|
"grad_norm": 0.6358548166174536,
|
|
"learning_rate": 1.8848863182703513e-07,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605475187301636,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3927.9,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 6.7341977309562395,
|
|
"grad_norm": 0.671040397851917,
|
|
"learning_rate": 1.7758031385528784e-07,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39784008264541626,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4703.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 6.742301458670989,
|
|
"grad_norm": 0.7726852582259216,
|
|
"learning_rate": 1.6699571380908385e-07,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32319122552871704,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3434.7,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 6.750405186385738,
|
|
"grad_norm": 0.7122432763736924,
|
|
"learning_rate": 1.5673500454445046e-07,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571845054626465,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3780.6,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 6.758508914100486,
|
|
"grad_norm": 0.7328015093005262,
|
|
"learning_rate": 1.4679835362799266e-07,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29261136054992676,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4295.0,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 6.766612641815235,
|
|
"grad_norm": 0.6595980272648833,
|
|
"learning_rate": 1.3718592333414881e-07,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2447427660226822,
|
|
"step": 4175,
|
|
"valid_targets_mean": 4700.2,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 6.774716369529984,
|
|
"grad_norm": 0.74623136258456,
|
|
"learning_rate": 1.2789787064253934e-07,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32457369565963745,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4794.1,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 6.782820097244732,
|
|
"grad_norm": 0.6387805597272295,
|
|
"learning_rate": 1.1893434723541542e-07,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.247330904006958,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5098.8,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 6.790923824959481,
|
|
"grad_norm": 0.7914446560492252,
|
|
"learning_rate": 1.1029549949516549e-07,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25566303730010986,
|
|
"step": 4190,
|
|
"valid_targets_mean": 2928.6,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 6.7990275526742305,
|
|
"grad_norm": 0.6784435117492335,
|
|
"learning_rate": 1.0198146850193935e-07,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23428568243980408,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4123.6,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 6.807131280388979,
|
|
"grad_norm": 0.9219689586760358,
|
|
"learning_rate": 9.399239003132999e-08,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27654707431793213,
|
|
"step": 4200,
|
|
"valid_targets_mean": 2339.8,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 6.815235008103728,
|
|
"grad_norm": 0.9992054972300621,
|
|
"learning_rate": 8.632839455216869e-08,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3159137964248657,
|
|
"step": 4205,
|
|
"valid_targets_mean": 2398.5,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 6.823338735818476,
|
|
"grad_norm": 0.7850895651410487,
|
|
"learning_rate": 7.89896072243912e-08,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29079508781433105,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3143.6,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 6.831442463533225,
|
|
"grad_norm": 1.0537086856892872,
|
|
"learning_rate": 7.197614789698604e-08,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822344899177551,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3394.4,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 6.839546191247974,
|
|
"grad_norm": 0.8211601548625681,
|
|
"learning_rate": 6.528813110604714e-08,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31433653831481934,
|
|
"step": 4220,
|
|
"valid_targets_mean": 2904.8,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 6.847649918962723,
|
|
"grad_norm": 0.8759060227159305,
|
|
"learning_rate": 5.8925666072899845e-08,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29553893208503723,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3257.2,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 6.855753646677472,
|
|
"grad_norm": 0.8027670263828199,
|
|
"learning_rate": 5.288885670231336e-08,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29728516936302185,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3178.8,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 6.863857374392221,
|
|
"grad_norm": 0.6309128871254511,
|
|
"learning_rate": 4.717780158080887e-08,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22423002123832703,
|
|
"step": 4235,
|
|
"valid_targets_mean": 4645.9,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 6.871961102106969,
|
|
"grad_norm": 0.7061556050034734,
|
|
"learning_rate": 4.1792593975049644e-08,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23919057846069336,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3472.2,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 6.880064829821718,
|
|
"grad_norm": 0.7475024848941403,
|
|
"learning_rate": 3.6733321830315636e-08,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3009330630302429,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3434.6,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 6.888168557536467,
|
|
"grad_norm": 0.5604792404146883,
|
|
"learning_rate": 3.200006776906461e-08,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671416103839874,
|
|
"step": 4250,
|
|
"valid_targets_mean": 5724.2,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 6.896272285251215,
|
|
"grad_norm": 0.6499933621677116,
|
|
"learning_rate": 2.7592909089593224e-08,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697727382183075,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4409.6,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 6.904376012965964,
|
|
"grad_norm": 0.7422334882426721,
|
|
"learning_rate": 2.351191776475581e-08,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24201233685016632,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3177.5,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 6.9124797406807135,
|
|
"grad_norm": 0.6139803295015263,
|
|
"learning_rate": 1.9757160440814217e-08,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602802515029907,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4547.1,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 6.920583468395462,
|
|
"grad_norm": 0.8330992402016216,
|
|
"learning_rate": 1.6328698436327562e-08,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27471017837524414,
|
|
"step": 4270,
|
|
"valid_targets_mean": 2963.1,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 6.928687196110211,
|
|
"grad_norm": 0.7303505672027717,
|
|
"learning_rate": 1.3226587741159702e-08,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26886293292045593,
|
|
"step": 4275,
|
|
"valid_targets_mean": 3608.6,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 6.93679092382496,
|
|
"grad_norm": 0.747537768686201,
|
|
"learning_rate": 1.0450879015566629e-08,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23849567770957947,
|
|
"step": 4280,
|
|
"valid_targets_mean": 2990.4,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 6.944894651539708,
|
|
"grad_norm": 0.7691581570436978,
|
|
"learning_rate": 8.001617589368238e-09,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27607500553131104,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3319.6,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 6.952998379254457,
|
|
"grad_norm": 0.7777486056188054,
|
|
"learning_rate": 5.878843461200046e-09,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2839847207069397,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3421.1,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 6.961102106969205,
|
|
"grad_norm": 0.7537030177576087,
|
|
"learning_rate": 4.082591297873695e-09,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3192595839500427,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3606.9,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 6.9692058346839545,
|
|
"grad_norm": 0.6782862771300621,
|
|
"learning_rate": 2.6128904338018624e-09,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3403030037879944,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4660.7,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 6.977309562398704,
|
|
"grad_norm": 0.6131020501214138,
|
|
"learning_rate": 1.4697648705186417e-09,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20277269184589386,
|
|
"step": 4305,
|
|
"valid_targets_mean": 4003.3,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 6.985413290113452,
|
|
"grad_norm": 0.71749765762531,
|
|
"learning_rate": 6.532332762931859e-10,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2628193795681,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3631.9,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 6.993517017828201,
|
|
"grad_norm": 0.7515541884292178,
|
|
"learning_rate": 1.6330898581884414e-10,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3376891016960144,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3517.7,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23076336085796356,
|
|
"step": 4319,
|
|
"total_flos": 1056573424926720.0,
|
|
"train_loss": 0.349568347366189,
|
|
"train_runtime": 28403.7175,
|
|
"train_samples_per_second": 2.431,
|
|
"train_steps_per_second": 0.152,
|
|
"valid_targets_mean": 5247.5,
|
|
"valid_targets_min": 1276
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4319,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1056573424926720.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|