Model: laion/Kimi-K2T-neulab-agenttuning-webshop-sandboxes-maxeps-32k Source: Original Platform
5559 lines
154 KiB
JSON
5559 lines
154 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 2506,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.013986013986013986,
|
|
"grad_norm": 13.08118363303711,
|
|
"learning_rate": 6.374501992031873e-07,
|
|
"loss": 1.0535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5296926498413086,
|
|
"step": 5,
|
|
"valid_targets_mean": 4239.0,
|
|
"valid_targets_min": 3298
|
|
},
|
|
{
|
|
"epoch": 0.027972027972027972,
|
|
"grad_norm": 11.914950011402933,
|
|
"learning_rate": 1.4342629482071716e-06,
|
|
"loss": 1.0403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4918077290058136,
|
|
"step": 10,
|
|
"valid_targets_mean": 3680.8,
|
|
"valid_targets_min": 2534
|
|
},
|
|
{
|
|
"epoch": 0.04195804195804196,
|
|
"grad_norm": 8.748664702844085,
|
|
"learning_rate": 2.2310756972111555e-06,
|
|
"loss": 1.0073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5401319265365601,
|
|
"step": 15,
|
|
"valid_targets_mean": 4520.0,
|
|
"valid_targets_min": 3358
|
|
},
|
|
{
|
|
"epoch": 0.055944055944055944,
|
|
"grad_norm": 6.578134131681192,
|
|
"learning_rate": 3.0278884462151397e-06,
|
|
"loss": 0.9661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5008518695831299,
|
|
"step": 20,
|
|
"valid_targets_mean": 4016.0,
|
|
"valid_targets_min": 3201
|
|
},
|
|
{
|
|
"epoch": 0.06993006993006994,
|
|
"grad_norm": 4.64457041164554,
|
|
"learning_rate": 3.824701195219123e-06,
|
|
"loss": 0.9017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43356165289878845,
|
|
"step": 25,
|
|
"valid_targets_mean": 4458.5,
|
|
"valid_targets_min": 2740
|
|
},
|
|
{
|
|
"epoch": 0.08391608391608392,
|
|
"grad_norm": 3.2906721720118743,
|
|
"learning_rate": 4.621513944223108e-06,
|
|
"loss": 0.8338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43590834736824036,
|
|
"step": 30,
|
|
"valid_targets_mean": 4182.0,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 0.0979020979020979,
|
|
"grad_norm": 2.2213280960302737,
|
|
"learning_rate": 5.418326693227092e-06,
|
|
"loss": 0.7852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3904917240142822,
|
|
"step": 35,
|
|
"valid_targets_mean": 3757.9,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 0.11188811188811189,
|
|
"grad_norm": 1.517797734360262,
|
|
"learning_rate": 6.215139442231076e-06,
|
|
"loss": 0.7225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3458019495010376,
|
|
"step": 40,
|
|
"valid_targets_mean": 3985.0,
|
|
"valid_targets_min": 2319
|
|
},
|
|
{
|
|
"epoch": 0.1258741258741259,
|
|
"grad_norm": 1.3322026291958011,
|
|
"learning_rate": 7.011952191235061e-06,
|
|
"loss": 0.686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33076441287994385,
|
|
"step": 45,
|
|
"valid_targets_mean": 3783.4,
|
|
"valid_targets_min": 2961
|
|
},
|
|
{
|
|
"epoch": 0.13986013986013987,
|
|
"grad_norm": 1.1653038557461999,
|
|
"learning_rate": 7.808764940239044e-06,
|
|
"loss": 0.6457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.320470929145813,
|
|
"step": 50,
|
|
"valid_targets_mean": 4048.5,
|
|
"valid_targets_min": 2644
|
|
},
|
|
{
|
|
"epoch": 0.15384615384615385,
|
|
"grad_norm": 0.9815989525162515,
|
|
"learning_rate": 8.605577689243028e-06,
|
|
"loss": 0.6121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778295576572418,
|
|
"step": 55,
|
|
"valid_targets_mean": 3791.1,
|
|
"valid_targets_min": 2987
|
|
},
|
|
{
|
|
"epoch": 0.16783216783216784,
|
|
"grad_norm": 0.8997344993173911,
|
|
"learning_rate": 9.402390438247013e-06,
|
|
"loss": 0.596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28497567772865295,
|
|
"step": 60,
|
|
"valid_targets_mean": 4066.9,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 0.18181818181818182,
|
|
"grad_norm": 0.8616110554113583,
|
|
"learning_rate": 1.0199203187250997e-05,
|
|
"loss": 0.5676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.248211070895195,
|
|
"step": 65,
|
|
"valid_targets_mean": 3500.8,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 0.1958041958041958,
|
|
"grad_norm": 0.7612355436071168,
|
|
"learning_rate": 1.099601593625498e-05,
|
|
"loss": 0.5405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26128190755844116,
|
|
"step": 70,
|
|
"valid_targets_mean": 3998.1,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 0.2097902097902098,
|
|
"grad_norm": 0.7653586307537427,
|
|
"learning_rate": 1.1792828685258967e-05,
|
|
"loss": 0.5166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24031201004981995,
|
|
"step": 75,
|
|
"valid_targets_mean": 3749.9,
|
|
"valid_targets_min": 3074
|
|
},
|
|
{
|
|
"epoch": 0.22377622377622378,
|
|
"grad_norm": 0.7206061108519799,
|
|
"learning_rate": 1.258964143426295e-05,
|
|
"loss": 0.5094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23275572061538696,
|
|
"step": 80,
|
|
"valid_targets_mean": 3978.4,
|
|
"valid_targets_min": 3212
|
|
},
|
|
{
|
|
"epoch": 0.23776223776223776,
|
|
"grad_norm": 0.7248405909913119,
|
|
"learning_rate": 1.3386454183266932e-05,
|
|
"loss": 0.4822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24045932292938232,
|
|
"step": 85,
|
|
"valid_targets_mean": 3960.0,
|
|
"valid_targets_min": 3165
|
|
},
|
|
{
|
|
"epoch": 0.2517482517482518,
|
|
"grad_norm": 0.642102398369385,
|
|
"learning_rate": 1.4183266932270919e-05,
|
|
"loss": 0.4835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24778877198696136,
|
|
"step": 90,
|
|
"valid_targets_mean": 4437.4,
|
|
"valid_targets_min": 3689
|
|
},
|
|
{
|
|
"epoch": 0.26573426573426573,
|
|
"grad_norm": 0.6779030700031962,
|
|
"learning_rate": 1.4980079681274901e-05,
|
|
"loss": 0.4679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24229872226715088,
|
|
"step": 95,
|
|
"valid_targets_mean": 4241.8,
|
|
"valid_targets_min": 3712
|
|
},
|
|
{
|
|
"epoch": 0.27972027972027974,
|
|
"grad_norm": 0.6919316585268371,
|
|
"learning_rate": 1.5776892430278886e-05,
|
|
"loss": 0.4653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23130342364311218,
|
|
"step": 100,
|
|
"valid_targets_mean": 4174.5,
|
|
"valid_targets_min": 3400
|
|
},
|
|
{
|
|
"epoch": 0.2937062937062937,
|
|
"grad_norm": 0.6430352865799613,
|
|
"learning_rate": 1.6573705179282872e-05,
|
|
"loss": 0.4423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22104957699775696,
|
|
"step": 105,
|
|
"valid_targets_mean": 3760.4,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 0.3076923076923077,
|
|
"grad_norm": 0.6362542473505411,
|
|
"learning_rate": 1.7370517928286855e-05,
|
|
"loss": 0.4317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20805756747722626,
|
|
"step": 110,
|
|
"valid_targets_mean": 4144.9,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 0.32167832167832167,
|
|
"grad_norm": 0.6737054371286955,
|
|
"learning_rate": 1.8167330677290838e-05,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22847774624824524,
|
|
"step": 115,
|
|
"valid_targets_mean": 3954.5,
|
|
"valid_targets_min": 2889
|
|
},
|
|
{
|
|
"epoch": 0.3356643356643357,
|
|
"grad_norm": 0.7266322768815212,
|
|
"learning_rate": 1.8964143426294824e-05,
|
|
"loss": 0.4368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20634719729423523,
|
|
"step": 120,
|
|
"valid_targets_mean": 3737.1,
|
|
"valid_targets_min": 3060
|
|
},
|
|
{
|
|
"epoch": 0.34965034965034963,
|
|
"grad_norm": 0.6829819207654488,
|
|
"learning_rate": 1.9760956175298807e-05,
|
|
"loss": 0.4183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19363188743591309,
|
|
"step": 125,
|
|
"valid_targets_mean": 3675.4,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 0.36363636363636365,
|
|
"grad_norm": 0.7250977015774923,
|
|
"learning_rate": 2.055776892430279e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18788361549377441,
|
|
"step": 130,
|
|
"valid_targets_mean": 3879.5,
|
|
"valid_targets_min": 3277
|
|
},
|
|
{
|
|
"epoch": 0.3776223776223776,
|
|
"grad_norm": 0.7210165535643743,
|
|
"learning_rate": 2.1354581673306773e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17633922398090363,
|
|
"step": 135,
|
|
"valid_targets_mean": 3757.0,
|
|
"valid_targets_min": 3110
|
|
},
|
|
{
|
|
"epoch": 0.3916083916083916,
|
|
"grad_norm": 0.6747533826387861,
|
|
"learning_rate": 2.2151394422310756e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20017263293266296,
|
|
"step": 140,
|
|
"valid_targets_mean": 4174.1,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 0.40559440559440557,
|
|
"grad_norm": 0.7402646690744179,
|
|
"learning_rate": 2.2948207171314745e-05,
|
|
"loss": 0.4142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19815121591091156,
|
|
"step": 145,
|
|
"valid_targets_mean": 3800.5,
|
|
"valid_targets_min": 2344
|
|
},
|
|
{
|
|
"epoch": 0.4195804195804196,
|
|
"grad_norm": 0.6882905212492071,
|
|
"learning_rate": 2.3745019920318728e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23232436180114746,
|
|
"step": 150,
|
|
"valid_targets_mean": 4320.9,
|
|
"valid_targets_min": 3029
|
|
},
|
|
{
|
|
"epoch": 0.43356643356643354,
|
|
"grad_norm": 0.6867230259268453,
|
|
"learning_rate": 2.454183266932271e-05,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21659091114997864,
|
|
"step": 155,
|
|
"valid_targets_mean": 4310.1,
|
|
"valid_targets_min": 3283
|
|
},
|
|
{
|
|
"epoch": 0.44755244755244755,
|
|
"grad_norm": 0.6859829511382938,
|
|
"learning_rate": 2.5338645418326694e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18998664617538452,
|
|
"step": 160,
|
|
"valid_targets_mean": 3856.8,
|
|
"valid_targets_min": 2810
|
|
},
|
|
{
|
|
"epoch": 0.46153846153846156,
|
|
"grad_norm": 0.6986164805285058,
|
|
"learning_rate": 2.6135458167330677e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18579256534576416,
|
|
"step": 165,
|
|
"valid_targets_mean": 4134.0,
|
|
"valid_targets_min": 3603
|
|
},
|
|
{
|
|
"epoch": 0.4755244755244755,
|
|
"grad_norm": 0.6783298773224011,
|
|
"learning_rate": 2.6932270916334663e-05,
|
|
"loss": 0.3933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184840589761734,
|
|
"step": 170,
|
|
"valid_targets_mean": 4108.1,
|
|
"valid_targets_min": 3187
|
|
},
|
|
{
|
|
"epoch": 0.48951048951048953,
|
|
"grad_norm": 0.7488260178884795,
|
|
"learning_rate": 2.772908366533865e-05,
|
|
"loss": 0.3982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19532346725463867,
|
|
"step": 175,
|
|
"valid_targets_mean": 4101.9,
|
|
"valid_targets_min": 2898
|
|
},
|
|
{
|
|
"epoch": 0.5034965034965035,
|
|
"grad_norm": 0.6861710567678492,
|
|
"learning_rate": 2.8525896414342632e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1921890676021576,
|
|
"step": 180,
|
|
"valid_targets_mean": 4188.2,
|
|
"valid_targets_min": 3493
|
|
},
|
|
{
|
|
"epoch": 0.5174825174825175,
|
|
"grad_norm": 0.6691851908083688,
|
|
"learning_rate": 2.9322709163346615e-05,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18755507469177246,
|
|
"step": 185,
|
|
"valid_targets_mean": 3822.2,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 0.5314685314685315,
|
|
"grad_norm": 0.7064984838573337,
|
|
"learning_rate": 3.01195219123506e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20090565085411072,
|
|
"step": 190,
|
|
"valid_targets_mean": 4206.1,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 0.5454545454545454,
|
|
"grad_norm": 0.7715107860489125,
|
|
"learning_rate": 3.0916334661354584e-05,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20451734960079193,
|
|
"step": 195,
|
|
"valid_targets_mean": 4249.6,
|
|
"valid_targets_min": 3523
|
|
},
|
|
{
|
|
"epoch": 0.5594405594405595,
|
|
"grad_norm": 0.7242629319106181,
|
|
"learning_rate": 3.1713147410358564e-05,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1759917438030243,
|
|
"step": 200,
|
|
"valid_targets_mean": 3672.8,
|
|
"valid_targets_min": 1933
|
|
},
|
|
{
|
|
"epoch": 0.5734265734265734,
|
|
"grad_norm": 0.7583819688407337,
|
|
"learning_rate": 3.250996015936256e-05,
|
|
"loss": 0.3894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19551926851272583,
|
|
"step": 205,
|
|
"valid_targets_mean": 4001.5,
|
|
"valid_targets_min": 3657
|
|
},
|
|
{
|
|
"epoch": 0.5874125874125874,
|
|
"grad_norm": 0.7683757040502193,
|
|
"learning_rate": 3.3306772908366536e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18955138325691223,
|
|
"step": 210,
|
|
"valid_targets_mean": 4289.0,
|
|
"valid_targets_min": 3312
|
|
},
|
|
{
|
|
"epoch": 0.6013986013986014,
|
|
"grad_norm": 0.7244739994581784,
|
|
"learning_rate": 3.410358565737052e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17240570485591888,
|
|
"step": 215,
|
|
"valid_targets_mean": 3773.9,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 0.6153846153846154,
|
|
"grad_norm": 0.7405062652243871,
|
|
"learning_rate": 3.49003984063745e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17839646339416504,
|
|
"step": 220,
|
|
"valid_targets_mean": 3992.4,
|
|
"valid_targets_min": 2738
|
|
},
|
|
{
|
|
"epoch": 0.6293706293706294,
|
|
"grad_norm": 0.7087430013485309,
|
|
"learning_rate": 3.569721115537849e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2037450224161148,
|
|
"step": 225,
|
|
"valid_targets_mean": 4216.2,
|
|
"valid_targets_min": 3731
|
|
},
|
|
{
|
|
"epoch": 0.6433566433566433,
|
|
"grad_norm": 0.7332815327859702,
|
|
"learning_rate": 3.6494023904382475e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18066954612731934,
|
|
"step": 230,
|
|
"valid_targets_mean": 4160.8,
|
|
"valid_targets_min": 3559
|
|
},
|
|
{
|
|
"epoch": 0.6573426573426573,
|
|
"grad_norm": 0.6970517350021399,
|
|
"learning_rate": 3.7290836653386454e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17745010554790497,
|
|
"step": 235,
|
|
"valid_targets_mean": 4144.8,
|
|
"valid_targets_min": 3663
|
|
},
|
|
{
|
|
"epoch": 0.6713286713286714,
|
|
"grad_norm": 0.7451282096768427,
|
|
"learning_rate": 3.808764940239044e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18617768585681915,
|
|
"step": 240,
|
|
"valid_targets_mean": 4251.4,
|
|
"valid_targets_min": 3508
|
|
},
|
|
{
|
|
"epoch": 0.6853146853146853,
|
|
"grad_norm": 0.7068045251723751,
|
|
"learning_rate": 3.8884462151394427e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1815338283777237,
|
|
"step": 245,
|
|
"valid_targets_mean": 4038.9,
|
|
"valid_targets_min": 3101
|
|
},
|
|
{
|
|
"epoch": 0.6993006993006993,
|
|
"grad_norm": 0.7858813502425421,
|
|
"learning_rate": 3.968127490039841e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19982239603996277,
|
|
"step": 250,
|
|
"valid_targets_mean": 4397.8,
|
|
"valid_targets_min": 3461
|
|
},
|
|
{
|
|
"epoch": 0.7132867132867133,
|
|
"grad_norm": 0.7176304870172697,
|
|
"learning_rate": 3.999982531784936e-05,
|
|
"loss": 0.3586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.166753888130188,
|
|
"step": 255,
|
|
"valid_targets_mean": 3866.0,
|
|
"valid_targets_min": 3096
|
|
},
|
|
{
|
|
"epoch": 0.7272727272727273,
|
|
"grad_norm": 0.6347194324198923,
|
|
"learning_rate": 3.9998757826867935e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17435021698474884,
|
|
"step": 260,
|
|
"valid_targets_mean": 3974.8,
|
|
"valid_targets_min": 2832
|
|
},
|
|
{
|
|
"epoch": 0.7412587412587412,
|
|
"grad_norm": 0.739929776169395,
|
|
"learning_rate": 3.9996719942279066e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16606023907661438,
|
|
"step": 265,
|
|
"valid_targets_mean": 3664.4,
|
|
"valid_targets_min": 2917
|
|
},
|
|
{
|
|
"epoch": 0.7552447552447552,
|
|
"grad_norm": 0.755272340890183,
|
|
"learning_rate": 3.999371176296642e-05,
|
|
"loss": 0.3611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17609718441963196,
|
|
"step": 270,
|
|
"valid_targets_mean": 3968.5,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.7497354502032663,
|
|
"learning_rate": 3.998973343489495e-05,
|
|
"loss": 0.3623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19211605191230774,
|
|
"step": 275,
|
|
"valid_targets_mean": 4043.2,
|
|
"valid_targets_min": 3490
|
|
},
|
|
{
|
|
"epoch": 0.7832167832167832,
|
|
"grad_norm": 0.7343575604706798,
|
|
"learning_rate": 3.998478515110385e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18720020353794098,
|
|
"step": 280,
|
|
"valid_targets_mean": 4071.0,
|
|
"valid_targets_min": 3224
|
|
},
|
|
{
|
|
"epoch": 0.7972027972027972,
|
|
"grad_norm": 0.68409074012476,
|
|
"learning_rate": 3.99788671516972e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1671791970729828,
|
|
"step": 285,
|
|
"valid_targets_mean": 3782.9,
|
|
"valid_targets_min": 2959
|
|
},
|
|
{
|
|
"epoch": 0.8111888111888111,
|
|
"grad_norm": 0.7792445633536357,
|
|
"learning_rate": 3.9971979723832254e-05,
|
|
"loss": 0.3558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15965257585048676,
|
|
"step": 290,
|
|
"valid_targets_mean": 3967.0,
|
|
"valid_targets_min": 3180
|
|
},
|
|
{
|
|
"epoch": 0.8251748251748252,
|
|
"grad_norm": 0.6610159058798779,
|
|
"learning_rate": 3.9964123201705584e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18202170729637146,
|
|
"step": 295,
|
|
"valid_targets_mean": 4038.8,
|
|
"valid_targets_min": 3244
|
|
},
|
|
{
|
|
"epoch": 0.8391608391608392,
|
|
"grad_norm": 0.6972532901113331,
|
|
"learning_rate": 3.995529796653679e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1697453409433365,
|
|
"step": 300,
|
|
"valid_targets_mean": 3957.4,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 0.8531468531468531,
|
|
"grad_norm": 0.7127394676926205,
|
|
"learning_rate": 3.9945504446550074e-05,
|
|
"loss": 0.3641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1838894933462143,
|
|
"step": 305,
|
|
"valid_targets_mean": 4152.6,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 0.8671328671328671,
|
|
"grad_norm": 0.6808745132127945,
|
|
"learning_rate": 3.99347431169534e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18923607468605042,
|
|
"step": 310,
|
|
"valid_targets_mean": 4593.0,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 0.8811188811188811,
|
|
"grad_norm": 0.687007172307545,
|
|
"learning_rate": 3.992301449991548e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16615572571754456,
|
|
"step": 315,
|
|
"valid_targets_mean": 3800.6,
|
|
"valid_targets_min": 3001
|
|
},
|
|
{
|
|
"epoch": 0.8951048951048951,
|
|
"grad_norm": 0.6816977872307991,
|
|
"learning_rate": 3.991031916454041e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16688376665115356,
|
|
"step": 320,
|
|
"valid_targets_mean": 3923.0,
|
|
"valid_targets_min": 2836
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 0.729891603562499,
|
|
"learning_rate": 3.989665772684006e-05,
|
|
"loss": 0.3495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19064228236675262,
|
|
"step": 325,
|
|
"valid_targets_mean": 4230.6,
|
|
"valid_targets_min": 3136
|
|
},
|
|
{
|
|
"epoch": 0.9230769230769231,
|
|
"grad_norm": 0.6676103880290066,
|
|
"learning_rate": 3.988203084970418e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18094299733638763,
|
|
"step": 330,
|
|
"valid_targets_mean": 3959.9,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 0.9370629370629371,
|
|
"grad_norm": 0.7376874440458068,
|
|
"learning_rate": 3.9866439242868275e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17640212178230286,
|
|
"step": 335,
|
|
"valid_targets_mean": 4032.8,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 0.951048951048951,
|
|
"grad_norm": 0.66910614530988,
|
|
"learning_rate": 3.98498836628791e-05,
|
|
"loss": 0.3365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1491106152534485,
|
|
"step": 340,
|
|
"valid_targets_mean": 4081.6,
|
|
"valid_targets_min": 3248
|
|
},
|
|
{
|
|
"epoch": 0.965034965034965,
|
|
"grad_norm": 0.7616870298472965,
|
|
"learning_rate": 3.983236491305801e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17968814074993134,
|
|
"step": 345,
|
|
"valid_targets_mean": 4023.8,
|
|
"valid_targets_min": 2892
|
|
},
|
|
{
|
|
"epoch": 0.9790209790209791,
|
|
"grad_norm": 0.632714729243677,
|
|
"learning_rate": 3.981388384346193e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1566416621208191,
|
|
"step": 350,
|
|
"valid_targets_mean": 3693.1,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 0.993006993006993,
|
|
"grad_norm": 0.6609906108291348,
|
|
"learning_rate": 3.979444135084215e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15235170722007751,
|
|
"step": 355,
|
|
"valid_targets_mean": 3772.6,
|
|
"valid_targets_min": 2905
|
|
},
|
|
{
|
|
"epoch": 1.0055944055944055,
|
|
"grad_norm": 0.6785452885452187,
|
|
"learning_rate": 3.9774038378600796e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16667693853378296,
|
|
"step": 360,
|
|
"valid_targets_mean": 3899.9,
|
|
"valid_targets_min": 3402
|
|
},
|
|
{
|
|
"epoch": 1.0195804195804197,
|
|
"grad_norm": 0.7216286836712269,
|
|
"learning_rate": 3.975267591674504e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16874702274799347,
|
|
"step": 365,
|
|
"valid_targets_mean": 3837.2,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 1.0335664335664336,
|
|
"grad_norm": 0.7538286002527794,
|
|
"learning_rate": 3.973035500183909e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1855105757713318,
|
|
"step": 370,
|
|
"valid_targets_mean": 4314.4,
|
|
"valid_targets_min": 3164
|
|
},
|
|
{
|
|
"epoch": 1.0475524475524476,
|
|
"grad_norm": 0.6855638013823442,
|
|
"learning_rate": 3.9707076716953866e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17090822756290436,
|
|
"step": 375,
|
|
"valid_targets_mean": 3918.8,
|
|
"valid_targets_min": 3060
|
|
},
|
|
{
|
|
"epoch": 1.0615384615384615,
|
|
"grad_norm": 0.6406278133349705,
|
|
"learning_rate": 3.9682842191614466e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18505993485450745,
|
|
"step": 380,
|
|
"valid_targets_mean": 4170.4,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 1.0755244755244755,
|
|
"grad_norm": 0.624336684500911,
|
|
"learning_rate": 3.965765260174534e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14859825372695923,
|
|
"step": 385,
|
|
"valid_targets_mean": 3817.0,
|
|
"valid_targets_min": 2413
|
|
},
|
|
{
|
|
"epoch": 1.0895104895104895,
|
|
"grad_norm": 0.6815493498131907,
|
|
"learning_rate": 3.9631509169613265e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1760796457529068,
|
|
"step": 390,
|
|
"valid_targets_mean": 4037.1,
|
|
"valid_targets_min": 3431
|
|
},
|
|
{
|
|
"epoch": 1.1034965034965034,
|
|
"grad_norm": 0.6427970727384303,
|
|
"learning_rate": 3.9604413163767985e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14617708325386047,
|
|
"step": 395,
|
|
"valid_targets_mean": 3709.0,
|
|
"valid_targets_min": 2929
|
|
},
|
|
{
|
|
"epoch": 1.1174825174825176,
|
|
"grad_norm": 0.6901088697837418,
|
|
"learning_rate": 3.957636589898072e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1801576465368271,
|
|
"step": 400,
|
|
"valid_targets_mean": 4472.5,
|
|
"valid_targets_min": 3444
|
|
},
|
|
{
|
|
"epoch": 1.1314685314685315,
|
|
"grad_norm": 0.7016517054603091,
|
|
"learning_rate": 3.95473687361803e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16067080199718475,
|
|
"step": 405,
|
|
"valid_targets_mean": 3830.8,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 1.1454545454545455,
|
|
"grad_norm": 0.663970593232612,
|
|
"learning_rate": 3.951742308238719e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1717248409986496,
|
|
"step": 410,
|
|
"valid_targets_mean": 4072.2,
|
|
"valid_targets_min": 3187
|
|
},
|
|
{
|
|
"epoch": 1.1594405594405595,
|
|
"grad_norm": 0.6484212894219356,
|
|
"learning_rate": 3.948653039064519e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1626005917787552,
|
|
"step": 415,
|
|
"valid_targets_mean": 3676.9,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 1.1734265734265734,
|
|
"grad_norm": 0.6922369837367284,
|
|
"learning_rate": 3.9454692159950935e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16085588932037354,
|
|
"step": 420,
|
|
"valid_targets_mean": 4151.0,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 1.1874125874125874,
|
|
"grad_norm": 0.6510823882497541,
|
|
"learning_rate": 3.9421909935181146e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19022603332996368,
|
|
"step": 425,
|
|
"valid_targets_mean": 4349.9,
|
|
"valid_targets_min": 3851
|
|
},
|
|
{
|
|
"epoch": 1.2013986013986013,
|
|
"grad_norm": 0.6443695433649274,
|
|
"learning_rate": 3.938818530701768e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17442061007022858,
|
|
"step": 430,
|
|
"valid_targets_mean": 4019.0,
|
|
"valid_targets_min": 3361
|
|
},
|
|
{
|
|
"epoch": 1.2153846153846155,
|
|
"grad_norm": 0.6810759757069667,
|
|
"learning_rate": 3.935351991187035e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16698354482650757,
|
|
"step": 435,
|
|
"valid_targets_mean": 4215.0,
|
|
"valid_targets_min": 3192
|
|
},
|
|
{
|
|
"epoch": 1.2293706293706295,
|
|
"grad_norm": 0.6464880936746293,
|
|
"learning_rate": 3.9317915431797535e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752656102180481,
|
|
"step": 440,
|
|
"valid_targets_mean": 4225.0,
|
|
"valid_targets_min": 2949
|
|
},
|
|
{
|
|
"epoch": 1.2433566433566434,
|
|
"grad_norm": 0.7740804090329195,
|
|
"learning_rate": 3.928137359442452e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1801951825618744,
|
|
"step": 445,
|
|
"valid_targets_mean": 4177.0,
|
|
"valid_targets_min": 2728
|
|
},
|
|
{
|
|
"epoch": 1.2573426573426574,
|
|
"grad_norm": 0.6433269129380625,
|
|
"learning_rate": 3.924389617285969e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14021332561969757,
|
|
"step": 450,
|
|
"valid_targets_mean": 3671.8,
|
|
"valid_targets_min": 2837
|
|
},
|
|
{
|
|
"epoch": 1.2713286713286713,
|
|
"grad_norm": 0.6724022246014172,
|
|
"learning_rate": 3.920548498560852e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1560121476650238,
|
|
"step": 455,
|
|
"valid_targets_mean": 3616.0,
|
|
"valid_targets_min": 2346
|
|
},
|
|
{
|
|
"epoch": 1.2853146853146853,
|
|
"grad_norm": 0.7049576278984316,
|
|
"learning_rate": 3.9166141896485295e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16156238317489624,
|
|
"step": 460,
|
|
"valid_targets_mean": 4018.1,
|
|
"valid_targets_min": 3278
|
|
},
|
|
{
|
|
"epoch": 1.2993006993006992,
|
|
"grad_norm": 0.6610441492885364,
|
|
"learning_rate": 3.912586881452268e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16865435242652893,
|
|
"step": 465,
|
|
"valid_targets_mean": 4217.9,
|
|
"valid_targets_min": 3455
|
|
},
|
|
{
|
|
"epoch": 1.3132867132867134,
|
|
"grad_norm": 0.6857457459046699,
|
|
"learning_rate": 3.9084667693879116e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1624636948108673,
|
|
"step": 470,
|
|
"valid_targets_mean": 4248.5,
|
|
"valid_targets_min": 3029
|
|
},
|
|
{
|
|
"epoch": 1.3272727272727272,
|
|
"grad_norm": 0.6748828329687094,
|
|
"learning_rate": 3.904254053374398e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1654050052165985,
|
|
"step": 475,
|
|
"valid_targets_mean": 4087.2,
|
|
"valid_targets_min": 3125
|
|
},
|
|
{
|
|
"epoch": 1.3412587412587413,
|
|
"grad_norm": 0.6130205692601665,
|
|
"learning_rate": 3.899948937824058e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1577146351337433,
|
|
"step": 480,
|
|
"valid_targets_mean": 4006.1,
|
|
"valid_targets_min": 3390
|
|
},
|
|
{
|
|
"epoch": 1.3552447552447553,
|
|
"grad_norm": 0.6849941983340869,
|
|
"learning_rate": 3.895551631632694e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17559069395065308,
|
|
"step": 485,
|
|
"valid_targets_mean": 4253.0,
|
|
"valid_targets_min": 3799
|
|
},
|
|
{
|
|
"epoch": 1.3692307692307693,
|
|
"grad_norm": 0.6672737622478275,
|
|
"learning_rate": 3.8910623481694514e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15144488215446472,
|
|
"step": 490,
|
|
"valid_targets_mean": 3746.4,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 1.3832167832167832,
|
|
"grad_norm": 0.6386020019411028,
|
|
"learning_rate": 3.886481305266456e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16336670517921448,
|
|
"step": 495,
|
|
"valid_targets_mean": 4049.5,
|
|
"valid_targets_min": 3218
|
|
},
|
|
{
|
|
"epoch": 1.3972027972027972,
|
|
"grad_norm": 0.6442787344362436,
|
|
"learning_rate": 3.881808725208253e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17428100109100342,
|
|
"step": 500,
|
|
"valid_targets_mean": 4064.1,
|
|
"valid_targets_min": 3023
|
|
},
|
|
{
|
|
"epoch": 1.4111888111888111,
|
|
"grad_norm": 0.6125443310145898,
|
|
"learning_rate": 3.8770448347210144e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1777600347995758,
|
|
"step": 505,
|
|
"valid_targets_mean": 4344.0,
|
|
"valid_targets_min": 3361
|
|
},
|
|
{
|
|
"epoch": 1.425174825174825,
|
|
"grad_norm": 0.6329017642332787,
|
|
"learning_rate": 3.87218986496154e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18501895666122437,
|
|
"step": 510,
|
|
"valid_targets_mean": 4310.1,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 1.4391608391608393,
|
|
"grad_norm": 0.666727217355154,
|
|
"learning_rate": 3.867244051506042e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1838594377040863,
|
|
"step": 515,
|
|
"valid_targets_mean": 4075.8,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 1.4531468531468532,
|
|
"grad_norm": 0.6441646180577535,
|
|
"learning_rate": 3.862207634338715e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15423941612243652,
|
|
"step": 520,
|
|
"valid_targets_mean": 3791.9,
|
|
"valid_targets_min": 3202
|
|
},
|
|
{
|
|
"epoch": 1.4671328671328672,
|
|
"grad_norm": 0.6668894524119863,
|
|
"learning_rate": 3.857080857840087e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552126258611679,
|
|
"step": 525,
|
|
"valid_targets_mean": 3679.8,
|
|
"valid_targets_min": 2938
|
|
},
|
|
{
|
|
"epoch": 1.4811188811188811,
|
|
"grad_norm": 0.6590831356215007,
|
|
"learning_rate": 3.851863970775166e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16087287664413452,
|
|
"step": 530,
|
|
"valid_targets_mean": 3947.5,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 1.495104895104895,
|
|
"grad_norm": 0.6287181845606609,
|
|
"learning_rate": 3.846557226281367e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15328553318977356,
|
|
"step": 535,
|
|
"valid_targets_mean": 3824.4,
|
|
"valid_targets_min": 2878
|
|
},
|
|
{
|
|
"epoch": 1.509090909090909,
|
|
"grad_norm": 0.633987975154039,
|
|
"learning_rate": 3.84116088185623e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17237834632396698,
|
|
"step": 540,
|
|
"valid_targets_mean": 4145.6,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 1.523076923076923,
|
|
"grad_norm": 0.616935636790922,
|
|
"learning_rate": 3.835675199344923e-05,
|
|
"loss": 0.3202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17144149541854858,
|
|
"step": 545,
|
|
"valid_targets_mean": 4156.9,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 1.5370629370629372,
|
|
"grad_norm": 0.6334911296670876,
|
|
"learning_rate": 3.830100444927542e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15144981443881989,
|
|
"step": 550,
|
|
"valid_targets_mean": 3769.9,
|
|
"valid_targets_min": 3100
|
|
},
|
|
{
|
|
"epoch": 1.551048951048951,
|
|
"grad_norm": 0.588231502791246,
|
|
"learning_rate": 3.8244368891061884e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16937857866287231,
|
|
"step": 555,
|
|
"valid_targets_mean": 4288.6,
|
|
"valid_targets_min": 3352
|
|
},
|
|
{
|
|
"epoch": 1.565034965034965,
|
|
"grad_norm": 0.5853188773467621,
|
|
"learning_rate": 3.81868480669185e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14558354020118713,
|
|
"step": 560,
|
|
"valid_targets_mean": 4037.2,
|
|
"valid_targets_min": 3164
|
|
},
|
|
{
|
|
"epoch": 1.579020979020979,
|
|
"grad_norm": 0.6125020767553089,
|
|
"learning_rate": 3.812844476791061e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17109696567058563,
|
|
"step": 565,
|
|
"valid_targets_mean": 4062.5,
|
|
"valid_targets_min": 3222
|
|
},
|
|
{
|
|
"epoch": 1.593006993006993,
|
|
"grad_norm": 0.6151869304117249,
|
|
"learning_rate": 3.8069161827923624e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17402294278144836,
|
|
"step": 570,
|
|
"valid_targets_mean": 4408.9,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 1.606993006993007,
|
|
"grad_norm": 0.6266101504112581,
|
|
"learning_rate": 3.80090021235255e-05,
|
|
"loss": 0.3301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17710405588150024,
|
|
"step": 575,
|
|
"valid_targets_mean": 3975.9,
|
|
"valid_targets_min": 2527
|
|
},
|
|
{
|
|
"epoch": 1.620979020979021,
|
|
"grad_norm": 0.6836774754436181,
|
|
"learning_rate": 3.794796857382717e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17877398431301117,
|
|
"step": 580,
|
|
"valid_targets_mean": 4186.1,
|
|
"valid_targets_min": 3490
|
|
},
|
|
{
|
|
"epoch": 1.634965034965035,
|
|
"grad_norm": 0.6161301970228785,
|
|
"learning_rate": 3.7886064140340896e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16287347674369812,
|
|
"step": 585,
|
|
"valid_targets_mean": 3867.0,
|
|
"valid_targets_min": 2643
|
|
},
|
|
{
|
|
"epoch": 1.6489510489510488,
|
|
"grad_norm": 0.6460775180383357,
|
|
"learning_rate": 3.782329182683657e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15202784538269043,
|
|
"step": 590,
|
|
"valid_targets_mean": 3923.5,
|
|
"valid_targets_min": 3167
|
|
},
|
|
{
|
|
"epoch": 1.662937062937063,
|
|
"grad_norm": 0.5995808420248436,
|
|
"learning_rate": 3.775965467919594e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18219563364982605,
|
|
"step": 595,
|
|
"valid_targets_mean": 3959.5,
|
|
"valid_targets_min": 3053
|
|
},
|
|
{
|
|
"epoch": 1.676923076923077,
|
|
"grad_norm": 0.6386284637394193,
|
|
"learning_rate": 3.769515578526486e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15771865844726562,
|
|
"step": 600,
|
|
"valid_targets_mean": 3612.4,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 1.690909090909091,
|
|
"grad_norm": 0.6279213610200182,
|
|
"learning_rate": 3.762979827470343e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14989188313484192,
|
|
"step": 605,
|
|
"valid_targets_mean": 3997.4,
|
|
"valid_targets_min": 2683
|
|
},
|
|
{
|
|
"epoch": 1.7048951048951049,
|
|
"grad_norm": 0.6277447589292829,
|
|
"learning_rate": 3.756358531883413e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1424318104982376,
|
|
"step": 610,
|
|
"valid_targets_mean": 3939.2,
|
|
"valid_targets_min": 3266
|
|
},
|
|
{
|
|
"epoch": 1.7188811188811188,
|
|
"grad_norm": 0.6035263117336757,
|
|
"learning_rate": 3.749652013048797e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16539938747882843,
|
|
"step": 615,
|
|
"valid_targets_mean": 4345.2,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 1.732867132867133,
|
|
"grad_norm": 0.622614038645928,
|
|
"learning_rate": 3.742860596384856e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15031281113624573,
|
|
"step": 620,
|
|
"valid_targets_mean": 4044.1,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 1.7468531468531467,
|
|
"grad_norm": 0.639965638141473,
|
|
"learning_rate": 3.735984611429423e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13543468713760376,
|
|
"step": 625,
|
|
"valid_targets_mean": 3618.5,
|
|
"valid_targets_min": 3066
|
|
},
|
|
{
|
|
"epoch": 1.760839160839161,
|
|
"grad_norm": 0.6730183016192768,
|
|
"learning_rate": 3.7290243918238117e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15004126727581024,
|
|
"step": 630,
|
|
"valid_targets_mean": 3738.6,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 1.7748251748251749,
|
|
"grad_norm": 0.6099293222360501,
|
|
"learning_rate": 3.72198027529663e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1613060086965561,
|
|
"step": 635,
|
|
"valid_targets_mean": 4154.8,
|
|
"valid_targets_min": 3562
|
|
},
|
|
{
|
|
"epoch": 1.7888111888111888,
|
|
"grad_norm": 0.6824934179672323,
|
|
"learning_rate": 3.714852603647387e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16229523718357086,
|
|
"step": 640,
|
|
"valid_targets_mean": 4170.4,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 1.8027972027972028,
|
|
"grad_norm": 0.5866660922114889,
|
|
"learning_rate": 3.707641722729915e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16064469516277313,
|
|
"step": 645,
|
|
"valid_targets_mean": 4250.9,
|
|
"valid_targets_min": 2766
|
|
},
|
|
{
|
|
"epoch": 1.8167832167832167,
|
|
"grad_norm": 0.6021573321793868,
|
|
"learning_rate": 3.700347982435583e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16888031363487244,
|
|
"step": 650,
|
|
"valid_targets_mean": 3838.9,
|
|
"valid_targets_min": 2576
|
|
},
|
|
{
|
|
"epoch": 1.830769230769231,
|
|
"grad_norm": 0.6215416842905932,
|
|
"learning_rate": 3.6929717366763186e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16497664153575897,
|
|
"step": 655,
|
|
"valid_targets_mean": 3762.6,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 1.8447552447552447,
|
|
"grad_norm": 0.6395491001752005,
|
|
"learning_rate": 3.685513343367438e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16028185188770294,
|
|
"step": 660,
|
|
"valid_targets_mean": 4035.4,
|
|
"valid_targets_min": 3159
|
|
},
|
|
{
|
|
"epoch": 1.8587412587412588,
|
|
"grad_norm": 0.6160971784063584,
|
|
"learning_rate": 3.677973164410278e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16289816796779633,
|
|
"step": 665,
|
|
"valid_targets_mean": 3981.8,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 1.8727272727272726,
|
|
"grad_norm": 0.5826891645612171,
|
|
"learning_rate": 3.6703515656746365e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15475299954414368,
|
|
"step": 670,
|
|
"valid_targets_mean": 4015.0,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 1.8867132867132868,
|
|
"grad_norm": 0.5808959508873444,
|
|
"learning_rate": 3.662648916981015e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405600607395172,
|
|
"step": 675,
|
|
"valid_targets_mean": 3794.0,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 1.9006993006993007,
|
|
"grad_norm": 0.6044157534723815,
|
|
"learning_rate": 3.654865592082681e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15282100439071655,
|
|
"step": 680,
|
|
"valid_targets_mean": 4143.5,
|
|
"valid_targets_min": 2980
|
|
},
|
|
{
|
|
"epoch": 1.9146853146853147,
|
|
"grad_norm": 0.6392815281243518,
|
|
"learning_rate": 3.647001968647527e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14917393028736115,
|
|
"step": 685,
|
|
"valid_targets_mean": 3979.6,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 1.9286713286713286,
|
|
"grad_norm": 0.6286155160321347,
|
|
"learning_rate": 3.6390584282397464e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17181769013404846,
|
|
"step": 690,
|
|
"valid_targets_mean": 4348.1,
|
|
"valid_targets_min": 3396
|
|
},
|
|
{
|
|
"epoch": 1.9426573426573426,
|
|
"grad_norm": 0.6298464119641867,
|
|
"learning_rate": 3.631035356301321e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.153742253780365,
|
|
"step": 695,
|
|
"valid_targets_mean": 3986.4,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 1.9566433566433568,
|
|
"grad_norm": 0.6339318759105212,
|
|
"learning_rate": 3.6229331421333155e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14185550808906555,
|
|
"step": 700,
|
|
"valid_targets_mean": 3847.1,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 1.9706293706293705,
|
|
"grad_norm": 0.6343178715607247,
|
|
"learning_rate": 3.6147521788769884e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13274836540222168,
|
|
"step": 705,
|
|
"valid_targets_mean": 3321.9,
|
|
"valid_targets_min": 2781
|
|
},
|
|
{
|
|
"epoch": 1.9846153846153847,
|
|
"grad_norm": 0.6110252636645802,
|
|
"learning_rate": 3.606492863494718e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15173494815826416,
|
|
"step": 710,
|
|
"valid_targets_mean": 3732.9,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 1.9986013986013986,
|
|
"grad_norm": 0.5812066041833913,
|
|
"learning_rate": 3.598155596750736e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16258171200752258,
|
|
"step": 715,
|
|
"valid_targets_mean": 3896.4,
|
|
"valid_targets_min": 2959
|
|
},
|
|
{
|
|
"epoch": 2.011188811188811,
|
|
"grad_norm": 0.5743614871834676,
|
|
"learning_rate": 3.589740783191688e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14478623867034912,
|
|
"step": 720,
|
|
"valid_targets_mean": 3748.2,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 2.025174825174825,
|
|
"grad_norm": 0.595189094441731,
|
|
"learning_rate": 3.581248831126996e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16492405533790588,
|
|
"step": 725,
|
|
"valid_targets_mean": 4154.8,
|
|
"valid_targets_min": 3265
|
|
},
|
|
{
|
|
"epoch": 2.0391608391608393,
|
|
"grad_norm": 0.6172798210211731,
|
|
"learning_rate": 3.572680152609053e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609179973602295,
|
|
"step": 730,
|
|
"valid_targets_mean": 4198.1,
|
|
"valid_targets_min": 3168
|
|
},
|
|
{
|
|
"epoch": 2.053146853146853,
|
|
"grad_norm": 0.5691716424074825,
|
|
"learning_rate": 3.564035163413225e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16421687602996826,
|
|
"step": 735,
|
|
"valid_targets_mean": 4140.0,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 2.0671328671328673,
|
|
"grad_norm": 0.5937600015154407,
|
|
"learning_rate": 3.555314283017677e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15526092052459717,
|
|
"step": 740,
|
|
"valid_targets_mean": 4100.5,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 2.081118881118881,
|
|
"grad_norm": 0.6223941210676391,
|
|
"learning_rate": 3.546517934583021e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16442987322807312,
|
|
"step": 745,
|
|
"valid_targets_mean": 4481.6,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 2.095104895104895,
|
|
"grad_norm": 0.6087190275417665,
|
|
"learning_rate": 3.5376465449317816e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13107624650001526,
|
|
"step": 750,
|
|
"valid_targets_mean": 3853.0,
|
|
"valid_targets_min": 3042
|
|
},
|
|
{
|
|
"epoch": 2.109090909090909,
|
|
"grad_norm": 0.588449529918364,
|
|
"learning_rate": 3.5287005445276835e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13993586599826813,
|
|
"step": 755,
|
|
"valid_targets_mean": 3786.8,
|
|
"valid_targets_min": 3101
|
|
},
|
|
{
|
|
"epoch": 2.123076923076923,
|
|
"grad_norm": 0.589351215775367,
|
|
"learning_rate": 3.5196803674547674e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14328411221504211,
|
|
"step": 760,
|
|
"valid_targets_mean": 4038.6,
|
|
"valid_targets_min": 2740
|
|
},
|
|
{
|
|
"epoch": 2.1370629370629373,
|
|
"grad_norm": 0.8156144735369695,
|
|
"learning_rate": 3.510586451396326e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1407259702682495,
|
|
"step": 765,
|
|
"valid_targets_mean": 3942.2,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 2.151048951048951,
|
|
"grad_norm": 0.6002072237961282,
|
|
"learning_rate": 3.5014192376136655e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14943557977676392,
|
|
"step": 770,
|
|
"valid_targets_mean": 4074.9,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 2.165034965034965,
|
|
"grad_norm": 0.635268094736542,
|
|
"learning_rate": 3.492179170924696e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16214363276958466,
|
|
"step": 775,
|
|
"valid_targets_mean": 4062.0,
|
|
"valid_targets_min": 3218
|
|
},
|
|
{
|
|
"epoch": 2.179020979020979,
|
|
"grad_norm": 0.5881271243993743,
|
|
"learning_rate": 3.482866699682347e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15315216779708862,
|
|
"step": 780,
|
|
"valid_targets_mean": 3918.9,
|
|
"valid_targets_min": 2894
|
|
},
|
|
{
|
|
"epoch": 2.193006993006993,
|
|
"grad_norm": 0.6422135086829801,
|
|
"learning_rate": 3.47348227575281e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1634099781513214,
|
|
"step": 785,
|
|
"valid_targets_mean": 4611.0,
|
|
"valid_targets_min": 3717
|
|
},
|
|
{
|
|
"epoch": 2.206993006993007,
|
|
"grad_norm": 0.5906081895645069,
|
|
"learning_rate": 3.464026354493617e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14315278828144073,
|
|
"step": 790,
|
|
"valid_targets_mean": 3942.5,
|
|
"valid_targets_min": 3405
|
|
},
|
|
{
|
|
"epoch": 2.220979020979021,
|
|
"grad_norm": 0.7105849807417328,
|
|
"learning_rate": 3.454499394731543e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13363346457481384,
|
|
"step": 795,
|
|
"valid_targets_mean": 3611.0,
|
|
"valid_targets_min": 2723
|
|
},
|
|
{
|
|
"epoch": 2.234965034965035,
|
|
"grad_norm": 0.6098423245269216,
|
|
"learning_rate": 3.4449018587403414e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14147688448429108,
|
|
"step": 800,
|
|
"valid_targets_mean": 3944.2,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 2.248951048951049,
|
|
"grad_norm": 0.5863096258549487,
|
|
"learning_rate": 3.435234212218313e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13860483467578888,
|
|
"step": 805,
|
|
"valid_targets_mean": 3520.5,
|
|
"valid_targets_min": 2908
|
|
},
|
|
{
|
|
"epoch": 2.262937062937063,
|
|
"grad_norm": 0.598037261266258,
|
|
"learning_rate": 3.425496924265714e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15808618068695068,
|
|
"step": 810,
|
|
"valid_targets_mean": 4074.6,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 2.276923076923077,
|
|
"grad_norm": 0.6073448842298433,
|
|
"learning_rate": 3.415690467361989e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16971629858016968,
|
|
"step": 815,
|
|
"valid_targets_mean": 4395.5,
|
|
"valid_targets_min": 3602
|
|
},
|
|
{
|
|
"epoch": 2.290909090909091,
|
|
"grad_norm": 0.6548317072525282,
|
|
"learning_rate": 3.405815317342844e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.164471834897995,
|
|
"step": 820,
|
|
"valid_targets_mean": 4100.1,
|
|
"valid_targets_min": 3447
|
|
},
|
|
{
|
|
"epoch": 2.3048951048951047,
|
|
"grad_norm": 0.618089201274623,
|
|
"learning_rate": 3.395871953377164e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1507072150707245,
|
|
"step": 825,
|
|
"valid_targets_mean": 4090.2,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 2.318881118881119,
|
|
"grad_norm": 0.6031388487536505,
|
|
"learning_rate": 3.3858608579437556e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13351577520370483,
|
|
"step": 830,
|
|
"valid_targets_mean": 3678.9,
|
|
"valid_targets_min": 2680
|
|
},
|
|
{
|
|
"epoch": 2.3328671328671327,
|
|
"grad_norm": 0.6045729601671705,
|
|
"learning_rate": 3.3757825168079396e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15121564269065857,
|
|
"step": 835,
|
|
"valid_targets_mean": 3937.5,
|
|
"valid_targets_min": 2920
|
|
},
|
|
{
|
|
"epoch": 2.346853146853147,
|
|
"grad_norm": 0.5731552691739368,
|
|
"learning_rate": 3.365637418997981e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1540403664112091,
|
|
"step": 840,
|
|
"valid_targets_mean": 4148.5,
|
|
"valid_targets_min": 3554
|
|
},
|
|
{
|
|
"epoch": 2.360839160839161,
|
|
"grad_norm": 0.6292676398227383,
|
|
"learning_rate": 3.3554260567813546e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1413697451353073,
|
|
"step": 845,
|
|
"valid_targets_mean": 3904.2,
|
|
"valid_targets_min": 3167
|
|
},
|
|
{
|
|
"epoch": 2.3748251748251747,
|
|
"grad_norm": 0.5972678726345569,
|
|
"learning_rate": 3.3451489256408664e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13888861238956451,
|
|
"step": 850,
|
|
"valid_targets_mean": 3959.9,
|
|
"valid_targets_min": 2950
|
|
},
|
|
{
|
|
"epoch": 2.388811188811189,
|
|
"grad_norm": 0.6192844037159877,
|
|
"learning_rate": 3.3348065242506066e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15562085807323456,
|
|
"step": 855,
|
|
"valid_targets_mean": 3538.9,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.4027972027972027,
|
|
"grad_norm": 0.595293999532859,
|
|
"learning_rate": 3.3243993544517525e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1708168387413025,
|
|
"step": 860,
|
|
"valid_targets_mean": 4071.6,
|
|
"valid_targets_min": 3415
|
|
},
|
|
{
|
|
"epoch": 2.416783216783217,
|
|
"grad_norm": 0.5779936717582573,
|
|
"learning_rate": 3.313927921228221e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15482652187347412,
|
|
"step": 865,
|
|
"valid_targets_mean": 4141.9,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 2.430769230769231,
|
|
"grad_norm": 0.5924911850269354,
|
|
"learning_rate": 3.303392732682163e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14046156406402588,
|
|
"step": 870,
|
|
"valid_targets_mean": 3971.1,
|
|
"valid_targets_min": 2608
|
|
},
|
|
{
|
|
"epoch": 2.4447552447552447,
|
|
"grad_norm": 0.5649308938190039,
|
|
"learning_rate": 3.292794300009309e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662825047969818,
|
|
"step": 875,
|
|
"valid_targets_mean": 4169.5,
|
|
"valid_targets_min": 3295
|
|
},
|
|
{
|
|
"epoch": 2.458741258741259,
|
|
"grad_norm": 0.6038324946866227,
|
|
"learning_rate": 3.282133137474164e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14935675263404846,
|
|
"step": 880,
|
|
"valid_targets_mean": 3792.0,
|
|
"valid_targets_min": 3233
|
|
},
|
|
{
|
|
"epoch": 2.4727272727272727,
|
|
"grad_norm": 0.5628142894986014,
|
|
"learning_rate": 3.271409762385057e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1566285341978073,
|
|
"step": 885,
|
|
"valid_targets_mean": 4116.4,
|
|
"valid_targets_min": 3233
|
|
},
|
|
{
|
|
"epoch": 2.486713286713287,
|
|
"grad_norm": 0.5690126323587896,
|
|
"learning_rate": 3.2606246950690365e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14359772205352783,
|
|
"step": 890,
|
|
"valid_targets_mean": 4127.1,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 2.5006993006993006,
|
|
"grad_norm": 0.5786323924138527,
|
|
"learning_rate": 3.2497784588466235e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13480177521705627,
|
|
"step": 895,
|
|
"valid_targets_mean": 3744.5,
|
|
"valid_targets_min": 3057
|
|
},
|
|
{
|
|
"epoch": 2.5146853146853148,
|
|
"grad_norm": 0.6135926415957672,
|
|
"learning_rate": 3.23887158000642e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1602180004119873,
|
|
"step": 900,
|
|
"valid_targets_mean": 4174.6,
|
|
"valid_targets_min": 3046
|
|
},
|
|
{
|
|
"epoch": 2.5286713286713285,
|
|
"grad_norm": 0.6166674089503721,
|
|
"learning_rate": 3.2279045877795724e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15190884470939636,
|
|
"step": 905,
|
|
"valid_targets_mean": 4146.0,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 2.5426573426573427,
|
|
"grad_norm": 0.591655929505564,
|
|
"learning_rate": 3.216878014314088e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14331448078155518,
|
|
"step": 910,
|
|
"valid_targets_mean": 3779.4,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 2.556643356643357,
|
|
"grad_norm": 0.5722745458923197,
|
|
"learning_rate": 3.205792394649017e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14977261424064636,
|
|
"step": 915,
|
|
"valid_targets_mean": 3924.5,
|
|
"valid_targets_min": 2738
|
|
},
|
|
{
|
|
"epoch": 2.5706293706293706,
|
|
"grad_norm": 0.5714914092502298,
|
|
"learning_rate": 3.194648266688492e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15439298748970032,
|
|
"step": 920,
|
|
"valid_targets_mean": 4133.9,
|
|
"valid_targets_min": 3535
|
|
},
|
|
{
|
|
"epoch": 2.5846153846153848,
|
|
"grad_norm": 0.6157264534450223,
|
|
"learning_rate": 3.183446171175623e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15904788672924042,
|
|
"step": 925,
|
|
"valid_targets_mean": 3680.8,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 2.5986013986013985,
|
|
"grad_norm": 0.6096304017907245,
|
|
"learning_rate": 3.1721866516662646e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1429840326309204,
|
|
"step": 930,
|
|
"valid_targets_mean": 3547.1,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 2.6125874125874127,
|
|
"grad_norm": 0.5939141460355332,
|
|
"learning_rate": 3.160870254502637e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16182993352413177,
|
|
"step": 935,
|
|
"valid_targets_mean": 4263.1,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 2.626573426573427,
|
|
"grad_norm": 0.6078547754682735,
|
|
"learning_rate": 3.1494975287868166e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16442373394966125,
|
|
"step": 940,
|
|
"valid_targets_mean": 4124.8,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 2.6405594405594406,
|
|
"grad_norm": 0.5332110011961554,
|
|
"learning_rate": 3.138069026354095e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13804768025875092,
|
|
"step": 945,
|
|
"valid_targets_mean": 3926.1,
|
|
"valid_targets_min": 3110
|
|
},
|
|
{
|
|
"epoch": 2.6545454545454543,
|
|
"grad_norm": 0.5992219248556111,
|
|
"learning_rate": 3.1265853017461984e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14446228742599487,
|
|
"step": 950,
|
|
"valid_targets_mean": 3793.1,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 2.6685314685314685,
|
|
"grad_norm": 0.5906808715236577,
|
|
"learning_rate": 3.115046912184382e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15945465862751007,
|
|
"step": 955,
|
|
"valid_targets_mean": 4146.2,
|
|
"valid_targets_min": 3248
|
|
},
|
|
{
|
|
"epoch": 2.6825174825174827,
|
|
"grad_norm": 0.5851720383851202,
|
|
"learning_rate": 3.103454417542394e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13522455096244812,
|
|
"step": 960,
|
|
"valid_targets_mean": 3813.8,
|
|
"valid_targets_min": 2961
|
|
},
|
|
{
|
|
"epoch": 2.6965034965034964,
|
|
"grad_norm": 0.5896699725800286,
|
|
"learning_rate": 3.091808380319305e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1530534029006958,
|
|
"step": 965,
|
|
"valid_targets_mean": 4099.0,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 2.7104895104895106,
|
|
"grad_norm": 0.582215290493422,
|
|
"learning_rate": 3.0801093656122136e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14571411907672882,
|
|
"step": 970,
|
|
"valid_targets_mean": 3914.6,
|
|
"valid_targets_min": 3250
|
|
},
|
|
{
|
|
"epoch": 2.7244755244755243,
|
|
"grad_norm": 0.558341621419776,
|
|
"learning_rate": 3.0683579410888345e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14048753678798676,
|
|
"step": 975,
|
|
"valid_targets_mean": 3710.1,
|
|
"valid_targets_min": 2534
|
|
},
|
|
{
|
|
"epoch": 2.7384615384615385,
|
|
"grad_norm": 0.5656858687504162,
|
|
"learning_rate": 3.056554676959942e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12521235644817352,
|
|
"step": 980,
|
|
"valid_targets_mean": 3584.2,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 2.7524475524475527,
|
|
"grad_norm": 0.5449736994099068,
|
|
"learning_rate": 3.0447001459517117e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14620693027973175,
|
|
"step": 985,
|
|
"valid_targets_mean": 3956.8,
|
|
"valid_targets_min": 3004
|
|
},
|
|
{
|
|
"epoch": 2.7664335664335664,
|
|
"grad_norm": 2.4950395364840245,
|
|
"learning_rate": 3.0327949232779242e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13793346285820007,
|
|
"step": 990,
|
|
"valid_targets_mean": 3818.4,
|
|
"valid_targets_min": 3323
|
|
},
|
|
{
|
|
"epoch": 2.78041958041958,
|
|
"grad_norm": 0.6273958669859198,
|
|
"learning_rate": 3.020839586612057e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15779882669448853,
|
|
"step": 995,
|
|
"valid_targets_mean": 3840.5,
|
|
"valid_targets_min": 2683
|
|
},
|
|
{
|
|
"epoch": 2.7944055944055943,
|
|
"grad_norm": 0.5873562506440816,
|
|
"learning_rate": 3.0088347160592534e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1517733931541443,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4189.0,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 2.8083916083916085,
|
|
"grad_norm": 0.5952889862867294,
|
|
"learning_rate": 2.996780894128174e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15184283256530762,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3989.8,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 2.8223776223776222,
|
|
"grad_norm": 0.5772743752732764,
|
|
"learning_rate": 2.9846787057027335e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13652929663658142,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3563.9,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 2.8363636363636364,
|
|
"grad_norm": 0.6153984951792437,
|
|
"learning_rate": 2.972528738013717e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14372235536575317,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3852.0,
|
|
"valid_targets_min": 3140
|
|
},
|
|
{
|
|
"epoch": 2.85034965034965,
|
|
"grad_norm": 0.6175706903658995,
|
|
"learning_rate": 2.960331580610291e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684339940547943,
|
|
"step": 1020,
|
|
"valid_targets_mean": 4381.4,
|
|
"valid_targets_min": 3633
|
|
},
|
|
{
|
|
"epoch": 2.8643356643356643,
|
|
"grad_norm": 0.6013087024609713,
|
|
"learning_rate": 2.9480878253313908e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13773339986801147,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3716.9,
|
|
"valid_targets_min": 2800
|
|
},
|
|
{
|
|
"epoch": 2.8783216783216785,
|
|
"grad_norm": 0.5690562225384614,
|
|
"learning_rate": 2.9357980662770082e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15410861372947693,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4014.1,
|
|
"valid_targets_min": 3246
|
|
},
|
|
{
|
|
"epoch": 2.8923076923076922,
|
|
"grad_norm": 0.5836767309730673,
|
|
"learning_rate": 2.923462899779363e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1419641524553299,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3847.4,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 2.9062937062937064,
|
|
"grad_norm": 0.5943150762666415,
|
|
"learning_rate": 2.9110829243739638e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14206726849079132,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3915.0,
|
|
"valid_targets_min": 3165
|
|
},
|
|
{
|
|
"epoch": 2.92027972027972,
|
|
"grad_norm": 0.5901244471485346,
|
|
"learning_rate": 2.8986587407705698e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15809054672718048,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4720.0,
|
|
"valid_targets_min": 3354
|
|
},
|
|
{
|
|
"epoch": 2.9342657342657343,
|
|
"grad_norm": 0.574348712454776,
|
|
"learning_rate": 2.8861909518240412e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13096821308135986,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3712.1,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 2.9482517482517485,
|
|
"grad_norm": 0.5775146916340026,
|
|
"learning_rate": 2.873680162505087e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15335050225257874,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4055.0,
|
|
"valid_targets_min": 2802
|
|
},
|
|
{
|
|
"epoch": 2.9622377622377623,
|
|
"grad_norm": 0.5886462559759911,
|
|
"learning_rate": 2.8611269798709088e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15094557404518127,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3997.2,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 2.976223776223776,
|
|
"grad_norm": 0.5792930097643623,
|
|
"learning_rate": 2.8485320130357467e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15749546885490417,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4244.0,
|
|
"valid_targets_min": 3307
|
|
},
|
|
{
|
|
"epoch": 2.99020979020979,
|
|
"grad_norm": 0.5412531316096433,
|
|
"learning_rate": 2.8358958731413237e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14990413188934326,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4070.8,
|
|
"valid_targets_min": 2959
|
|
},
|
|
{
|
|
"epoch": 3.0027972027972027,
|
|
"grad_norm": 0.5987773847286868,
|
|
"learning_rate": 2.8232191733271902e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15005648136138916,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4235.8,
|
|
"valid_targets_min": 3358
|
|
},
|
|
{
|
|
"epoch": 3.016783216783217,
|
|
"grad_norm": 0.6102100675399175,
|
|
"learning_rate": 2.8105025287009722e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13793258368968964,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3900.8,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 3.0307692307692307,
|
|
"grad_norm": 0.6062042303419031,
|
|
"learning_rate": 2.7977465563085266e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13424527645111084,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3554.8,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 3.044755244755245,
|
|
"grad_norm": 0.5903347227932119,
|
|
"learning_rate": 2.7849518751039988e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13443614542484283,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4138.4,
|
|
"valid_targets_min": 2779
|
|
},
|
|
{
|
|
"epoch": 3.0587412587412586,
|
|
"grad_norm": 0.5512429822848546,
|
|
"learning_rate": 2.7721191059197906e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14363616704940796,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3785.6,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 3.0727272727272728,
|
|
"grad_norm": 0.5873538300440081,
|
|
"learning_rate": 2.7592488714364346e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14701727032661438,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4083.5,
|
|
"valid_targets_min": 3004
|
|
},
|
|
{
|
|
"epoch": 3.0867132867132865,
|
|
"grad_norm": 0.572390188505052,
|
|
"learning_rate": 2.7463417961523818e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13609379529953003,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3602.6,
|
|
"valid_targets_min": 2527
|
|
},
|
|
{
|
|
"epoch": 3.1006993006993007,
|
|
"grad_norm": 0.5622134342613668,
|
|
"learning_rate": 2.7333985063536963e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1393313705921173,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4095.5,
|
|
"valid_targets_min": 3101
|
|
},
|
|
{
|
|
"epoch": 3.114685314685315,
|
|
"grad_norm": 0.5593515284390895,
|
|
"learning_rate": 2.72041963008367e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14072662591934204,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4279.5,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 3.1286713286713286,
|
|
"grad_norm": 0.5610889742269053,
|
|
"learning_rate": 2.707405797112344e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158025324344635,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4263.9,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 3.1426573426573428,
|
|
"grad_norm": 0.6024251945209658,
|
|
"learning_rate": 2.6943576389059555e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1297697126865387,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3528.4,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 3.1566433566433565,
|
|
"grad_norm": 0.5597392486313498,
|
|
"learning_rate": 2.6812757885962925e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13107940554618835,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4273.8,
|
|
"valid_targets_min": 3953
|
|
},
|
|
{
|
|
"epoch": 3.1706293706293707,
|
|
"grad_norm": 0.5876810531953238,
|
|
"learning_rate": 2.6681608809499742e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15600530803203583,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4078.9,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 3.184615384615385,
|
|
"grad_norm": 0.5783426546002428,
|
|
"learning_rate": 2.6550135523376536e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15904384851455688,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4351.2,
|
|
"valid_targets_min": 3517
|
|
},
|
|
{
|
|
"epoch": 3.1986013986013986,
|
|
"grad_norm": 0.5813694968263312,
|
|
"learning_rate": 2.641834440703133e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15258988738059998,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4117.5,
|
|
"valid_targets_min": 3063
|
|
},
|
|
{
|
|
"epoch": 3.2125874125874128,
|
|
"grad_norm": 0.5905046846776977,
|
|
"learning_rate": 2.6286241855324148e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14545059204101562,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4036.8,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 3.2265734265734265,
|
|
"grad_norm": 0.6251524497566068,
|
|
"learning_rate": 2.615383427822669e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14009219408035278,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4025.4,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 3.2405594405594407,
|
|
"grad_norm": 0.5833207093013281,
|
|
"learning_rate": 2.6021128100511312e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1496381163597107,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3876.6,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 3.2545454545454544,
|
|
"grad_norm": 0.5869118064695467,
|
|
"learning_rate": 2.5888129761439268e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12796083092689514,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4000.5,
|
|
"valid_targets_min": 3198
|
|
},
|
|
{
|
|
"epoch": 3.2685314685314686,
|
|
"grad_norm": 0.6105334860746966,
|
|
"learning_rate": 2.575484571444828e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15768763422966003,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4069.2,
|
|
"valid_targets_min": 3275
|
|
},
|
|
{
|
|
"epoch": 3.2825174825174823,
|
|
"grad_norm": 0.6148135538144459,
|
|
"learning_rate": 2.5621282426839376e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14051169157028198,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3816.0,
|
|
"valid_targets_min": 3018
|
|
},
|
|
{
|
|
"epoch": 3.2965034965034965,
|
|
"grad_norm": 0.5784403352707399,
|
|
"learning_rate": 2.5487446379463095e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14809316396713257,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4309.2,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 3.3104895104895107,
|
|
"grad_norm": 0.5661451133427263,
|
|
"learning_rate": 2.535334406640503e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14418381452560425,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3979.0,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 3.3244755244755244,
|
|
"grad_norm": 0.5459169548362879,
|
|
"learning_rate": 2.5218981994670683e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16877073049545288,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4564.4,
|
|
"valid_targets_min": 2955
|
|
},
|
|
{
|
|
"epoch": 3.3384615384615386,
|
|
"grad_norm": 0.5750076988938344,
|
|
"learning_rate": 2.5084366683869746e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16434863209724426,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4575.8,
|
|
"valid_targets_min": 3817
|
|
},
|
|
{
|
|
"epoch": 3.3524475524475523,
|
|
"grad_norm": 0.5796812607678961,
|
|
"learning_rate": 2.494950466589976e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1434839367866516,
|
|
"step": 1200,
|
|
"valid_targets_mean": 4157.1,
|
|
"valid_targets_min": 3372
|
|
},
|
|
{
|
|
"epoch": 3.3664335664335665,
|
|
"grad_norm": 0.5571773596093456,
|
|
"learning_rate": 2.4814402484629172e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16117364168167114,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4443.8,
|
|
"valid_targets_min": 3930
|
|
},
|
|
{
|
|
"epoch": 3.3804195804195802,
|
|
"grad_norm": 0.542012095877534,
|
|
"learning_rate": 2.4679066695579783e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13552315533161163,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3729.8,
|
|
"valid_targets_min": 2346
|
|
},
|
|
{
|
|
"epoch": 3.3944055944055944,
|
|
"grad_norm": 0.6262546928082594,
|
|
"learning_rate": 2.454350386560868e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132088303565979,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3667.1,
|
|
"valid_targets_min": 3007
|
|
},
|
|
{
|
|
"epoch": 3.408391608391608,
|
|
"grad_norm": 0.5654793675821718,
|
|
"learning_rate": 2.440772057258958e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12824289500713348,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3630.8,
|
|
"valid_targets_min": 2659
|
|
},
|
|
{
|
|
"epoch": 3.4223776223776223,
|
|
"grad_norm": 0.5847485061544787,
|
|
"learning_rate": 2.4271723405093683e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13168518245220184,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3625.9,
|
|
"valid_targets_min": 3142
|
|
},
|
|
{
|
|
"epoch": 3.4363636363636365,
|
|
"grad_norm": 0.5821821351030247,
|
|
"learning_rate": 2.4135518962069924e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12825189530849457,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3588.0,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 3.4503496503496502,
|
|
"grad_norm": 0.5659166757325212,
|
|
"learning_rate": 2.3999113852524825e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12714150547981262,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3578.1,
|
|
"valid_targets_min": 2938
|
|
},
|
|
{
|
|
"epoch": 3.4643356643356644,
|
|
"grad_norm": 0.5861208282116811,
|
|
"learning_rate": 2.386251469520179e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15201061964035034,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4156.2,
|
|
"valid_targets_min": 3405
|
|
},
|
|
{
|
|
"epoch": 3.478321678321678,
|
|
"grad_norm": 0.5535911039436388,
|
|
"learning_rate": 2.3725728118259927e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1573452353477478,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4481.2,
|
|
"valid_targets_min": 3900
|
|
},
|
|
{
|
|
"epoch": 3.4923076923076923,
|
|
"grad_norm": 0.5346692692232985,
|
|
"learning_rate": 2.358876075895247e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13130052387714386,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3746.9,
|
|
"valid_targets_min": 2357
|
|
},
|
|
{
|
|
"epoch": 3.5062937062937065,
|
|
"grad_norm": 0.5590784293301762,
|
|
"learning_rate": 2.345161926330468e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1504519134759903,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3962.4,
|
|
"valid_targets_min": 3050
|
|
},
|
|
{
|
|
"epoch": 3.5202797202797202,
|
|
"grad_norm": 0.5526105682767487,
|
|
"learning_rate": 2.3314310285791395e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14131543040275574,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3988.5,
|
|
"valid_targets_min": 3365
|
|
},
|
|
{
|
|
"epoch": 3.5342657342657344,
|
|
"grad_norm": 0.5605267073009159,
|
|
"learning_rate": 2.3176840489014127e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14908947050571442,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4193.6,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 3.548251748251748,
|
|
"grad_norm": 0.5567496368325469,
|
|
"learning_rate": 2.303921654337776e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15985560417175293,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4427.0,
|
|
"valid_targets_min": 3686
|
|
},
|
|
{
|
|
"epoch": 3.5622377622377623,
|
|
"grad_norm": 0.580201073915128,
|
|
"learning_rate": 2.29014451267669e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1479303240776062,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3910.6,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 3.576223776223776,
|
|
"grad_norm": 0.5329574918906276,
|
|
"learning_rate": 2.276353292422185e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15829353034496307,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3959.4,
|
|
"valid_targets_min": 3298
|
|
},
|
|
{
|
|
"epoch": 3.5902097902097903,
|
|
"grad_norm": 0.6871295147458576,
|
|
"learning_rate": 2.2625486627614223e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14807263016700745,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3968.4,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 3.604195804195804,
|
|
"grad_norm": 0.5781576024288366,
|
|
"learning_rate": 2.248731293532222e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14441080391407013,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4244.2,
|
|
"valid_targets_min": 3643
|
|
},
|
|
{
|
|
"epoch": 3.618181818181818,
|
|
"grad_norm": 0.5642792543631863,
|
|
"learning_rate": 2.2349018551905653e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15375354886054993,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3981.8,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 3.6321678321678323,
|
|
"grad_norm": 0.5603604594097068,
|
|
"learning_rate": 2.221061018778058e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.147846981883049,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3968.8,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 3.646153846153846,
|
|
"grad_norm": 0.5673580651732941,
|
|
"learning_rate": 2.207209455889368e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126178577542305,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3585.2,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 3.6601398601398603,
|
|
"grad_norm": 0.5367686648454142,
|
|
"learning_rate": 2.193347838639647e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13222312927246094,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3554.9,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.674125874125874,
|
|
"grad_norm": 0.5576957411476551,
|
|
"learning_rate": 2.1794768396319058e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12126507610082626,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3548.1,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 3.688111888111888,
|
|
"grad_norm": 0.5614984976925238,
|
|
"learning_rate": 2.1655971319243853e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13723014295101166,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3631.2,
|
|
"valid_targets_min": 2436
|
|
},
|
|
{
|
|
"epoch": 3.7020979020979023,
|
|
"grad_norm": 0.5682898573966657,
|
|
"learning_rate": 2.1517093889978966e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13498768210411072,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3797.8,
|
|
"valid_targets_min": 3111
|
|
},
|
|
{
|
|
"epoch": 3.716083916083916,
|
|
"grad_norm": 0.5563945992386156,
|
|
"learning_rate": 2.1378142847231417e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14077872037887573,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3898.6,
|
|
"valid_targets_min": 2740
|
|
},
|
|
{
|
|
"epoch": 3.73006993006993,
|
|
"grad_norm": 0.5525569479616117,
|
|
"learning_rate": 2.123912493328013e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15129277110099792,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4313.1,
|
|
"valid_targets_min": 3386
|
|
},
|
|
{
|
|
"epoch": 3.744055944055944,
|
|
"grad_norm": 0.5613543756192618,
|
|
"learning_rate": 2.1100046893648813e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311524659395218,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3788.1,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 3.758041958041958,
|
|
"grad_norm": 0.5960495305158579,
|
|
"learning_rate": 2.096091547677864e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13484007120132446,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3836.5,
|
|
"valid_targets_min": 3397
|
|
},
|
|
{
|
|
"epoch": 3.772027972027972,
|
|
"grad_norm": 0.534714708744089,
|
|
"learning_rate": 2.0821737433700773e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13426387310028076,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3965.5,
|
|
"valid_targets_min": 3316
|
|
},
|
|
{
|
|
"epoch": 3.786013986013986,
|
|
"grad_norm": 0.5608719066575932,
|
|
"learning_rate": 2.068251951770882e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14681580662727356,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3656.8,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"grad_norm": 0.5392596511784064,
|
|
"learning_rate": 2.054326848403113e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456853449344635,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3982.4,
|
|
"valid_targets_min": 3106
|
|
},
|
|
{
|
|
"epoch": 3.813986013986014,
|
|
"grad_norm": 0.5824843228693691,
|
|
"learning_rate": 2.0403991089502995e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1382858157157898,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3885.2,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 3.827972027972028,
|
|
"grad_norm": 0.5447761110635077,
|
|
"learning_rate": 2.026469409223883e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15115302801132202,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3895.8,
|
|
"valid_targets_min": 3321
|
|
},
|
|
{
|
|
"epoch": 3.841958041958042,
|
|
"grad_norm": 0.5264471128691284,
|
|
"learning_rate": 2.012538425130421e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15932296216487885,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4400.5,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 3.855944055944056,
|
|
"grad_norm": 0.553572290453652,
|
|
"learning_rate": 1.998606832638792e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14538070559501648,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3952.4,
|
|
"valid_targets_min": 3027
|
|
},
|
|
{
|
|
"epoch": 3.86993006993007,
|
|
"grad_norm": 0.5503890705860522,
|
|
"learning_rate": 1.984675307747397e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13637414574623108,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3990.6,
|
|
"valid_targets_min": 3045
|
|
},
|
|
{
|
|
"epoch": 3.883916083916084,
|
|
"grad_norm": 0.5695849028067055,
|
|
"learning_rate": 1.970744526451356e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15931588411331177,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4204.8,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 3.8979020979020977,
|
|
"grad_norm": 0.5065264632685823,
|
|
"learning_rate": 1.956815164709707e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14099319279193878,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4106.9,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 3.911888111888112,
|
|
"grad_norm": 0.5552657926969193,
|
|
"learning_rate": 1.942887898412608e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1472746580839157,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3892.6,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 3.9258741258741257,
|
|
"grad_norm": 0.5873648605403345,
|
|
"learning_rate": 1.928963403348541e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15483799576759338,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4065.4,
|
|
"valid_targets_min": 2955
|
|
},
|
|
{
|
|
"epoch": 3.93986013986014,
|
|
"grad_norm": 0.5571666073607908,
|
|
"learning_rate": 1.91504235517152e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14902883768081665,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4142.4,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 3.953846153846154,
|
|
"grad_norm": 0.5534221036668195,
|
|
"learning_rate": 1.9011254293683067e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14316719770431519,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4087.0,
|
|
"valid_targets_min": 3266
|
|
},
|
|
{
|
|
"epoch": 3.9678321678321677,
|
|
"grad_norm": 0.5596823503493599,
|
|
"learning_rate": 1.8872133012256328e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14076735079288483,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3920.1,
|
|
"valid_targets_min": 3317
|
|
},
|
|
{
|
|
"epoch": 3.981818181818182,
|
|
"grad_norm": 0.5765862283093903,
|
|
"learning_rate": 1.8733066457974373e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12725147604942322,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3559.5,
|
|
"valid_targets_min": 2868
|
|
},
|
|
{
|
|
"epoch": 3.9958041958041957,
|
|
"grad_norm": 0.5779364420855677,
|
|
"learning_rate": 1.8594061378721057e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13181552290916443,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3682.4,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 4.008391608391609,
|
|
"grad_norm": 0.5346526605133614,
|
|
"learning_rate": 1.8455124519397308e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15485987067222595,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4141.9,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 4.022377622377622,
|
|
"grad_norm": 0.5766020959293648,
|
|
"learning_rate": 1.831626262159386e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13374757766723633,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3724.5,
|
|
"valid_targets_min": 3272
|
|
},
|
|
{
|
|
"epoch": 4.036363636363636,
|
|
"grad_norm": 0.6086344578776043,
|
|
"learning_rate": 1.817748242326409e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15734803676605225,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4231.8,
|
|
"valid_targets_min": 3283
|
|
},
|
|
{
|
|
"epoch": 4.05034965034965,
|
|
"grad_norm": 0.5838271193374448,
|
|
"learning_rate": 1.8038790658397097e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13393522799015045,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3853.5,
|
|
"valid_targets_min": 2955
|
|
},
|
|
{
|
|
"epoch": 4.0643356643356645,
|
|
"grad_norm": 0.5866672557191704,
|
|
"learning_rate": 1.7900194056690955e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13227057456970215,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4068.1,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 4.078321678321679,
|
|
"grad_norm": 0.5829979033149572,
|
|
"learning_rate": 1.7761699343226167e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13871875405311584,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4002.0,
|
|
"valid_targets_min": 2979
|
|
},
|
|
{
|
|
"epoch": 4.092307692307692,
|
|
"grad_norm": 0.563177618637755,
|
|
"learning_rate": 1.7623313238139335e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12607219815254211,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3888.6,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 4.106293706293706,
|
|
"grad_norm": 0.5860307723730551,
|
|
"learning_rate": 1.748504245629711e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1451467126607895,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4002.6,
|
|
"valid_targets_min": 3262
|
|
},
|
|
{
|
|
"epoch": 4.12027972027972,
|
|
"grad_norm": 0.5551191446812656,
|
|
"learning_rate": 1.7346893706970333e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14970549941062927,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4383.4,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 4.1342657342657345,
|
|
"grad_norm": 0.5581366796607292,
|
|
"learning_rate": 1.7208873693508493e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15094107389450073,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4262.2,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 4.148251748251749,
|
|
"grad_norm": 0.5729297719466728,
|
|
"learning_rate": 1.7070989113014483e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15359677374362946,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4198.6,
|
|
"valid_targets_min": 3511
|
|
},
|
|
{
|
|
"epoch": 4.162237762237762,
|
|
"grad_norm": 0.5634444934272489,
|
|
"learning_rate": 1.6933246656019613e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1567789912223816,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4295.5,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 4.176223776223776,
|
|
"grad_norm": 0.5581693723899023,
|
|
"learning_rate": 1.6795653006158977e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13514573872089386,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3887.1,
|
|
"valid_targets_min": 3602
|
|
},
|
|
{
|
|
"epoch": 4.19020979020979,
|
|
"grad_norm": 0.5863814075958925,
|
|
"learning_rate": 1.6658214839847168e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1320951133966446,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3769.1,
|
|
"valid_targets_min": 3233
|
|
},
|
|
{
|
|
"epoch": 4.2041958041958045,
|
|
"grad_norm": 0.5635215900428802,
|
|
"learning_rate": 1.6520938825954265e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14185290038585663,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4110.9,
|
|
"valid_targets_min": 3167
|
|
},
|
|
{
|
|
"epoch": 4.218181818181818,
|
|
"grad_norm": 0.5423622664089363,
|
|
"learning_rate": 1.638383162548229e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13250473141670227,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4070.2,
|
|
"valid_targets_min": 3372
|
|
},
|
|
{
|
|
"epoch": 4.232167832167832,
|
|
"grad_norm": 0.5675193264978978,
|
|
"learning_rate": 1.6246899891241995e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14088118076324463,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4075.2,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 4.246153846153846,
|
|
"grad_norm": 0.6175459473635595,
|
|
"learning_rate": 1.6110150267530017e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14475172758102417,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4005.4,
|
|
"valid_targets_min": 3295
|
|
},
|
|
{
|
|
"epoch": 4.26013986013986,
|
|
"grad_norm": 0.5504368434307025,
|
|
"learning_rate": 1.597358938980651e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377810835838318,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3906.4,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 4.2741258741258745,
|
|
"grad_norm": 0.5735624376122325,
|
|
"learning_rate": 1.583722388437317e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14436104893684387,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3985.4,
|
|
"valid_targets_min": 3227
|
|
},
|
|
{
|
|
"epoch": 4.288111888111888,
|
|
"grad_norm": 0.5295048011764109,
|
|
"learning_rate": 1.570106036805169e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12768390774726868,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3741.1,
|
|
"valid_targets_min": 3090
|
|
},
|
|
{
|
|
"epoch": 4.302097902097902,
|
|
"grad_norm": 0.5946384554538933,
|
|
"learning_rate": 1.5565105447862716e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15328478813171387,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4330.8,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 4.316083916083916,
|
|
"grad_norm": 0.5527044105773028,
|
|
"learning_rate": 1.5429365720705247e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14041993021965027,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4291.0,
|
|
"valid_targets_min": 3663
|
|
},
|
|
{
|
|
"epoch": 4.33006993006993,
|
|
"grad_norm": 0.5932036773483532,
|
|
"learning_rate": 1.5293847773036526e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14854060113430023,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4029.6,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 4.344055944055944,
|
|
"grad_norm": 0.5756488887636685,
|
|
"learning_rate": 1.5158558180552467e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14178456366062164,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3695.5,
|
|
"valid_targets_min": 2346
|
|
},
|
|
{
|
|
"epoch": 4.358041958041958,
|
|
"grad_norm": 0.5551445420921324,
|
|
"learning_rate": 1.5023503507868586e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11629106104373932,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3662.4,
|
|
"valid_targets_min": 2917
|
|
},
|
|
{
|
|
"epoch": 4.372027972027972,
|
|
"grad_norm": 0.5551336199610265,
|
|
"learning_rate": 1.4888690308201442e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1290501058101654,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3783.2,
|
|
"valid_targets_min": 3226
|
|
},
|
|
{
|
|
"epoch": 4.386013986013986,
|
|
"grad_norm": 0.5403841543747977,
|
|
"learning_rate": 1.4754125123050668e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13973750174045563,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4077.0,
|
|
"valid_targets_min": 2524
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"grad_norm": 0.5332210520239432,
|
|
"learning_rate": 1.4619814481881582e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16121509671211243,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4338.4,
|
|
"valid_targets_min": 3256
|
|
},
|
|
{
|
|
"epoch": 4.413986013986014,
|
|
"grad_norm": 0.5722141726370856,
|
|
"learning_rate": 1.4485764901808328e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13942039012908936,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3827.6,
|
|
"valid_targets_min": 2615
|
|
},
|
|
{
|
|
"epoch": 4.427972027972028,
|
|
"grad_norm": 0.5578695901658584,
|
|
"learning_rate": 1.435198288727766e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14053496718406677,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4390.1,
|
|
"valid_targets_min": 3765
|
|
},
|
|
{
|
|
"epoch": 4.441958041958042,
|
|
"grad_norm": 0.5694976114609359,
|
|
"learning_rate": 1.4218474929753358e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13226018846035004,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4177.5,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 4.455944055944056,
|
|
"grad_norm": 0.5771053577968344,
|
|
"learning_rate": 1.4085247507401188e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16159263253211975,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4184.8,
|
|
"valid_targets_min": 3045
|
|
},
|
|
{
|
|
"epoch": 4.46993006993007,
|
|
"grad_norm": 0.5281811897589362,
|
|
"learning_rate": 1.3952307084774599e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455651968717575,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3982.0,
|
|
"valid_targets_min": 2879
|
|
},
|
|
{
|
|
"epoch": 4.483916083916084,
|
|
"grad_norm": 0.5470105044154708,
|
|
"learning_rate": 1.3819660112501054e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340590864419937,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3864.2,
|
|
"valid_targets_min": 2878
|
|
},
|
|
{
|
|
"epoch": 4.497902097902098,
|
|
"grad_norm": 0.5516752626597579,
|
|
"learning_rate": 1.3687313026969003e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467994898557663,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4225.1,
|
|
"valid_targets_min": 3888
|
|
},
|
|
{
|
|
"epoch": 4.511888111888112,
|
|
"grad_norm": 0.5312163411468419,
|
|
"learning_rate": 1.3555272250015575e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12923194468021393,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3883.1,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 4.525874125874126,
|
|
"grad_norm": 0.5771253783673834,
|
|
"learning_rate": 1.342354418861501e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13594885170459747,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3983.5,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 4.5398601398601395,
|
|
"grad_norm": 0.5781191215370589,
|
|
"learning_rate": 1.329213523456772e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15987467765808105,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4329.5,
|
|
"valid_targets_min": 3487
|
|
},
|
|
{
|
|
"epoch": 4.553846153846154,
|
|
"grad_norm": 0.529526946498919,
|
|
"learning_rate": 1.316105176419018e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13953280448913574,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4159.1,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 4.567832167832168,
|
|
"grad_norm": 0.5921815456958419,
|
|
"learning_rate": 1.3030300138005516e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14543133974075317,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4053.8,
|
|
"valid_targets_min": 3316
|
|
},
|
|
{
|
|
"epoch": 4.581818181818182,
|
|
"grad_norm": 0.5523405721602177,
|
|
"learning_rate": 1.2899886700434885e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13520309329032898,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4057.2,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 4.595804195804196,
|
|
"grad_norm": 0.566729641970418,
|
|
"learning_rate": 1.2769817779489606e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1414514183998108,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3926.2,
|
|
"valid_targets_min": 2998
|
|
},
|
|
{
|
|
"epoch": 4.6097902097902095,
|
|
"grad_norm": 0.5298872696100833,
|
|
"learning_rate": 1.2640099686464157e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13758346438407898,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4043.6,
|
|
"valid_targets_min": 3057
|
|
},
|
|
{
|
|
"epoch": 4.623776223776224,
|
|
"grad_norm": 0.5428239821038428,
|
|
"learning_rate": 1.2510738715629866e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377483308315277,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4040.9,
|
|
"valid_targets_min": 3152
|
|
},
|
|
{
|
|
"epoch": 4.637762237762238,
|
|
"grad_norm": 0.5376319363172789,
|
|
"learning_rate": 1.2381741143929547e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13625864684581757,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4123.6,
|
|
"valid_targets_min": 3342
|
|
},
|
|
{
|
|
"epoch": 4.651748251748252,
|
|
"grad_norm": 0.5376602142844261,
|
|
"learning_rate": 1.22531132306729e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259552240371704,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4075.9,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 4.665734265734265,
|
|
"grad_norm": 0.5540515888363535,
|
|
"learning_rate": 1.212486121723281e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14987067878246307,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4072.9,
|
|
"valid_targets_min": 3441
|
|
},
|
|
{
|
|
"epoch": 4.6797202797202795,
|
|
"grad_norm": 0.5164474972749298,
|
|
"learning_rate": 1.1996991326742484e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14604651927947998,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4272.6,
|
|
"valid_targets_min": 3428
|
|
},
|
|
{
|
|
"epoch": 4.693706293706294,
|
|
"grad_norm": 0.5824465695451619,
|
|
"learning_rate": 1.1869509763793497e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14033998548984528,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4183.6,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 4.707692307692308,
|
|
"grad_norm": 0.5404895105893867,
|
|
"learning_rate": 1.174242271413473e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15117552876472473,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4291.9,
|
|
"valid_targets_min": 3409
|
|
},
|
|
{
|
|
"epoch": 4.721678321678322,
|
|
"grad_norm": 0.5489265801598253,
|
|
"learning_rate": 1.1615736344372203e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1279614418745041,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3617.1,
|
|
"valid_targets_min": 3062
|
|
},
|
|
{
|
|
"epoch": 4.735664335664335,
|
|
"grad_norm": 0.5308545623191452,
|
|
"learning_rate": 1.148945680166989e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1290333867073059,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3886.9,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 4.7496503496503495,
|
|
"grad_norm": 0.5503407817873848,
|
|
"learning_rate": 1.136359021345139e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13935747742652893,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4160.4,
|
|
"valid_targets_min": 3091
|
|
},
|
|
{
|
|
"epoch": 4.763636363636364,
|
|
"grad_norm": 0.5674354574812547,
|
|
"learning_rate": 1.123814268710267e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.154280424118042,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4431.9,
|
|
"valid_targets_min": 3089
|
|
},
|
|
{
|
|
"epoch": 4.777622377622378,
|
|
"grad_norm": 0.5078199113419997,
|
|
"learning_rate": 1.1113120309675645e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12909035384655,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4203.4,
|
|
"valid_targets_min": 3700
|
|
},
|
|
{
|
|
"epoch": 4.791608391608392,
|
|
"grad_norm": 0.528464450299718,
|
|
"learning_rate": 1.098852914759292e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12253694981336594,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3791.1,
|
|
"valid_targets_min": 3212
|
|
},
|
|
{
|
|
"epoch": 4.805594405594405,
|
|
"grad_norm": 0.5539863408262431,
|
|
"learning_rate": 1.086437524635331e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329549103975296,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4068.2,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 4.8195804195804195,
|
|
"grad_norm": 0.564749576404987,
|
|
"learning_rate": 1.0740664630238592e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12435588240623474,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3838.9,
|
|
"valid_targets_min": 2756
|
|
},
|
|
{
|
|
"epoch": 4.833566433566434,
|
|
"grad_norm": 0.5470450188336874,
|
|
"learning_rate": 1.0617403302021128e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14149603247642517,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4215.6,
|
|
"valid_targets_min": 3468
|
|
},
|
|
{
|
|
"epoch": 4.847552447552448,
|
|
"grad_norm": 0.5565904255417349,
|
|
"learning_rate": 1.0494597242672647e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13995948433876038,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4206.4,
|
|
"valid_targets_min": 3317
|
|
},
|
|
{
|
|
"epoch": 4.861538461538462,
|
|
"grad_norm": 0.6516067057690171,
|
|
"learning_rate": 1.037225241107399e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13070757687091827,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3938.9,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 4.875524475524475,
|
|
"grad_norm": 0.5643871855173305,
|
|
"learning_rate": 1.025037474372599e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15128248929977417,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4264.1,
|
|
"valid_targets_min": 3654
|
|
},
|
|
{
|
|
"epoch": 4.8895104895104895,
|
|
"grad_norm": 0.5377462969057915,
|
|
"learning_rate": 1.0128970154461424e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14507511258125305,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4460.8,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 4.903496503496504,
|
|
"grad_norm": 0.6056957063528053,
|
|
"learning_rate": 1.000804453415801e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1173890233039856,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3437.2,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 4.917482517482518,
|
|
"grad_norm": 0.5482919546590974,
|
|
"learning_rate": 9.887603750452646e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13681714236736298,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3923.8,
|
|
"valid_targets_min": 3110
|
|
},
|
|
{
|
|
"epoch": 4.931468531468531,
|
|
"grad_norm": 0.5724368546420713,
|
|
"learning_rate": 9.767653647456614e-06,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14031797647476196,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3936.8,
|
|
"valid_targets_min": 3029
|
|
},
|
|
{
|
|
"epoch": 4.945454545454545,
|
|
"grad_norm": 0.5162764043379312,
|
|
"learning_rate": 9.648200045472071e-06,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13154445588588715,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4408.4,
|
|
"valid_targets_min": 3414
|
|
},
|
|
{
|
|
"epoch": 4.9594405594405595,
|
|
"grad_norm": 0.5574455444195064,
|
|
"learning_rate": 9.5292487407096e-06,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1369532346725464,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3946.4,
|
|
"valid_targets_min": 3012
|
|
},
|
|
{
|
|
"epoch": 4.973426573426574,
|
|
"grad_norm": 0.5516667838189067,
|
|
"learning_rate": 9.410805505006974e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14430870115756989,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3940.4,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 4.987412587412587,
|
|
"grad_norm": 0.5624377252296943,
|
|
"learning_rate": 9.29287608554907e-06,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13863155245780945,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3757.0,
|
|
"valid_targets_min": 3237
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.7795219641149852,
|
|
"learning_rate": 9.175466204589039e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640479803085327,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3898.2,
|
|
"valid_targets_min": 3124
|
|
},
|
|
{
|
|
"epoch": 5.013986013986014,
|
|
"grad_norm": 0.5219102843986503,
|
|
"learning_rate": 9.0585815591706e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12575533986091614,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4108.4,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 5.027972027972028,
|
|
"grad_norm": 0.5652738851475124,
|
|
"learning_rate": 8.942227820851653e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312795877456665,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4253.5,
|
|
"valid_targets_min": 3575
|
|
},
|
|
{
|
|
"epoch": 5.041958041958042,
|
|
"grad_norm": 0.5488102769899961,
|
|
"learning_rate": 8.82641063542904e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13220839202404022,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3746.8,
|
|
"valid_targets_min": 2958
|
|
},
|
|
{
|
|
"epoch": 5.055944055944056,
|
|
"grad_norm": 0.5326044748082528,
|
|
"learning_rate": 8.711135622664622e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13870134949684143,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4063.0,
|
|
"valid_targets_min": 2738
|
|
},
|
|
{
|
|
"epoch": 5.06993006993007,
|
|
"grad_norm": 0.5286347807264584,
|
|
"learning_rate": 8.596408376012562e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14026141166687012,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4341.8,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 5.083916083916084,
|
|
"grad_norm": 0.5694766098358943,
|
|
"learning_rate": 8.482234462347955e-06,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1287498027086258,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3671.8,
|
|
"valid_targets_min": 2751
|
|
},
|
|
{
|
|
"epoch": 5.0979020979020975,
|
|
"grad_norm": 0.5253387879455069,
|
|
"learning_rate": 8.368619421696693e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13443228602409363,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3833.9,
|
|
"valid_targets_min": 2874
|
|
},
|
|
{
|
|
"epoch": 5.111888111888112,
|
|
"grad_norm": 0.5620562693740041,
|
|
"learning_rate": 8.255568766966613e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14100167155265808,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4265.2,
|
|
"valid_targets_min": 3186
|
|
},
|
|
{
|
|
"epoch": 5.125874125874126,
|
|
"grad_norm": 0.5796942165125105,
|
|
"learning_rate": 8.143087983680061e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12822787463665009,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4018.0,
|
|
"valid_targets_min": 2579
|
|
},
|
|
{
|
|
"epoch": 5.13986013986014,
|
|
"grad_norm": 0.5690161371950347,
|
|
"learning_rate": 8.031182529707664e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1354401558637619,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4168.8,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 5.153846153846154,
|
|
"grad_norm": 0.5629144093332551,
|
|
"learning_rate": 7.919857835003537e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15186838805675507,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4165.1,
|
|
"valid_targets_min": 3368
|
|
},
|
|
{
|
|
"epoch": 5.1678321678321675,
|
|
"grad_norm": 0.563743984339271,
|
|
"learning_rate": 7.80911930134177e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14788247644901276,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4042.1,
|
|
"valid_targets_min": 3037
|
|
},
|
|
{
|
|
"epoch": 5.181818181818182,
|
|
"grad_norm": 0.5709261686227755,
|
|
"learning_rate": 7.698972302054363e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12226757407188416,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3660.8,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 5.195804195804196,
|
|
"grad_norm": 0.5230511431966958,
|
|
"learning_rate": 7.589422181770445e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555660367012024,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4756.2,
|
|
"valid_targets_min": 3550
|
|
},
|
|
{
|
|
"epoch": 5.20979020979021,
|
|
"grad_norm": 0.5355198645927597,
|
|
"learning_rate": 7.480474256157009e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13351857662200928,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4117.5,
|
|
"valid_targets_min": 3253
|
|
},
|
|
{
|
|
"epoch": 5.223776223776224,
|
|
"grad_norm": 0.5608540652136996,
|
|
"learning_rate": 7.3721338116609e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11535479128360748,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3693.8,
|
|
"valid_targets_min": 3153
|
|
},
|
|
{
|
|
"epoch": 5.2377622377622375,
|
|
"grad_norm": 0.5154018340638021,
|
|
"learning_rate": 7.264406105252371e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13983562588691711,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4123.9,
|
|
"valid_targets_min": 3040
|
|
},
|
|
{
|
|
"epoch": 5.251748251748252,
|
|
"grad_norm": 0.5467856713482777,
|
|
"learning_rate": 7.15729636416995e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1238892674446106,
|
|
"step": 1880,
|
|
"valid_targets_mean": 3791.5,
|
|
"valid_targets_min": 2608
|
|
},
|
|
{
|
|
"epoch": 5.265734265734266,
|
|
"grad_norm": 0.54842359149951,
|
|
"learning_rate": 7.050809785666843e-06,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13005605340003967,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4164.2,
|
|
"valid_targets_min": 3396
|
|
},
|
|
{
|
|
"epoch": 5.27972027972028,
|
|
"grad_norm": 0.545252910180718,
|
|
"learning_rate": 6.944951536758704e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13909177482128143,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4063.8,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 5.293706293706293,
|
|
"grad_norm": 0.5397346270972334,
|
|
"learning_rate": 6.83972675397298e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13181471824645996,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3866.4,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 5.3076923076923075,
|
|
"grad_norm": 0.5548237621459055,
|
|
"learning_rate": 6.7351405430995945e-06,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13173869252204895,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3641.2,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 5.321678321678322,
|
|
"grad_norm": 0.5345974972781379,
|
|
"learning_rate": 6.631197978943273e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13525938987731934,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4209.6,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 5.335664335664336,
|
|
"grad_norm": 0.5454110327227513,
|
|
"learning_rate": 6.527904105077243e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13628993928432465,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3950.0,
|
|
"valid_targets_min": 3165
|
|
},
|
|
{
|
|
"epoch": 5.34965034965035,
|
|
"grad_norm": 0.5592856850404151,
|
|
"learning_rate": 6.425263933598549e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14988678693771362,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4385.8,
|
|
"valid_targets_min": 3143
|
|
},
|
|
{
|
|
"epoch": 5.363636363636363,
|
|
"grad_norm": 0.5382247504745173,
|
|
"learning_rate": 6.323282444884826e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12678101658821106,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3971.8,
|
|
"valid_targets_min": 3394
|
|
},
|
|
{
|
|
"epoch": 5.3776223776223775,
|
|
"grad_norm": 0.5598315390711681,
|
|
"learning_rate": 6.221964587352653e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14666910469532013,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4419.0,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 5.391608391608392,
|
|
"grad_norm": 0.5706554507694481,
|
|
"learning_rate": 6.121315277217441e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12421803176403046,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3574.9,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 5.405594405594406,
|
|
"grad_norm": 0.544445641577291,
|
|
"learning_rate": 6.0213393982548555e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13498976826667786,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3970.4,
|
|
"valid_targets_min": 2889
|
|
},
|
|
{
|
|
"epoch": 5.41958041958042,
|
|
"grad_norm": 0.5301613281703389,
|
|
"learning_rate": 5.922041801563898e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342143714427948,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4135.8,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 5.433566433566433,
|
|
"grad_norm": 0.5854176960773179,
|
|
"learning_rate": 5.823427305331461e-06,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1379438191652298,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3838.6,
|
|
"valid_targets_min": 3264
|
|
},
|
|
{
|
|
"epoch": 5.4475524475524475,
|
|
"grad_norm": 0.5436336083045578,
|
|
"learning_rate": 5.72550069459858e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11156716197729111,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3711.8,
|
|
"valid_targets_min": 2618
|
|
},
|
|
{
|
|
"epoch": 5.461538461538462,
|
|
"grad_norm": 0.5441899128462344,
|
|
"learning_rate": 5.628266721028226e-06,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13238012790679932,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4007.5,
|
|
"valid_targets_min": 2541
|
|
},
|
|
{
|
|
"epoch": 5.475524475524476,
|
|
"grad_norm": 0.5509661923488149,
|
|
"learning_rate": 5.5317301026747575e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.122523233294487,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3560.5,
|
|
"valid_targets_min": 2805
|
|
},
|
|
{
|
|
"epoch": 5.489510489510489,
|
|
"grad_norm": 0.5387065769733951,
|
|
"learning_rate": 5.435895523754957e-06,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11541329324245453,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3595.0,
|
|
"valid_targets_min": 2677
|
|
},
|
|
{
|
|
"epoch": 5.503496503496503,
|
|
"grad_norm": 0.5689105133041431,
|
|
"learning_rate": 5.340767634420794e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14338526129722595,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4151.4,
|
|
"valid_targets_min": 3121
|
|
},
|
|
{
|
|
"epoch": 5.5174825174825175,
|
|
"grad_norm": 0.5512275132765089,
|
|
"learning_rate": 5.24635105053372e-06,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12350888550281525,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3481.2,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 5.531468531468532,
|
|
"grad_norm": 0.5431318474984762,
|
|
"learning_rate": 5.15265035344076e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12556716799736023,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4025.4,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 5.545454545454545,
|
|
"grad_norm": 0.5234687553570591,
|
|
"learning_rate": 5.059670089752166e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13217531144618988,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4185.8,
|
|
"valid_targets_min": 3466
|
|
},
|
|
{
|
|
"epoch": 5.559440559440559,
|
|
"grad_norm": 0.5398701726946824,
|
|
"learning_rate": 4.967414771120837e-06,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12115156650543213,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3713.0,
|
|
"valid_targets_min": 3033
|
|
},
|
|
{
|
|
"epoch": 5.573426573426573,
|
|
"grad_norm": 0.5233699630630368,
|
|
"learning_rate": 4.875888874023358e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13094103336334229,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3981.8,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 5.5874125874125875,
|
|
"grad_norm": 0.5586978374619914,
|
|
"learning_rate": 4.78509683954284e-06,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12762457132339478,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3771.9,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 5.601398601398602,
|
|
"grad_norm": 0.5548139517512097,
|
|
"learning_rate": 4.695043073153398e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12287722527980804,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3610.1,
|
|
"valid_targets_min": 2849
|
|
},
|
|
{
|
|
"epoch": 5.615384615384615,
|
|
"grad_norm": 0.5591205721510735,
|
|
"learning_rate": 4.605731944506377e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14226460456848145,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4057.4,
|
|
"valid_targets_min": 3171
|
|
},
|
|
{
|
|
"epoch": 5.629370629370629,
|
|
"grad_norm": 0.5834660820002847,
|
|
"learning_rate": 4.5171677872183506e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12792709469795227,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3787.1,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 5.643356643356643,
|
|
"grad_norm": 0.5665120389034493,
|
|
"learning_rate": 4.429354898660829e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12466013431549072,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3776.8,
|
|
"valid_targets_min": 2837
|
|
},
|
|
{
|
|
"epoch": 5.6573426573426575,
|
|
"grad_norm": 0.5403150986710322,
|
|
"learning_rate": 4.3422975397517455e-06,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12709736824035645,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3963.9,
|
|
"valid_targets_min": 2884
|
|
},
|
|
{
|
|
"epoch": 5.671328671328672,
|
|
"grad_norm": 0.566316186869702,
|
|
"learning_rate": 4.255999934748673e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13081523776054382,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3995.1,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 5.685314685314685,
|
|
"grad_norm": 0.5427526585310279,
|
|
"learning_rate": 4.1704662710439156e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348644345998764,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3812.4,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 5.699300699300699,
|
|
"grad_norm": 0.543804002042524,
|
|
"learning_rate": 4.085700698961252e-06,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1415868103504181,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4173.8,
|
|
"valid_targets_min": 3361
|
|
},
|
|
{
|
|
"epoch": 5.713286713286713,
|
|
"grad_norm": 0.5598561184404056,
|
|
"learning_rate": 4.00170733155461e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14282873272895813,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4030.9,
|
|
"valid_targets_min": 3101
|
|
},
|
|
{
|
|
"epoch": 5.7272727272727275,
|
|
"grad_norm": 0.5358578632226371,
|
|
"learning_rate": 3.9184902444084575e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16486287117004395,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4656.9,
|
|
"valid_targets_min": 3610
|
|
},
|
|
{
|
|
"epoch": 5.741258741258742,
|
|
"grad_norm": 0.5307718718193138,
|
|
"learning_rate": 3.836053475440058e-06,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442933827638626,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4207.2,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 5.755244755244755,
|
|
"grad_norm": 0.5732719806023284,
|
|
"learning_rate": 3.7544010247035247e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12907767295837402,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3727.5,
|
|
"valid_targets_min": 3049
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.5276593358765387,
|
|
"learning_rate": 3.6735368541957494e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12089847773313522,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3708.5,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 5.783216783216783,
|
|
"grad_norm": 0.5519567254529144,
|
|
"learning_rate": 3.5934648876641287e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11965890228748322,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3717.1,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 5.7972027972027975,
|
|
"grad_norm": 0.5322446259091591,
|
|
"learning_rate": 3.5141890104162e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12779703736305237,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4028.6,
|
|
"valid_targets_min": 3267
|
|
},
|
|
{
|
|
"epoch": 5.811188811188811,
|
|
"grad_norm": 0.5334986962890785,
|
|
"learning_rate": 3.4357130691311057e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13747793436050415,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4066.5,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 5.825174825174825,
|
|
"grad_norm": 0.5476187220706545,
|
|
"learning_rate": 3.3580408716729342e-06,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13098829984664917,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4036.9,
|
|
"valid_targets_min": 3111
|
|
},
|
|
{
|
|
"epoch": 5.839160839160839,
|
|
"grad_norm": 0.5266622418784924,
|
|
"learning_rate": 3.2811761869059524e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13867492973804474,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4135.0,
|
|
"valid_targets_min": 3449
|
|
},
|
|
{
|
|
"epoch": 5.853146853146853,
|
|
"grad_norm": 0.5437713900346135,
|
|
"learning_rate": 3.205122744511746e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924904584884644,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3806.0,
|
|
"valid_targets_min": 2644
|
|
},
|
|
{
|
|
"epoch": 5.867132867132867,
|
|
"grad_norm": 0.5349444010988446,
|
|
"learning_rate": 3.129884234808238e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13737419247627258,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3994.4,
|
|
"valid_targets_min": 2756
|
|
},
|
|
{
|
|
"epoch": 5.881118881118881,
|
|
"grad_norm": 0.5553025674903422,
|
|
"learning_rate": 3.0554643085706037e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12544646859169006,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4000.6,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 5.895104895104895,
|
|
"grad_norm": 0.6817629822110741,
|
|
"learning_rate": 2.981866576854164e-06,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12345971167087555,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3586.5,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 5.909090909090909,
|
|
"grad_norm": 0.5590147260294989,
|
|
"learning_rate": 2.909094610819134e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251392960548401,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3674.2,
|
|
"valid_targets_min": 2820
|
|
},
|
|
{
|
|
"epoch": 5.923076923076923,
|
|
"grad_norm": 0.557366303536245,
|
|
"learning_rate": 2.8371519415573635e-06,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13488712906837463,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3840.5,
|
|
"valid_targets_min": 2575
|
|
},
|
|
{
|
|
"epoch": 5.937062937062937,
|
|
"grad_norm": 0.5544828872161118,
|
|
"learning_rate": 2.7660420599209726e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14264677464962006,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4114.9,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 5.951048951048951,
|
|
"grad_norm": 0.524463252308108,
|
|
"learning_rate": 2.6957684163530017e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365211009979248,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4064.0,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 5.965034965034965,
|
|
"grad_norm": 0.5565069001442264,
|
|
"learning_rate": 2.6263344207199446e-06,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12422733008861542,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3880.2,
|
|
"valid_targets_min": 3098
|
|
},
|
|
{
|
|
"epoch": 5.979020979020979,
|
|
"grad_norm": 0.5546179099722904,
|
|
"learning_rate": 2.557743442146343e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12977087497711182,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3723.1,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 5.993006993006993,
|
|
"grad_norm": 0.5418205062024356,
|
|
"learning_rate": 2.489998808851255e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12254351377487183,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3896.9,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 6.0055944055944055,
|
|
"grad_norm": 0.552957468149782,
|
|
"learning_rate": 2.423103807986802e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14880408346652985,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4114.1,
|
|
"valid_targets_min": 3250
|
|
},
|
|
{
|
|
"epoch": 6.01958041958042,
|
|
"grad_norm": 0.5492004448838709,
|
|
"learning_rate": 2.3570616854786364e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14903533458709717,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4255.5,
|
|
"valid_targets_min": 2806
|
|
},
|
|
{
|
|
"epoch": 6.033566433566434,
|
|
"grad_norm": 0.571435073296382,
|
|
"learning_rate": 2.291875645868471e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13312679529190063,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3886.4,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 6.047552447552447,
|
|
"grad_norm": 0.5374462445034749,
|
|
"learning_rate": 2.227548852158552e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13961821794509888,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4129.4,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 6.061538461538461,
|
|
"grad_norm": 0.5438203346611344,
|
|
"learning_rate": 2.1640844256582262e-06,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12883234024047852,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4176.5,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 6.0755244755244755,
|
|
"grad_norm": 0.5505312743429058,
|
|
"learning_rate": 2.10148544583243e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427292674779892,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4324.2,
|
|
"valid_targets_min": 3172
|
|
},
|
|
{
|
|
"epoch": 6.08951048951049,
|
|
"grad_norm": 0.5667608880027779,
|
|
"learning_rate": 2.039754950152313e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438840627670288,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4165.2,
|
|
"valid_targets_min": 3731
|
|
},
|
|
{
|
|
"epoch": 6.103496503496504,
|
|
"grad_norm": 0.5400616178402221,
|
|
"learning_rate": 1.978895933947835e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11797113716602325,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3583.9,
|
|
"valid_targets_min": 2896
|
|
},
|
|
{
|
|
"epoch": 6.117482517482517,
|
|
"grad_norm": 0.5821801793796898,
|
|
"learning_rate": 1.918911350262411e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1384894847869873,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3854.6,
|
|
"valid_targets_min": 3037
|
|
},
|
|
{
|
|
"epoch": 6.131468531468531,
|
|
"grad_norm": 0.5502620689863138,
|
|
"learning_rate": 1.859804109709651e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13089479506015778,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4039.4,
|
|
"valid_targets_min": 3085
|
|
},
|
|
{
|
|
"epoch": 6.1454545454545455,
|
|
"grad_norm": 0.5322934652272718,
|
|
"learning_rate": 1.8015770803320997e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13279101252555847,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4039.9,
|
|
"valid_targets_min": 3100
|
|
},
|
|
{
|
|
"epoch": 6.15944055944056,
|
|
"grad_norm": 0.5632067853325199,
|
|
"learning_rate": 1.744233087462095e-06,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13131995499134064,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4003.1,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 6.173426573426573,
|
|
"grad_norm": 0.553891728108681,
|
|
"learning_rate": 1.6877749135846521e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12857134640216827,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4070.9,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 6.187412587412587,
|
|
"grad_norm": 0.5454514353907892,
|
|
"learning_rate": 1.6322052982024739e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1367558240890503,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4020.5,
|
|
"valid_targets_min": 3350
|
|
},
|
|
{
|
|
"epoch": 6.201398601398601,
|
|
"grad_norm": 0.5198130923177857,
|
|
"learning_rate": 1.577526937703e-06,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13713058829307556,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4349.2,
|
|
"valid_targets_min": 3645
|
|
},
|
|
{
|
|
"epoch": 6.2153846153846155,
|
|
"grad_norm": 0.5571955144273739,
|
|
"learning_rate": 1.5237424852275905e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13345694541931152,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3902.6,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 6.22937062937063,
|
|
"grad_norm": 0.5457269649577497,
|
|
"learning_rate": 1.4708545505427796e-06,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13142569363117218,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4007.6,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 6.243356643356643,
|
|
"grad_norm": 0.5554927561165469,
|
|
"learning_rate": 1.418865699913643e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138395756483078,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4075.2,
|
|
"valid_targets_min": 3224
|
|
},
|
|
{
|
|
"epoch": 6.257342657342657,
|
|
"grad_norm": 0.5404990692630535,
|
|
"learning_rate": 1.3677784559792672e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11688215285539627,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3285.1,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 6.271328671328671,
|
|
"grad_norm": 0.531430730417053,
|
|
"learning_rate": 1.3175952976303675e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138889878988266,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4228.5,
|
|
"valid_targets_min": 2868
|
|
},
|
|
{
|
|
"epoch": 6.2853146853146855,
|
|
"grad_norm": 0.5253694275916504,
|
|
"learning_rate": 1.268318659888974e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129787415266037,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4033.1,
|
|
"valid_targets_min": 3074
|
|
},
|
|
{
|
|
"epoch": 6.2993006993007,
|
|
"grad_norm": 0.5497783467552961,
|
|
"learning_rate": 1.2199509337903103e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13305220007896423,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3959.0,
|
|
"valid_targets_min": 3331
|
|
},
|
|
{
|
|
"epoch": 6.313286713286713,
|
|
"grad_norm": 0.5448391159677953,
|
|
"learning_rate": 1.172494466266747e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14321637153625488,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4348.4,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 6.327272727272727,
|
|
"grad_norm": 0.5238073262215436,
|
|
"learning_rate": 1.1259515600339465e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13795027136802673,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4254.9,
|
|
"valid_targets_min": 3826
|
|
},
|
|
{
|
|
"epoch": 6.341258741258741,
|
|
"grad_norm": 0.5784905306811993,
|
|
"learning_rate": 1.0803244734790996e-06,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256517767906189,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4051.8,
|
|
"valid_targets_min": 3172
|
|
},
|
|
{
|
|
"epoch": 6.3552447552447555,
|
|
"grad_norm": 0.5269490201207044,
|
|
"learning_rate": 1.0356154205513724e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13470402359962463,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4274.8,
|
|
"valid_targets_min": 3491
|
|
},
|
|
{
|
|
"epoch": 6.36923076923077,
|
|
"grad_norm": 0.538753432570666,
|
|
"learning_rate": 9.918265706544617e-07,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12407108396291733,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3773.9,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 6.383216783216783,
|
|
"grad_norm": 0.5368620159533775,
|
|
"learning_rate": 9.489600485413297e-07,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13677144050598145,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4002.6,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 6.397202797202797,
|
|
"grad_norm": 0.5426039569577848,
|
|
"learning_rate": 9.070179342111163e-07,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13231492042541504,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3965.5,
|
|
"valid_targets_min": 3125
|
|
},
|
|
{
|
|
"epoch": 6.411188811188811,
|
|
"grad_norm": 0.5749418323529093,
|
|
"learning_rate": 8.660022628082033e-07,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334465742111206,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3806.8,
|
|
"valid_targets_min": 2858
|
|
},
|
|
{
|
|
"epoch": 6.4251748251748255,
|
|
"grad_norm": 0.5390735537017816,
|
|
"learning_rate": 8.259150245234671e-07,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13433882594108582,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4025.5,
|
|
"valid_targets_min": 3216
|
|
},
|
|
{
|
|
"epoch": 6.439160839160839,
|
|
"grad_norm": 0.5333577613609506,
|
|
"learning_rate": 7.867581644977029e-07,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12350418418645859,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3507.1,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 6.453146853146853,
|
|
"grad_norm": 0.5202092545268794,
|
|
"learning_rate": 7.485335827272555e-07,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12309978902339935,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3788.5,
|
|
"valid_targets_min": 3092
|
|
},
|
|
{
|
|
"epoch": 6.467132867132867,
|
|
"grad_norm": 0.5503621055173982,
|
|
"learning_rate": 7.11243133971804e-07,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11553049087524414,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3731.9,
|
|
"valid_targets_min": 3035
|
|
},
|
|
{
|
|
"epoch": 6.481118881118881,
|
|
"grad_norm": 0.5575585617975486,
|
|
"learning_rate": 6.748886276643874e-07,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14074815809726715,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3874.0,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 6.495104895104895,
|
|
"grad_norm": 0.5620344157367718,
|
|
"learning_rate": 6.394718278235923e-07,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184072077274323,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4030.6,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 6.509090909090909,
|
|
"grad_norm": 0.5346237968435054,
|
|
"learning_rate": 6.049944529679641e-07,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130045086145401,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4099.5,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 6.523076923076923,
|
|
"grad_norm": 0.5410265786955848,
|
|
"learning_rate": 5.714581760326133e-07,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14165914058685303,
|
|
"step": 2335,
|
|
"valid_targets_mean": 4237.1,
|
|
"valid_targets_min": 3637
|
|
},
|
|
{
|
|
"epoch": 6.537062937062937,
|
|
"grad_norm": 0.5346944294313545,
|
|
"learning_rate": 5.388646242880446e-07,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14222806692123413,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4174.2,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 6.551048951048951,
|
|
"grad_norm": 0.5386590991971009,
|
|
"learning_rate": 5.072153792611967e-07,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.147568941116333,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4274.8,
|
|
"valid_targets_min": 2955
|
|
},
|
|
{
|
|
"epoch": 6.565034965034965,
|
|
"grad_norm": 0.5331131790767714,
|
|
"learning_rate": 4.765119766587023e-07,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13249267637729645,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3939.5,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 6.579020979020979,
|
|
"grad_norm": 0.5390748002008539,
|
|
"learning_rate": 4.4675590629237543e-07,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14052125811576843,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4178.8,
|
|
"valid_targets_min": 3183
|
|
},
|
|
{
|
|
"epoch": 6.593006993006993,
|
|
"grad_norm": 0.5409625226651458,
|
|
"learning_rate": 4.1794861200691317e-07,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1431492567062378,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4139.4,
|
|
"valid_targets_min": 2476
|
|
},
|
|
{
|
|
"epoch": 6.606993006993007,
|
|
"grad_norm": 0.5264929203806954,
|
|
"learning_rate": 3.9009149160984305e-07,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14085371792316437,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4276.2,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 6.620979020979021,
|
|
"grad_norm": 0.527456064000642,
|
|
"learning_rate": 3.6318589680369276e-07,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12847009301185608,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4058.8,
|
|
"valid_targets_min": 3111
|
|
},
|
|
{
|
|
"epoch": 6.634965034965035,
|
|
"grad_norm": 0.535149286903409,
|
|
"learning_rate": 3.3723313312040927e-07,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440798044204712,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4225.2,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 6.648951048951049,
|
|
"grad_norm": 0.5572363983642188,
|
|
"learning_rate": 3.1223445985800294e-07,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11801092326641083,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3499.2,
|
|
"valid_targets_min": 2751
|
|
},
|
|
{
|
|
"epoch": 6.662937062937063,
|
|
"grad_norm": 0.5441750509803539,
|
|
"learning_rate": 2.88191090019454e-07,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14033466577529907,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3916.5,
|
|
"valid_targets_min": 3171
|
|
},
|
|
{
|
|
"epoch": 6.676923076923077,
|
|
"grad_norm": 0.5320750039573556,
|
|
"learning_rate": 2.651041902538332e-07,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13491207361221313,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3886.1,
|
|
"valid_targets_min": 2984
|
|
},
|
|
{
|
|
"epoch": 6.690909090909091,
|
|
"grad_norm": 0.5549125626831143,
|
|
"learning_rate": 2.429748807997201e-07,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348915696144104,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3911.8,
|
|
"valid_targets_min": 3358
|
|
},
|
|
{
|
|
"epoch": 6.704895104895105,
|
|
"grad_norm": 0.527522284417272,
|
|
"learning_rate": 2.2180423543082253e-07,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14493533968925476,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4229.1,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 6.718881118881119,
|
|
"grad_norm": 0.5422104630142606,
|
|
"learning_rate": 2.0159328140389346e-07,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292484998703003,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3970.4,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 6.732867132867133,
|
|
"grad_norm": 0.5413238372734106,
|
|
"learning_rate": 1.8234299940886434e-07,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14486880600452423,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4450.5,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 6.746853146853147,
|
|
"grad_norm": 0.5611932644120156,
|
|
"learning_rate": 1.640543235212877e-07,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1367993950843811,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3955.9,
|
|
"valid_targets_min": 2917
|
|
},
|
|
{
|
|
"epoch": 6.7608391608391605,
|
|
"grad_norm": 0.5335539255184142,
|
|
"learning_rate": 1.467281411569821e-07,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371554732322693,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3969.2,
|
|
"valid_targets_min": 3193
|
|
},
|
|
{
|
|
"epoch": 6.774825174825175,
|
|
"grad_norm": 0.5248690412448175,
|
|
"learning_rate": 1.303652930289956e-07,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12784817814826965,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4070.5,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 6.788811188811189,
|
|
"grad_norm": 0.5406486609963523,
|
|
"learning_rate": 1.1496657310680282e-07,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12824086844921112,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3958.2,
|
|
"valid_targets_min": 2986
|
|
},
|
|
{
|
|
"epoch": 6.802797202797203,
|
|
"grad_norm": 0.5626681678696616,
|
|
"learning_rate": 1.0053272857777797e-07,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13599160313606262,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4058.1,
|
|
"valid_targets_min": 3411
|
|
},
|
|
{
|
|
"epoch": 6.816783216783216,
|
|
"grad_norm": 0.5283427444759582,
|
|
"learning_rate": 8.706445981093937e-08,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1412394791841507,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4395.0,
|
|
"valid_targets_min": 3198
|
|
},
|
|
{
|
|
"epoch": 6.8307692307692305,
|
|
"grad_norm": 0.5438460618102079,
|
|
"learning_rate": 7.45624203229789e-08,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13004207611083984,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3813.1,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 6.844755244755245,
|
|
"grad_norm": 0.5429046624514684,
|
|
"learning_rate": 6.302721674652957e-08,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14121410250663757,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3919.0,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 6.858741258741259,
|
|
"grad_norm": 0.5441887195893792,
|
|
"learning_rate": 5.2459408800744626e-08,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358700692653656,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3912.2,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 6.872727272727273,
|
|
"grad_norm": 0.5288312054094918,
|
|
"learning_rate": 4.285950926413929e-08,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11977240443229675,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3656.6,
|
|
"valid_targets_min": 3087
|
|
},
|
|
{
|
|
"epoch": 6.886713286713286,
|
|
"grad_norm": 0.5639120371598475,
|
|
"learning_rate": 3.4227983949699506e-08,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12470817565917969,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3575.1,
|
|
"valid_targets_min": 2499
|
|
},
|
|
{
|
|
"epoch": 6.9006993006993005,
|
|
"grad_norm": 0.5268719788697689,
|
|
"learning_rate": 2.656525168228674e-08,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13214604556560516,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4328.4,
|
|
"valid_targets_min": 3577
|
|
},
|
|
{
|
|
"epoch": 6.914685314685315,
|
|
"grad_norm": 0.5451610230902841,
|
|
"learning_rate": 1.9871684278314207e-08,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15296560525894165,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4383.1,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 6.928671328671329,
|
|
"grad_norm": 0.5363716539542387,
|
|
"learning_rate": 1.4147606527707969e-08,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14485123753547668,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4384.8,
|
|
"valid_targets_min": 3575
|
|
},
|
|
{
|
|
"epoch": 6.942657342657343,
|
|
"grad_norm": 0.552159799619343,
|
|
"learning_rate": 9.393296178137334e-09,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141590878367424,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3966.6,
|
|
"valid_targets_min": 3358
|
|
},
|
|
{
|
|
"epoch": 6.956643356643356,
|
|
"grad_norm": 0.5470148363304371,
|
|
"learning_rate": 5.6089839215522916e-09,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14139097929000854,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4122.8,
|
|
"valid_targets_min": 3202
|
|
},
|
|
{
|
|
"epoch": 6.9706293706293705,
|
|
"grad_norm": 0.5343799935594152,
|
|
"learning_rate": 2.794853382976914e-09,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14910870790481567,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4249.1,
|
|
"valid_targets_min": 3352
|
|
},
|
|
{
|
|
"epoch": 6.984615384615385,
|
|
"grad_norm": 0.5388994673635329,
|
|
"learning_rate": 9.510411116075978e-10,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12203998863697052,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3729.9,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 6.998601398601399,
|
|
"grad_norm": 0.5557446952727101,
|
|
"learning_rate": 7.763657418280446e-11,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14027032256126404,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3807.5,
|
|
"valid_targets_min": 2712
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26545536518096924,
|
|
"step": 2506,
|
|
"total_flos": 9.890960502181724e+17,
|
|
"train_loss": 0.3139860285869714,
|
|
"train_runtime": 19951.9657,
|
|
"train_samples_per_second": 2.007,
|
|
"train_steps_per_second": 0.126,
|
|
"valid_targets_mean": 4094.0,
|
|
"valid_targets_min": 3070
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 2506,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9.890960502181724e+17,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|