2783 lines
77 KiB
JSON
2783 lines
77 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1246,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.02819548872180451,
|
|
"grad_norm": 11.27667292176756,
|
|
"learning_rate": 1.28e-06,
|
|
"loss": 0.9036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29199543595314026,
|
|
"step": 5,
|
|
"valid_targets_mean": 9628.2,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 0.05639097744360902,
|
|
"grad_norm": 8.317911958432434,
|
|
"learning_rate": 2.88e-06,
|
|
"loss": 0.8814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3067687451839447,
|
|
"step": 10,
|
|
"valid_targets_mean": 10587.8,
|
|
"valid_targets_min": 6075
|
|
},
|
|
{
|
|
"epoch": 0.08458646616541353,
|
|
"grad_norm": 4.357377107906101,
|
|
"learning_rate": 4.48e-06,
|
|
"loss": 0.8293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28829988837242126,
|
|
"step": 15,
|
|
"valid_targets_mean": 10112.8,
|
|
"valid_targets_min": 4070
|
|
},
|
|
{
|
|
"epoch": 0.11278195488721804,
|
|
"grad_norm": 1.930567122176637,
|
|
"learning_rate": 6.08e-06,
|
|
"loss": 0.7689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254660964012146,
|
|
"step": 20,
|
|
"valid_targets_mean": 9978.3,
|
|
"valid_targets_min": 4467
|
|
},
|
|
{
|
|
"epoch": 0.14097744360902256,
|
|
"grad_norm": 1.4457544018909307,
|
|
"learning_rate": 7.680000000000001e-06,
|
|
"loss": 0.7462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25300565361976624,
|
|
"step": 25,
|
|
"valid_targets_mean": 9490.9,
|
|
"valid_targets_min": 3438
|
|
},
|
|
{
|
|
"epoch": 0.16917293233082706,
|
|
"grad_norm": 1.1826535917994672,
|
|
"learning_rate": 9.280000000000001e-06,
|
|
"loss": 0.7089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22363917529582977,
|
|
"step": 30,
|
|
"valid_targets_mean": 9377.6,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 0.19736842105263158,
|
|
"grad_norm": 0.735665641953498,
|
|
"learning_rate": 1.0880000000000001e-05,
|
|
"loss": 0.678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2022905796766281,
|
|
"step": 35,
|
|
"valid_targets_mean": 8942.0,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 0.22556390977443608,
|
|
"grad_norm": 0.6077005768579393,
|
|
"learning_rate": 1.248e-05,
|
|
"loss": 0.6568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23237602412700653,
|
|
"step": 40,
|
|
"valid_targets_mean": 10732.1,
|
|
"valid_targets_min": 4297
|
|
},
|
|
{
|
|
"epoch": 0.25375939849624063,
|
|
"grad_norm": 0.5156231779831542,
|
|
"learning_rate": 1.408e-05,
|
|
"loss": 0.6408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20694395899772644,
|
|
"step": 45,
|
|
"valid_targets_mean": 9659.8,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 0.2819548872180451,
|
|
"grad_norm": 0.37161737532708955,
|
|
"learning_rate": 1.5680000000000002e-05,
|
|
"loss": 0.6238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209224671125412,
|
|
"step": 50,
|
|
"valid_targets_mean": 10202.5,
|
|
"valid_targets_min": 5347
|
|
},
|
|
{
|
|
"epoch": 0.3101503759398496,
|
|
"grad_norm": 0.3417973572153747,
|
|
"learning_rate": 1.728e-05,
|
|
"loss": 0.6053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2059696614742279,
|
|
"step": 55,
|
|
"valid_targets_mean": 9660.9,
|
|
"valid_targets_min": 4023
|
|
},
|
|
{
|
|
"epoch": 0.3383458646616541,
|
|
"grad_norm": 0.31758682305547137,
|
|
"learning_rate": 1.8880000000000002e-05,
|
|
"loss": 0.5907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20821066200733185,
|
|
"step": 60,
|
|
"valid_targets_mean": 9809.2,
|
|
"valid_targets_min": 4066
|
|
},
|
|
{
|
|
"epoch": 0.36654135338345867,
|
|
"grad_norm": 0.2507740265564074,
|
|
"learning_rate": 2.048e-05,
|
|
"loss": 0.5641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18758416175842285,
|
|
"step": 65,
|
|
"valid_targets_mean": 9209.8,
|
|
"valid_targets_min": 3945
|
|
},
|
|
{
|
|
"epoch": 0.39473684210526316,
|
|
"grad_norm": 0.23718427206276319,
|
|
"learning_rate": 2.2080000000000002e-05,
|
|
"loss": 0.5576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19377034902572632,
|
|
"step": 70,
|
|
"valid_targets_mean": 9540.7,
|
|
"valid_targets_min": 5447
|
|
},
|
|
{
|
|
"epoch": 0.42293233082706766,
|
|
"grad_norm": 0.24872982035759736,
|
|
"learning_rate": 2.368e-05,
|
|
"loss": 0.5446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18347474932670593,
|
|
"step": 75,
|
|
"valid_targets_mean": 9611.9,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 0.45112781954887216,
|
|
"grad_norm": 0.21797546092426584,
|
|
"learning_rate": 2.5280000000000002e-05,
|
|
"loss": 0.5433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18492883443832397,
|
|
"step": 80,
|
|
"valid_targets_mean": 9892.9,
|
|
"valid_targets_min": 2107
|
|
},
|
|
{
|
|
"epoch": 0.4793233082706767,
|
|
"grad_norm": 0.2468075467899746,
|
|
"learning_rate": 2.6880000000000004e-05,
|
|
"loss": 0.5308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1767086386680603,
|
|
"step": 85,
|
|
"valid_targets_mean": 9605.6,
|
|
"valid_targets_min": 4655
|
|
},
|
|
{
|
|
"epoch": 0.5075187969924813,
|
|
"grad_norm": 0.22619547032239734,
|
|
"learning_rate": 2.8480000000000002e-05,
|
|
"loss": 0.5262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15464217960834503,
|
|
"step": 90,
|
|
"valid_targets_mean": 8574.6,
|
|
"valid_targets_min": 4031
|
|
},
|
|
{
|
|
"epoch": 0.5357142857142857,
|
|
"grad_norm": 0.23131634119706504,
|
|
"learning_rate": 3.0080000000000003e-05,
|
|
"loss": 0.5173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16377687454223633,
|
|
"step": 95,
|
|
"valid_targets_mean": 9045.4,
|
|
"valid_targets_min": 4294
|
|
},
|
|
{
|
|
"epoch": 0.5639097744360902,
|
|
"grad_norm": 0.2240444930912022,
|
|
"learning_rate": 3.168e-05,
|
|
"loss": 0.5054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14757883548736572,
|
|
"step": 100,
|
|
"valid_targets_mean": 8858.5,
|
|
"valid_targets_min": 2554
|
|
},
|
|
{
|
|
"epoch": 0.5921052631578947,
|
|
"grad_norm": 0.22516374944025624,
|
|
"learning_rate": 3.328e-05,
|
|
"loss": 0.4985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1508697122335434,
|
|
"step": 105,
|
|
"valid_targets_mean": 8304.1,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 0.6203007518796992,
|
|
"grad_norm": 0.25125223132051416,
|
|
"learning_rate": 3.4880000000000005e-05,
|
|
"loss": 0.4935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163669615983963,
|
|
"step": 110,
|
|
"valid_targets_mean": 9637.2,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 0.6484962406015038,
|
|
"grad_norm": 0.2741917627559633,
|
|
"learning_rate": 3.648e-05,
|
|
"loss": 0.4863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17761027812957764,
|
|
"step": 115,
|
|
"valid_targets_mean": 10324.5,
|
|
"valid_targets_min": 5329
|
|
},
|
|
{
|
|
"epoch": 0.6766917293233082,
|
|
"grad_norm": 0.5530142596392317,
|
|
"learning_rate": 3.808e-05,
|
|
"loss": 0.4853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16129770874977112,
|
|
"step": 120,
|
|
"valid_targets_mean": 9438.1,
|
|
"valid_targets_min": 2586
|
|
},
|
|
{
|
|
"epoch": 0.7048872180451128,
|
|
"grad_norm": 0.2501311718678911,
|
|
"learning_rate": 3.9680000000000006e-05,
|
|
"loss": 0.4774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16897393763065338,
|
|
"step": 125,
|
|
"valid_targets_mean": 9593.8,
|
|
"valid_targets_min": 4187
|
|
},
|
|
{
|
|
"epoch": 0.7330827067669173,
|
|
"grad_norm": 0.2574918108729956,
|
|
"learning_rate": 3.9998743380036454e-05,
|
|
"loss": 0.4779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14019961655139923,
|
|
"step": 130,
|
|
"valid_targets_mean": 8366.4,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 0.7612781954887218,
|
|
"grad_norm": 0.24407543075435603,
|
|
"learning_rate": 3.999363863206902e-05,
|
|
"loss": 0.4714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15657010674476624,
|
|
"step": 135,
|
|
"valid_targets_mean": 9552.6,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 0.7894736842105263,
|
|
"grad_norm": 0.2894802340984338,
|
|
"learning_rate": 3.9984608218874315e-05,
|
|
"loss": 0.4789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1610604226589203,
|
|
"step": 140,
|
|
"valid_targets_mean": 9847.2,
|
|
"valid_targets_min": 6162
|
|
},
|
|
{
|
|
"epoch": 0.8176691729323309,
|
|
"grad_norm": 0.2845790780592853,
|
|
"learning_rate": 3.997165391353524e-05,
|
|
"loss": 0.468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1624469757080078,
|
|
"step": 145,
|
|
"valid_targets_mean": 10109.4,
|
|
"valid_targets_min": 4090
|
|
},
|
|
{
|
|
"epoch": 0.8458646616541353,
|
|
"grad_norm": 0.2572172583431577,
|
|
"learning_rate": 3.995477825957412e-05,
|
|
"loss": 0.4555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15889598429203033,
|
|
"step": 150,
|
|
"valid_targets_mean": 9374.6,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 0.8740601503759399,
|
|
"grad_norm": 0.2612824650214882,
|
|
"learning_rate": 3.9933984570453255e-05,
|
|
"loss": 0.4551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13706836104393005,
|
|
"step": 155,
|
|
"valid_targets_mean": 9366.4,
|
|
"valid_targets_min": 4764
|
|
},
|
|
{
|
|
"epoch": 0.9022556390977443,
|
|
"grad_norm": 0.25358505200342496,
|
|
"learning_rate": 3.990927692892435e-05,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14485809206962585,
|
|
"step": 160,
|
|
"valid_targets_mean": 9978.2,
|
|
"valid_targets_min": 4852
|
|
},
|
|
{
|
|
"epoch": 0.9304511278195489,
|
|
"grad_norm": 0.22521949735204938,
|
|
"learning_rate": 3.9880660186226905e-05,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1584489345550537,
|
|
"step": 165,
|
|
"valid_targets_mean": 9612.8,
|
|
"valid_targets_min": 2895
|
|
},
|
|
{
|
|
"epoch": 0.9586466165413534,
|
|
"grad_norm": 0.25106154655786905,
|
|
"learning_rate": 3.9848139961135647e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15277701616287231,
|
|
"step": 170,
|
|
"valid_targets_mean": 10049.1,
|
|
"valid_targets_min": 5349
|
|
},
|
|
{
|
|
"epoch": 0.9868421052631579,
|
|
"grad_norm": 0.2928102281837378,
|
|
"learning_rate": 3.981172263885736e-05,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15941108763217926,
|
|
"step": 175,
|
|
"valid_targets_mean": 9364.9,
|
|
"valid_targets_min": 3674
|
|
},
|
|
{
|
|
"epoch": 1.0112781954887218,
|
|
"grad_norm": 0.2718097968773888,
|
|
"learning_rate": 3.977141536977713e-05,
|
|
"loss": 0.4489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14890170097351074,
|
|
"step": 180,
|
|
"valid_targets_mean": 9873.9,
|
|
"valid_targets_min": 5207
|
|
},
|
|
{
|
|
"epoch": 1.0394736842105263,
|
|
"grad_norm": 0.27830371358135386,
|
|
"learning_rate": 3.972722606805445e-05,
|
|
"loss": 0.4386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14617493748664856,
|
|
"step": 185,
|
|
"valid_targets_mean": 9708.8,
|
|
"valid_targets_min": 2309
|
|
},
|
|
{
|
|
"epoch": 1.0676691729323309,
|
|
"grad_norm": 0.24888473802447664,
|
|
"learning_rate": 3.967916341006925e-05,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16539250314235687,
|
|
"step": 190,
|
|
"valid_targets_mean": 9782.2,
|
|
"valid_targets_min": 4150
|
|
},
|
|
{
|
|
"epoch": 1.0958646616541354,
|
|
"grad_norm": 0.2536978404456141,
|
|
"learning_rate": 3.962723683271837e-05,
|
|
"loss": 0.4476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15597505867481232,
|
|
"step": 195,
|
|
"valid_targets_mean": 10103.2,
|
|
"valid_targets_min": 6501
|
|
},
|
|
{
|
|
"epoch": 1.1240601503759398,
|
|
"grad_norm": 0.2712191334628464,
|
|
"learning_rate": 3.957145653156265e-05,
|
|
"loss": 0.4506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1572057008743286,
|
|
"step": 200,
|
|
"valid_targets_mean": 10415.6,
|
|
"valid_targets_min": 3634
|
|
},
|
|
{
|
|
"epoch": 1.1522556390977443,
|
|
"grad_norm": 0.26617835772104925,
|
|
"learning_rate": 3.9511833458825076e-05,
|
|
"loss": 0.4416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15055301785469055,
|
|
"step": 205,
|
|
"valid_targets_mean": 9808.2,
|
|
"valid_targets_min": 4399
|
|
},
|
|
{
|
|
"epoch": 1.1804511278195489,
|
|
"grad_norm": 0.26894103365631217,
|
|
"learning_rate": 3.944837932124036e-05,
|
|
"loss": 0.4442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1498723179101944,
|
|
"step": 210,
|
|
"valid_targets_mean": 9847.2,
|
|
"valid_targets_min": 5140
|
|
},
|
|
{
|
|
"epoch": 1.2086466165413534,
|
|
"grad_norm": 0.29824687817690704,
|
|
"learning_rate": 3.9381106577756374e-05,
|
|
"loss": 0.4487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14083534479141235,
|
|
"step": 215,
|
|
"valid_targets_mean": 9044.2,
|
|
"valid_targets_min": 3147
|
|
},
|
|
{
|
|
"epoch": 1.236842105263158,
|
|
"grad_norm": 0.2371578018995009,
|
|
"learning_rate": 3.931002843708788e-05,
|
|
"loss": 0.4455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1637556552886963,
|
|
"step": 220,
|
|
"valid_targets_mean": 10195.7,
|
|
"valid_targets_min": 5971
|
|
},
|
|
{
|
|
"epoch": 1.2650375939849625,
|
|
"grad_norm": 0.2641963474548977,
|
|
"learning_rate": 3.923515885512307e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13960033655166626,
|
|
"step": 225,
|
|
"valid_targets_mean": 9160.0,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 1.2932330827067668,
|
|
"grad_norm": 0.2407752036577108,
|
|
"learning_rate": 3.9156512532183384e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14932051301002502,
|
|
"step": 230,
|
|
"valid_targets_mean": 9700.4,
|
|
"valid_targets_min": 5426
|
|
},
|
|
{
|
|
"epoch": 1.3214285714285714,
|
|
"grad_norm": 0.27828486846847406,
|
|
"learning_rate": 3.907410491013714e-05,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14444586634635925,
|
|
"step": 235,
|
|
"valid_targets_mean": 9815.5,
|
|
"valid_targets_min": 2947
|
|
},
|
|
{
|
|
"epoch": 1.349624060150376,
|
|
"grad_norm": 0.2776790733337406,
|
|
"learning_rate": 3.898795216936763e-05,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14298999309539795,
|
|
"step": 240,
|
|
"valid_targets_mean": 9667.4,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 1.3778195488721805,
|
|
"grad_norm": 0.2478141720344162,
|
|
"learning_rate": 3.889807122559614e-05,
|
|
"loss": 0.4335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15552732348442078,
|
|
"step": 245,
|
|
"valid_targets_mean": 10064.5,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 1.4060150375939848,
|
|
"grad_norm": 0.2881202815268935,
|
|
"learning_rate": 3.8804479726560644e-05,
|
|
"loss": 0.4383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14759336411952972,
|
|
"step": 250,
|
|
"valid_targets_mean": 10201.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 1.4342105263157894,
|
|
"grad_norm": 0.2360033680760221,
|
|
"learning_rate": 3.870719604855071e-05,
|
|
"loss": 0.4279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278318464756012,
|
|
"step": 255,
|
|
"valid_targets_mean": 9452.0,
|
|
"valid_targets_min": 3097
|
|
},
|
|
{
|
|
"epoch": 1.462406015037594,
|
|
"grad_norm": 0.2678985162825098,
|
|
"learning_rate": 3.860623929279946e-05,
|
|
"loss": 0.4301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13829311728477478,
|
|
"step": 260,
|
|
"valid_targets_mean": 9852.8,
|
|
"valid_targets_min": 2718
|
|
},
|
|
{
|
|
"epoch": 1.4906015037593985,
|
|
"grad_norm": 0.24111892462438012,
|
|
"learning_rate": 3.850162928173303e-05,
|
|
"loss": 0.4407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13607658445835114,
|
|
"step": 265,
|
|
"valid_targets_mean": 9477.2,
|
|
"valid_targets_min": 5643
|
|
},
|
|
{
|
|
"epoch": 1.518796992481203,
|
|
"grad_norm": 0.29578707903937657,
|
|
"learning_rate": 3.839338655507861e-05,
|
|
"loss": 0.4342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13879016041755676,
|
|
"step": 270,
|
|
"valid_targets_mean": 9750.3,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 1.5469924812030076,
|
|
"grad_norm": 0.2669302765773925,
|
|
"learning_rate": 3.828153236583152e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14289501309394836,
|
|
"step": 275,
|
|
"valid_targets_mean": 9182.9,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 1.5751879699248121,
|
|
"grad_norm": 0.25183783099282825,
|
|
"learning_rate": 3.816608867608227e-05,
|
|
"loss": 0.4293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1422910988330841,
|
|
"step": 280,
|
|
"valid_targets_mean": 9582.1,
|
|
"valid_targets_min": 2641
|
|
},
|
|
{
|
|
"epoch": 1.6033834586466167,
|
|
"grad_norm": 0.25792139048892554,
|
|
"learning_rate": 3.80470781527044e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14068448543548584,
|
|
"step": 285,
|
|
"valid_targets_mean": 9446.0,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 1.631578947368421,
|
|
"grad_norm": 0.22282061010532125,
|
|
"learning_rate": 3.792452416290393e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15323126316070557,
|
|
"step": 290,
|
|
"valid_targets_mean": 9909.8,
|
|
"valid_targets_min": 3343
|
|
},
|
|
{
|
|
"epoch": 1.6597744360902256,
|
|
"grad_norm": 0.25037386293951636,
|
|
"learning_rate": 3.779845076963136e-05,
|
|
"loss": 0.4319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1571081131696701,
|
|
"step": 295,
|
|
"valid_targets_mean": 10085.2,
|
|
"valid_targets_min": 3558
|
|
},
|
|
{
|
|
"epoch": 1.6879699248120301,
|
|
"grad_norm": 0.2324474919615337,
|
|
"learning_rate": 3.766888272685693e-05,
|
|
"loss": 0.4255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15331974625587463,
|
|
"step": 300,
|
|
"valid_targets_mean": 9570.4,
|
|
"valid_targets_min": 3686
|
|
},
|
|
{
|
|
"epoch": 1.7161654135338344,
|
|
"grad_norm": 0.252366647005355,
|
|
"learning_rate": 3.753584547471036e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14724957942962646,
|
|
"step": 305,
|
|
"valid_targets_mean": 9747.0,
|
|
"valid_targets_min": 2611
|
|
},
|
|
{
|
|
"epoch": 1.744360902255639,
|
|
"grad_norm": 0.22612296362484413,
|
|
"learning_rate": 3.739936513448573e-05,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13358333706855774,
|
|
"step": 310,
|
|
"valid_targets_mean": 9892.2,
|
|
"valid_targets_min": 3969
|
|
},
|
|
{
|
|
"epoch": 1.7725563909774436,
|
|
"grad_norm": 0.242275248738654,
|
|
"learning_rate": 3.725946850351272e-05,
|
|
"loss": 0.4274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14728665351867676,
|
|
"step": 315,
|
|
"valid_targets_mean": 9580.7,
|
|
"valid_targets_min": 5246
|
|
},
|
|
{
|
|
"epoch": 1.800751879699248,
|
|
"grad_norm": 0.2301098778262513,
|
|
"learning_rate": 3.7116183049895054e-05,
|
|
"loss": 0.4233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14626328647136688,
|
|
"step": 320,
|
|
"valid_targets_mean": 9595.5,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 1.8289473684210527,
|
|
"grad_norm": 0.255145468595074,
|
|
"learning_rate": 3.6969536907117234e-05,
|
|
"loss": 0.4139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1345970779657364,
|
|
"step": 325,
|
|
"valid_targets_mean": 8776.2,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.2536680403230651,
|
|
"learning_rate": 3.681955886852069e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13223111629486084,
|
|
"step": 330,
|
|
"valid_targets_mean": 9203.2,
|
|
"valid_targets_min": 3441
|
|
},
|
|
{
|
|
"epoch": 1.8853383458646618,
|
|
"grad_norm": 0.23460226413834012,
|
|
"learning_rate": 3.66662783816503e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15391016006469727,
|
|
"step": 335,
|
|
"valid_targets_mean": 10040.4,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 1.9135338345864663,
|
|
"grad_norm": 0.2445266751763312,
|
|
"learning_rate": 3.6509725542472516e-05,
|
|
"loss": 0.4211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14127874374389648,
|
|
"step": 340,
|
|
"valid_targets_mean": 9436.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 1.9417293233082706,
|
|
"grad_norm": 0.2671155341941523,
|
|
"learning_rate": 3.6349931089466114e-05,
|
|
"loss": 0.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14088255167007446,
|
|
"step": 345,
|
|
"valid_targets_mean": 10083.5,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 1.9699248120300752,
|
|
"grad_norm": 0.24327846719266866,
|
|
"learning_rate": 3.6186926397586866e-05,
|
|
"loss": 0.4224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15060530602931976,
|
|
"step": 350,
|
|
"valid_targets_mean": 9973.8,
|
|
"valid_targets_min": 3180
|
|
},
|
|
{
|
|
"epoch": 1.9981203007518797,
|
|
"grad_norm": 0.2515435073960223,
|
|
"learning_rate": 3.602074347210717e-05,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467684507369995,
|
|
"step": 355,
|
|
"valid_targets_mean": 10120.1,
|
|
"valid_targets_min": 5849
|
|
},
|
|
{
|
|
"epoch": 2.0225563909774436,
|
|
"grad_norm": 0.27460864105225735,
|
|
"learning_rate": 3.5851414942331986e-05,
|
|
"loss": 0.4123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371781826019287,
|
|
"step": 360,
|
|
"valid_targets_mean": 9121.8,
|
|
"valid_targets_min": 3424
|
|
},
|
|
{
|
|
"epoch": 2.050751879699248,
|
|
"grad_norm": 0.30961309307690715,
|
|
"learning_rate": 3.5678974055192176e-05,
|
|
"loss": 0.4121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294780969619751,
|
|
"step": 365,
|
|
"valid_targets_mean": 8944.1,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 2.0789473684210527,
|
|
"grad_norm": 0.2381008546148629,
|
|
"learning_rate": 3.550345466871662e-05,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437140703201294,
|
|
"step": 370,
|
|
"valid_targets_mean": 9402.6,
|
|
"valid_targets_min": 3163
|
|
},
|
|
{
|
|
"epoch": 2.107142857142857,
|
|
"grad_norm": 0.23784456731163375,
|
|
"learning_rate": 3.5324891245384354e-05,
|
|
"loss": 0.4109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11915447562932968,
|
|
"step": 375,
|
|
"valid_targets_mean": 9617.0,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 2.1353383458646618,
|
|
"grad_norm": 0.236728125374762,
|
|
"learning_rate": 3.5143318845358006e-05,
|
|
"loss": 0.4197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14139775931835175,
|
|
"step": 380,
|
|
"valid_targets_mean": 9871.7,
|
|
"valid_targets_min": 5207
|
|
},
|
|
{
|
|
"epoch": 2.1635338345864663,
|
|
"grad_norm": 0.2504925152689405,
|
|
"learning_rate": 3.4958773119599874e-05,
|
|
"loss": 0.4108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13846886157989502,
|
|
"step": 385,
|
|
"valid_targets_mean": 9910.7,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 2.191729323308271,
|
|
"grad_norm": 0.25485707228406046,
|
|
"learning_rate": 3.4771290302872e-05,
|
|
"loss": 0.4062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13879907131195068,
|
|
"step": 390,
|
|
"valid_targets_mean": 9667.2,
|
|
"valid_targets_min": 5697
|
|
},
|
|
{
|
|
"epoch": 2.219924812030075,
|
|
"grad_norm": 0.2898577935562,
|
|
"learning_rate": 3.458090720662167e-05,
|
|
"loss": 0.4192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14320047199726105,
|
|
"step": 395,
|
|
"valid_targets_mean": 9772.4,
|
|
"valid_targets_min": 3686
|
|
},
|
|
{
|
|
"epoch": 2.2481203007518795,
|
|
"grad_norm": 0.2919595914258059,
|
|
"learning_rate": 3.438766121175361e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13252058625221252,
|
|
"step": 400,
|
|
"valid_targets_mean": 9276.8,
|
|
"valid_targets_min": 3180
|
|
},
|
|
{
|
|
"epoch": 2.276315789473684,
|
|
"grad_norm": 0.2500124988262427,
|
|
"learning_rate": 3.4191590261290405e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161848783493042,
|
|
"step": 405,
|
|
"valid_targets_mean": 10478.8,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 2.3045112781954886,
|
|
"grad_norm": 0.2687590505702195,
|
|
"learning_rate": 3.399273285292258e-05,
|
|
"loss": 0.4129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13902103900909424,
|
|
"step": 410,
|
|
"valid_targets_mean": 9363.7,
|
|
"valid_targets_min": 3981
|
|
},
|
|
{
|
|
"epoch": 2.332706766917293,
|
|
"grad_norm": 0.22847314394479787,
|
|
"learning_rate": 3.379112803144971e-05,
|
|
"loss": 0.4145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14047273993492126,
|
|
"step": 415,
|
|
"valid_targets_mean": 9385.0,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 2.3609022556390977,
|
|
"grad_norm": 0.26353674089580303,
|
|
"learning_rate": 3.358681538111415e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1435493379831314,
|
|
"step": 420,
|
|
"valid_targets_mean": 9179.6,
|
|
"valid_targets_min": 2170
|
|
},
|
|
{
|
|
"epoch": 2.3890977443609023,
|
|
"grad_norm": 0.24163690514511912,
|
|
"learning_rate": 3.3379835017828855e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342683732509613,
|
|
"step": 425,
|
|
"valid_targets_mean": 9376.6,
|
|
"valid_targets_min": 5213
|
|
},
|
|
{
|
|
"epoch": 2.417293233082707,
|
|
"grad_norm": 0.23583527410814714,
|
|
"learning_rate": 3.317022758130078e-05,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1486685574054718,
|
|
"step": 430,
|
|
"valid_targets_mean": 9923.1,
|
|
"valid_targets_min": 3855
|
|
},
|
|
{
|
|
"epoch": 2.4454887218045114,
|
|
"grad_norm": 0.23323844774390975,
|
|
"learning_rate": 3.295803422705143e-05,
|
|
"loss": 0.4137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14215630292892456,
|
|
"step": 435,
|
|
"valid_targets_mean": 9026.8,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 2.473684210526316,
|
|
"grad_norm": 0.29945443437999814,
|
|
"learning_rate": 3.27432966183362e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12867683172225952,
|
|
"step": 440,
|
|
"valid_targets_mean": 9211.3,
|
|
"valid_targets_min": 4959
|
|
},
|
|
{
|
|
"epoch": 2.5018796992481205,
|
|
"grad_norm": 0.25272960687873497,
|
|
"learning_rate": 3.25260569179639e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396941989660263,
|
|
"step": 445,
|
|
"valid_targets_mean": 9191.3,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 2.530075187969925,
|
|
"grad_norm": 0.2457426246972927,
|
|
"learning_rate": 3.230635778001836e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14244124293327332,
|
|
"step": 450,
|
|
"valid_targets_mean": 9817.1,
|
|
"valid_targets_min": 2712
|
|
},
|
|
{
|
|
"epoch": 2.5582706766917296,
|
|
"grad_norm": 0.25887288298744265,
|
|
"learning_rate": 3.208424234148338e-05,
|
|
"loss": 0.3996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1393296867609024,
|
|
"step": 455,
|
|
"valid_targets_mean": 9903.1,
|
|
"valid_targets_min": 3147
|
|
},
|
|
{
|
|
"epoch": 2.5864661654135337,
|
|
"grad_norm": 0.21877812272153066,
|
|
"learning_rate": 3.185975421377307e-05,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12278774380683899,
|
|
"step": 460,
|
|
"valid_targets_mean": 8609.7,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 2.6146616541353382,
|
|
"grad_norm": 0.23735497326800922,
|
|
"learning_rate": 3.1632937474168855e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13202975690364838,
|
|
"step": 465,
|
|
"valid_targets_mean": 10006.2,
|
|
"valid_targets_min": 3324
|
|
},
|
|
{
|
|
"epoch": 2.642857142857143,
|
|
"grad_norm": 0.22193176122691816,
|
|
"learning_rate": 3.140383665716512e-05,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1347397416830063,
|
|
"step": 470,
|
|
"valid_targets_mean": 8921.9,
|
|
"valid_targets_min": 3634
|
|
},
|
|
{
|
|
"epoch": 2.6710526315789473,
|
|
"grad_norm": 0.23139797768847337,
|
|
"learning_rate": 3.1172496745725085e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13778147101402283,
|
|
"step": 475,
|
|
"valid_targets_mean": 9567.4,
|
|
"valid_targets_min": 4070
|
|
},
|
|
{
|
|
"epoch": 2.699248120300752,
|
|
"grad_norm": 0.21530585156050375,
|
|
"learning_rate": 3.093896316244855e-05,
|
|
"loss": 0.4046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352767050266266,
|
|
"step": 480,
|
|
"valid_targets_mean": 10076.9,
|
|
"valid_targets_min": 4043
|
|
},
|
|
{
|
|
"epoch": 2.7274436090225564,
|
|
"grad_norm": 0.2582975552308644,
|
|
"learning_rate": 3.0703281760653336e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417762041091919,
|
|
"step": 485,
|
|
"valid_targets_mean": 9484.2,
|
|
"valid_targets_min": 3106
|
|
},
|
|
{
|
|
"epoch": 2.755639097744361,
|
|
"grad_norm": 0.25989182642732045,
|
|
"learning_rate": 3.0465498815372285e-05,
|
|
"loss": 0.4031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12462268769741058,
|
|
"step": 490,
|
|
"valid_targets_mean": 9462.6,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 2.7838345864661656,
|
|
"grad_norm": 0.22331231185945719,
|
|
"learning_rate": 3.0225661014267255e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14205169677734375,
|
|
"step": 495,
|
|
"valid_targets_mean": 9931.2,
|
|
"valid_targets_min": 4449
|
|
},
|
|
{
|
|
"epoch": 2.8120300751879697,
|
|
"grad_norm": 0.237834758537334,
|
|
"learning_rate": 2.9983815448462245e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1147095113992691,
|
|
"step": 500,
|
|
"valid_targets_mean": 8174.8,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 2.840225563909774,
|
|
"grad_norm": 0.23790363756246555,
|
|
"learning_rate": 2.9740009603297236e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13740326464176178,
|
|
"step": 505,
|
|
"valid_targets_mean": 9327.2,
|
|
"valid_targets_min": 4517
|
|
},
|
|
{
|
|
"epoch": 2.8684210526315788,
|
|
"grad_norm": 0.2658433889855018,
|
|
"learning_rate": 2.949429134900468e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15330830216407776,
|
|
"step": 510,
|
|
"valid_targets_mean": 10222.3,
|
|
"valid_targets_min": 4176
|
|
},
|
|
{
|
|
"epoch": 2.8966165413533833,
|
|
"grad_norm": 0.2129277840732677,
|
|
"learning_rate": 2.924670893131033e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338801383972168,
|
|
"step": 515,
|
|
"valid_targets_mean": 9623.5,
|
|
"valid_targets_min": 5138
|
|
},
|
|
{
|
|
"epoch": 2.924812030075188,
|
|
"grad_norm": 0.23260012738615085,
|
|
"learning_rate": 2.8997310961960456e-05,
|
|
"loss": 0.4065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12857681512832642,
|
|
"step": 520,
|
|
"valid_targets_mean": 9521.6,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 2.9530075187969924,
|
|
"grad_norm": 0.23948753412160725,
|
|
"learning_rate": 2.8746146409177112e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13795359432697296,
|
|
"step": 525,
|
|
"valid_targets_mean": 9622.8,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 2.981203007518797,
|
|
"grad_norm": 0.2434249410932302,
|
|
"learning_rate": 2.849326458804341e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13328373432159424,
|
|
"step": 530,
|
|
"valid_targets_mean": 9911.9,
|
|
"valid_targets_min": 4039
|
|
},
|
|
{
|
|
"epoch": 3.005639097744361,
|
|
"grad_norm": 0.26324430203522414,
|
|
"learning_rate": 2.8238715150820764e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11944051831960678,
|
|
"step": 535,
|
|
"valid_targets_mean": 9227.2,
|
|
"valid_targets_min": 4837
|
|
},
|
|
{
|
|
"epoch": 3.0338345864661656,
|
|
"grad_norm": 0.23159723969729487,
|
|
"learning_rate": 2.7982548077199853e-05,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13192662596702576,
|
|
"step": 540,
|
|
"valid_targets_mean": 9961.9,
|
|
"valid_targets_min": 5917
|
|
},
|
|
{
|
|
"epoch": 3.06203007518797,
|
|
"grad_norm": 0.23925085896994178,
|
|
"learning_rate": 2.7724813664487323e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13217054307460785,
|
|
"step": 545,
|
|
"valid_targets_mean": 10060.8,
|
|
"valid_targets_min": 4260
|
|
},
|
|
{
|
|
"epoch": 3.090225563909774,
|
|
"grad_norm": 0.23759165570511362,
|
|
"learning_rate": 2.7465562517730132e-05,
|
|
"loss": 0.3946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14225870370864868,
|
|
"step": 550,
|
|
"valid_targets_mean": 10172.3,
|
|
"valid_targets_min": 2940
|
|
},
|
|
{
|
|
"epoch": 3.1184210526315788,
|
|
"grad_norm": 0.23673756939271293,
|
|
"learning_rate": 2.7204845539779468e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14777497947216034,
|
|
"step": 555,
|
|
"valid_targets_mean": 9553.6,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 3.1466165413533833,
|
|
"grad_norm": 0.25165539406420895,
|
|
"learning_rate": 2.6942713921296186e-05,
|
|
"loss": 0.3974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13649114966392517,
|
|
"step": 560,
|
|
"valid_targets_mean": 10204.3,
|
|
"valid_targets_min": 5230
|
|
},
|
|
{
|
|
"epoch": 3.174812030075188,
|
|
"grad_norm": 0.23177453783787236,
|
|
"learning_rate": 2.6679219130699747e-05,
|
|
"loss": 0.4,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1317581832408905,
|
|
"step": 565,
|
|
"valid_targets_mean": 9459.1,
|
|
"valid_targets_min": 3125
|
|
},
|
|
{
|
|
"epoch": 3.2030075187969924,
|
|
"grad_norm": 0.2439671619550467,
|
|
"learning_rate": 2.641441290406261e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13911797106266022,
|
|
"step": 570,
|
|
"valid_targets_mean": 9394.8,
|
|
"valid_targets_min": 4771
|
|
},
|
|
{
|
|
"epoch": 3.231203007518797,
|
|
"grad_norm": 0.24705170121255965,
|
|
"learning_rate": 2.614834723495208e-05,
|
|
"loss": 0.3962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319943219423294,
|
|
"step": 575,
|
|
"valid_targets_mean": 10358.8,
|
|
"valid_targets_min": 5759
|
|
},
|
|
{
|
|
"epoch": 3.2593984962406015,
|
|
"grad_norm": 0.2431113202848758,
|
|
"learning_rate": 2.5881074364221593e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13881921768188477,
|
|
"step": 580,
|
|
"valid_targets_mean": 9148.1,
|
|
"valid_targets_min": 3053
|
|
},
|
|
{
|
|
"epoch": 3.287593984962406,
|
|
"grad_norm": 0.22917367826880933,
|
|
"learning_rate": 2.5612646769753454e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12870250642299652,
|
|
"step": 585,
|
|
"valid_targets_mean": 9295.9,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 3.3157894736842106,
|
|
"grad_norm": 0.23874522846122448,
|
|
"learning_rate": 2.5343117156154994e-05,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13996782898902893,
|
|
"step": 590,
|
|
"valid_targets_mean": 9605.2,
|
|
"valid_targets_min": 2517
|
|
},
|
|
{
|
|
"epoch": 3.343984962406015,
|
|
"grad_norm": 0.22969310162018472,
|
|
"learning_rate": 2.507253844441031e-05,
|
|
"loss": 0.3913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319817304611206,
|
|
"step": 595,
|
|
"valid_targets_mean": 9670.2,
|
|
"valid_targets_min": 2436
|
|
},
|
|
{
|
|
"epoch": 3.3721804511278197,
|
|
"grad_norm": 0.22621680783522202,
|
|
"learning_rate": 2.480096376148941e-05,
|
|
"loss": 0.4003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12083794176578522,
|
|
"step": 600,
|
|
"valid_targets_mean": 9294.9,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 3.4003759398496243,
|
|
"grad_norm": 0.23285299892205277,
|
|
"learning_rate": 2.4528446429916973e-05,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262476146221161,
|
|
"step": 605,
|
|
"valid_targets_mean": 8696.2,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.4285714285714284,
|
|
"grad_norm": 0.23236710961218587,
|
|
"learning_rate": 2.4255039957302692e-05,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298486590385437,
|
|
"step": 610,
|
|
"valid_targets_mean": 9753.8,
|
|
"valid_targets_min": 2766
|
|
},
|
|
{
|
|
"epoch": 3.456766917293233,
|
|
"grad_norm": 0.23202738403502496,
|
|
"learning_rate": 2.3980798025835298e-05,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12037086486816406,
|
|
"step": 615,
|
|
"valid_targets_mean": 8639.8,
|
|
"valid_targets_min": 3035
|
|
},
|
|
{
|
|
"epoch": 3.4849624060150375,
|
|
"grad_norm": 0.22584889839089625,
|
|
"learning_rate": 2.370577448174229e-05,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13931915163993835,
|
|
"step": 620,
|
|
"valid_targets_mean": 10037.7,
|
|
"valid_targets_min": 3089
|
|
},
|
|
{
|
|
"epoch": 3.513157894736842,
|
|
"grad_norm": 0.22178066562941928,
|
|
"learning_rate": 2.3430023324717443e-05,
|
|
"loss": 0.3894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12872684001922607,
|
|
"step": 625,
|
|
"valid_targets_mean": 9377.9,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 3.5413533834586466,
|
|
"grad_norm": 0.2568058966490674,
|
|
"learning_rate": 2.3153598697318237e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1503409445285797,
|
|
"step": 630,
|
|
"valid_targets_mean": 10004.6,
|
|
"valid_targets_min": 3068
|
|
},
|
|
{
|
|
"epoch": 3.569548872180451,
|
|
"grad_norm": 0.2118375304003317,
|
|
"learning_rate": 2.2876554874335124e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13170304894447327,
|
|
"step": 635,
|
|
"valid_targets_mean": 9806.5,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 3.5977443609022557,
|
|
"grad_norm": 0.22824112120822312,
|
|
"learning_rate": 2.2598946252135017e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13064341247081757,
|
|
"step": 640,
|
|
"valid_targets_mean": 9978.8,
|
|
"valid_targets_min": 4497
|
|
},
|
|
{
|
|
"epoch": 3.6259398496240602,
|
|
"grad_norm": 0.22511608795603558,
|
|
"learning_rate": 2.2320827337980676e-05,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12164842337369919,
|
|
"step": 645,
|
|
"valid_targets_mean": 9501.5,
|
|
"valid_targets_min": 5364
|
|
},
|
|
{
|
|
"epoch": 3.654135338345865,
|
|
"grad_norm": 0.21962275837845524,
|
|
"learning_rate": 2.204225273932855e-05,
|
|
"loss": 0.3946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12767906486988068,
|
|
"step": 650,
|
|
"valid_targets_mean": 9994.6,
|
|
"valid_targets_min": 5580
|
|
},
|
|
{
|
|
"epoch": 3.682330827067669,
|
|
"grad_norm": 0.22326981379224142,
|
|
"learning_rate": 2.176327715310673e-05,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13035039603710175,
|
|
"step": 655,
|
|
"valid_targets_mean": 9763.4,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 3.7105263157894735,
|
|
"grad_norm": 0.22785142761097762,
|
|
"learning_rate": 2.1483955354975557e-05,
|
|
"loss": 0.3968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13587889075279236,
|
|
"step": 660,
|
|
"valid_targets_mean": 9339.4,
|
|
"valid_targets_min": 4732
|
|
},
|
|
{
|
|
"epoch": 3.738721804511278,
|
|
"grad_norm": 0.2338735231717096,
|
|
"learning_rate": 2.120434218857254e-05,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14184817671775818,
|
|
"step": 665,
|
|
"valid_targets_mean": 9305.5,
|
|
"valid_targets_min": 3570
|
|
},
|
|
{
|
|
"epoch": 3.7669172932330826,
|
|
"grad_norm": 0.23510291425793847,
|
|
"learning_rate": 2.0924492554744145e-05,
|
|
"loss": 0.3946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1337651014328003,
|
|
"step": 670,
|
|
"valid_targets_mean": 8838.2,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 3.795112781954887,
|
|
"grad_norm": 0.22779287321668726,
|
|
"learning_rate": 2.0644461400766174e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14175552129745483,
|
|
"step": 675,
|
|
"valid_targets_mean": 10304.8,
|
|
"valid_targets_min": 5382
|
|
},
|
|
{
|
|
"epoch": 3.8233082706766917,
|
|
"grad_norm": 0.22017841967874427,
|
|
"learning_rate": 2.036430370955514e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13052602112293243,
|
|
"step": 680,
|
|
"valid_targets_mean": 9796.6,
|
|
"valid_targets_min": 5183
|
|
},
|
|
{
|
|
"epoch": 3.851503759398496,
|
|
"grad_norm": 0.24234384303043424,
|
|
"learning_rate": 2.0084074488872606e-05,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13533687591552734,
|
|
"step": 685,
|
|
"valid_targets_mean": 10036.1,
|
|
"valid_targets_min": 5742
|
|
},
|
|
{
|
|
"epoch": 3.8796992481203008,
|
|
"grad_norm": 0.23247406840250695,
|
|
"learning_rate": 1.9803828760524627e-05,
|
|
"loss": 0.3928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13567492365837097,
|
|
"step": 690,
|
|
"valid_targets_mean": 9282.2,
|
|
"valid_targets_min": 2939
|
|
},
|
|
{
|
|
"epoch": 3.9078947368421053,
|
|
"grad_norm": 0.2217230300549226,
|
|
"learning_rate": 1.952362154955848e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13407818973064423,
|
|
"step": 695,
|
|
"valid_targets_mean": 10740.7,
|
|
"valid_targets_min": 5872
|
|
},
|
|
{
|
|
"epoch": 3.93609022556391,
|
|
"grad_norm": 0.2103692494148384,
|
|
"learning_rate": 1.9243507873458717e-05,
|
|
"loss": 0.3943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12526118755340576,
|
|
"step": 700,
|
|
"valid_targets_mean": 9699.6,
|
|
"valid_targets_min": 3534
|
|
},
|
|
{
|
|
"epoch": 3.9642857142857144,
|
|
"grad_norm": 0.21604893038726108,
|
|
"learning_rate": 1.896354273134472e-05,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13225769996643066,
|
|
"step": 705,
|
|
"valid_targets_mean": 10412.7,
|
|
"valid_targets_min": 5378
|
|
},
|
|
{
|
|
"epoch": 3.992481203007519,
|
|
"grad_norm": 0.23178482273025192,
|
|
"learning_rate": 1.8683781093171846e-05,
|
|
"loss": 0.3884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12075187265872955,
|
|
"step": 710,
|
|
"valid_targets_mean": 8626.9,
|
|
"valid_targets_min": 4149
|
|
},
|
|
{
|
|
"epoch": 4.0169172932330826,
|
|
"grad_norm": 0.21899831151451,
|
|
"learning_rate": 1.8404277888938337e-05,
|
|
"loss": 0.3899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12334593385457993,
|
|
"step": 715,
|
|
"valid_targets_mean": 9348.6,
|
|
"valid_targets_min": 3053
|
|
},
|
|
{
|
|
"epoch": 4.045112781954887,
|
|
"grad_norm": 0.22782795872601927,
|
|
"learning_rate": 1.8125087997900054e-05,
|
|
"loss": 0.386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13175661861896515,
|
|
"step": 720,
|
|
"valid_targets_mean": 9545.6,
|
|
"valid_targets_min": 3147
|
|
},
|
|
{
|
|
"epoch": 4.073308270676692,
|
|
"grad_norm": 0.2215031525141966,
|
|
"learning_rate": 1.784626623779512e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12877237796783447,
|
|
"step": 725,
|
|
"valid_targets_mean": 9193.4,
|
|
"valid_targets_min": 3835
|
|
},
|
|
{
|
|
"epoch": 4.101503759398496,
|
|
"grad_norm": 0.21240434399860283,
|
|
"learning_rate": 1.7567867354080766e-05,
|
|
"loss": 0.3793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11605602502822876,
|
|
"step": 730,
|
|
"valid_targets_mean": 9114.1,
|
|
"valid_targets_min": 3085
|
|
},
|
|
{
|
|
"epoch": 4.129699248120301,
|
|
"grad_norm": 0.20674610974143964,
|
|
"learning_rate": 1.7289946009184217e-05,
|
|
"loss": 0.3889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11780436336994171,
|
|
"step": 735,
|
|
"valid_targets_mean": 9190.0,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 4.157894736842105,
|
|
"grad_norm": 0.2151999993530917,
|
|
"learning_rate": 1.701255677177004e-05,
|
|
"loss": 0.3857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13381031155586243,
|
|
"step": 740,
|
|
"valid_targets_mean": 10586.0,
|
|
"valid_targets_min": 4279
|
|
},
|
|
{
|
|
"epoch": 4.18609022556391,
|
|
"grad_norm": 0.24136733794847645,
|
|
"learning_rate": 1.6735754106025747e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14070633053779602,
|
|
"step": 745,
|
|
"valid_targets_mean": 9712.8,
|
|
"valid_targets_min": 4449
|
|
},
|
|
{
|
|
"epoch": 4.214285714285714,
|
|
"grad_norm": 0.2689899308894728,
|
|
"learning_rate": 1.6459592360968036e-05,
|
|
"loss": 0.3837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13649453222751617,
|
|
"step": 750,
|
|
"valid_targets_mean": 9510.1,
|
|
"valid_targets_min": 4483
|
|
},
|
|
{
|
|
"epoch": 4.242481203007519,
|
|
"grad_norm": 0.21065172047325978,
|
|
"learning_rate": 1.618412575977156e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11696235090494156,
|
|
"step": 755,
|
|
"valid_targets_mean": 9127.5,
|
|
"valid_targets_min": 4573
|
|
},
|
|
{
|
|
"epoch": 4.2706766917293235,
|
|
"grad_norm": 0.22547082757713885,
|
|
"learning_rate": 1.5909408389122473e-05,
|
|
"loss": 0.39,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12373727560043335,
|
|
"step": 760,
|
|
"valid_targets_mean": 8933.2,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 4.298872180451128,
|
|
"grad_norm": 0.2224972608121181,
|
|
"learning_rate": 1.5635494188598713e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12517967820167542,
|
|
"step": 765,
|
|
"valid_targets_mean": 8647.8,
|
|
"valid_targets_min": 3420
|
|
},
|
|
{
|
|
"epoch": 4.327067669172933,
|
|
"grad_norm": 0.21014542918768556,
|
|
"learning_rate": 1.5362436940079194e-05,
|
|
"loss": 0.3841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12938112020492554,
|
|
"step": 770,
|
|
"valid_targets_mean": 10605.2,
|
|
"valid_targets_min": 3102
|
|
},
|
|
{
|
|
"epoch": 4.355263157894737,
|
|
"grad_norm": 0.22326364891523268,
|
|
"learning_rate": 1.5090290257184019e-05,
|
|
"loss": 0.3938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12240595370531082,
|
|
"step": 775,
|
|
"valid_targets_mean": 9136.8,
|
|
"valid_targets_min": 3968
|
|
},
|
|
{
|
|
"epoch": 4.383458646616542,
|
|
"grad_norm": 0.2290266501731758,
|
|
"learning_rate": 1.481910757474759e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11885623633861542,
|
|
"step": 780,
|
|
"valid_targets_mean": 8942.6,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 4.411654135338346,
|
|
"grad_norm": 0.20583422703018228,
|
|
"learning_rate": 1.4548942138326978e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12395472824573517,
|
|
"step": 785,
|
|
"valid_targets_mean": 8851.2,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.43984962406015,
|
|
"grad_norm": 0.21949993262902937,
|
|
"learning_rate": 1.4279846993747342e-05,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12866610288619995,
|
|
"step": 790,
|
|
"valid_targets_mean": 9402.3,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 4.4680451127819545,
|
|
"grad_norm": 0.1981370750992788,
|
|
"learning_rate": 1.4011874976686648e-05,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12139122933149338,
|
|
"step": 795,
|
|
"valid_targets_mean": 10267.0,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 4.496240601503759,
|
|
"grad_norm": 0.22529503663433614,
|
|
"learning_rate": 1.3745078702301569e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12190576642751694,
|
|
"step": 800,
|
|
"valid_targets_mean": 9045.6,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 4.524436090225564,
|
|
"grad_norm": 0.21131753037367373,
|
|
"learning_rate": 1.3479510554896762e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13228657841682434,
|
|
"step": 805,
|
|
"valid_targets_mean": 9695.1,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 4.552631578947368,
|
|
"grad_norm": 0.2228431849996671,
|
|
"learning_rate": 1.3215222677639394e-05,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13006101548671722,
|
|
"step": 810,
|
|
"valid_targets_mean": 10069.1,
|
|
"valid_targets_min": 5674
|
|
},
|
|
{
|
|
"epoch": 4.580827067669173,
|
|
"grad_norm": 0.21469936302557577,
|
|
"learning_rate": 1.2952266962321106e-05,
|
|
"loss": 0.391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12173780798912048,
|
|
"step": 815,
|
|
"valid_targets_mean": 8536.1,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 4.609022556390977,
|
|
"grad_norm": 0.21888380673587698,
|
|
"learning_rate": 1.2690695039169231e-05,
|
|
"loss": 0.3929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13081592321395874,
|
|
"step": 820,
|
|
"valid_targets_mean": 9528.8,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 4.637218045112782,
|
|
"grad_norm": 0.20661151271918127,
|
|
"learning_rate": 1.2430558266709451e-05,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282854676246643,
|
|
"step": 825,
|
|
"valid_targets_mean": 9321.7,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 4.665413533834586,
|
|
"grad_norm": 0.21221277245667652,
|
|
"learning_rate": 1.2171907721681755e-05,
|
|
"loss": 0.383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12876039743423462,
|
|
"step": 830,
|
|
"valid_targets_mean": 9454.6,
|
|
"valid_targets_min": 3387
|
|
},
|
|
{
|
|
"epoch": 4.693609022556391,
|
|
"grad_norm": 0.21708808777941638,
|
|
"learning_rate": 1.1914794189011767e-05,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12404484301805496,
|
|
"step": 835,
|
|
"valid_targets_mean": 8156.2,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 4.7218045112781954,
|
|
"grad_norm": 0.20897527096483418,
|
|
"learning_rate": 1.1659268151839305e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1307717263698578,
|
|
"step": 840,
|
|
"valid_targets_mean": 9895.8,
|
|
"valid_targets_min": 4674
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"grad_norm": 0.22210410229590297,
|
|
"learning_rate": 1.1405379781606264e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13114789128303528,
|
|
"step": 845,
|
|
"valid_targets_mean": 10139.0,
|
|
"valid_targets_min": 3599
|
|
},
|
|
{
|
|
"epoch": 4.7781954887218046,
|
|
"grad_norm": 0.20204916090873393,
|
|
"learning_rate": 1.115317892820564e-05,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13144543766975403,
|
|
"step": 850,
|
|
"valid_targets_mean": 9591.1,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 4.806390977443609,
|
|
"grad_norm": 0.21947799398719045,
|
|
"learning_rate": 1.0902715110193758e-05,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12176516652107239,
|
|
"step": 855,
|
|
"valid_targets_mean": 9651.8,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 4.834586466165414,
|
|
"grad_norm": 0.2161101557758461,
|
|
"learning_rate": 1.0654037505067474e-05,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11943140625953674,
|
|
"step": 860,
|
|
"valid_targets_mean": 9051.2,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 4.862781954887218,
|
|
"grad_norm": 0.23013476486288856,
|
|
"learning_rate": 1.0407194939608388e-05,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13007856905460358,
|
|
"step": 865,
|
|
"valid_targets_mean": 8389.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 4.890977443609023,
|
|
"grad_norm": 0.21572479500745287,
|
|
"learning_rate": 1.016223588029598e-05,
|
|
"loss": 0.3902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13947170972824097,
|
|
"step": 870,
|
|
"valid_targets_mean": 10650.0,
|
|
"valid_targets_min": 4889
|
|
},
|
|
{
|
|
"epoch": 4.919172932330827,
|
|
"grad_norm": 0.22701666134084075,
|
|
"learning_rate": 9.919208423791327e-06,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11517477035522461,
|
|
"step": 875,
|
|
"valid_targets_mean": 8416.4,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 4.947368421052632,
|
|
"grad_norm": 0.20121129270758675,
|
|
"learning_rate": 9.678160287493586e-06,
|
|
"loss": 0.3923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13425880670547485,
|
|
"step": 880,
|
|
"valid_targets_mean": 10285.5,
|
|
"valid_targets_min": 6432
|
|
},
|
|
{
|
|
"epoch": 4.975563909774436,
|
|
"grad_norm": 0.2037395492075607,
|
|
"learning_rate": 9.439138800170873e-06,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13961973786354065,
|
|
"step": 885,
|
|
"valid_targets_mean": 9947.3,
|
|
"valid_targets_min": 3588
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.3346947281102825,
|
|
"learning_rate": 9.202190892667482e-06,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3897612690925598,
|
|
"step": 890,
|
|
"valid_targets_mean": 9228.0,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 5.0281954887218046,
|
|
"grad_norm": 0.21302247364187424,
|
|
"learning_rate": 8.9673630886892e-06,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213647872209549,
|
|
"step": 895,
|
|
"valid_targets_mean": 9319.2,
|
|
"valid_targets_min": 3102
|
|
},
|
|
{
|
|
"epoch": 5.056390977443609,
|
|
"grad_norm": 0.21587342614420138,
|
|
"learning_rate": 8.734701495668564e-06,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12353166937828064,
|
|
"step": 900,
|
|
"valid_targets_mean": 8606.1,
|
|
"valid_targets_min": 3015
|
|
},
|
|
{
|
|
"epoch": 5.084586466165414,
|
|
"grad_norm": 0.2054472136286052,
|
|
"learning_rate": 8.504251795711865e-06,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12194749712944031,
|
|
"step": 905,
|
|
"valid_targets_mean": 9603.5,
|
|
"valid_targets_min": 5439
|
|
},
|
|
{
|
|
"epoch": 5.112781954887218,
|
|
"grad_norm": 0.20120803043734226,
|
|
"learning_rate": 8.276059236629704e-06,
|
|
"loss": 0.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14248129725456238,
|
|
"step": 910,
|
|
"valid_targets_mean": 10743.0,
|
|
"valid_targets_min": 6817
|
|
},
|
|
{
|
|
"epoch": 5.140977443609023,
|
|
"grad_norm": 0.19670861515459628,
|
|
"learning_rate": 8.050168623052737e-06,
|
|
"loss": 0.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12006880342960358,
|
|
"step": 915,
|
|
"valid_targets_mean": 9209.8,
|
|
"valid_targets_min": 2766
|
|
},
|
|
{
|
|
"epoch": 5.169172932330827,
|
|
"grad_norm": 0.20191000822437544,
|
|
"learning_rate": 7.826624307634478e-06,
|
|
"loss": 0.3846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13062483072280884,
|
|
"step": 920,
|
|
"valid_targets_mean": 9041.1,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 5.197368421052632,
|
|
"grad_norm": 0.2397344582787734,
|
|
"learning_rate": 7.605470182342862e-06,
|
|
"loss": 0.3804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13111959397792816,
|
|
"step": 925,
|
|
"valid_targets_mean": 9180.5,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 5.225563909774436,
|
|
"grad_norm": 0.19522323823550103,
|
|
"learning_rate": 7.386749669842246e-06,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12215429544448853,
|
|
"step": 930,
|
|
"valid_targets_mean": 10079.3,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 5.253759398496241,
|
|
"grad_norm": 0.20319852194089968,
|
|
"learning_rate": 7.170505714967551e-06,
|
|
"loss": 0.3802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13277199864387512,
|
|
"step": 935,
|
|
"valid_targets_mean": 9177.5,
|
|
"valid_targets_min": 4229
|
|
},
|
|
{
|
|
"epoch": 5.2819548872180455,
|
|
"grad_norm": 0.2204976318873647,
|
|
"learning_rate": 6.956780776292211e-06,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12151244282722473,
|
|
"step": 940,
|
|
"valid_targets_mean": 9185.8,
|
|
"valid_targets_min": 3386
|
|
},
|
|
{
|
|
"epoch": 5.31015037593985,
|
|
"grad_norm": 0.21028831630622308,
|
|
"learning_rate": 6.7456168177916494e-06,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13375839591026306,
|
|
"step": 945,
|
|
"valid_targets_mean": 9805.7,
|
|
"valid_targets_min": 6152
|
|
},
|
|
{
|
|
"epoch": 5.338345864661654,
|
|
"grad_norm": 0.2137812404469613,
|
|
"learning_rate": 6.537055300603796e-06,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12712323665618896,
|
|
"step": 950,
|
|
"valid_targets_mean": 9146.8,
|
|
"valid_targets_min": 4794
|
|
},
|
|
{
|
|
"epoch": 5.366541353383458,
|
|
"grad_norm": 0.20464250361296388,
|
|
"learning_rate": 6.331137174888382e-06,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389198750257492,
|
|
"step": 955,
|
|
"valid_targets_mean": 10260.5,
|
|
"valid_targets_min": 6000
|
|
},
|
|
{
|
|
"epoch": 5.394736842105263,
|
|
"grad_norm": 0.23219865944151416,
|
|
"learning_rate": 6.127902871786573e-06,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130233034491539,
|
|
"step": 960,
|
|
"valid_targets_mean": 8265.2,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 5.422932330827067,
|
|
"grad_norm": 0.19786930591243593,
|
|
"learning_rate": 5.927392295482468e-06,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13488087058067322,
|
|
"step": 965,
|
|
"valid_targets_mean": 11112.7,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 5.451127819548872,
|
|
"grad_norm": 0.20387542461587269,
|
|
"learning_rate": 5.729644815368076e-06,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13123825192451477,
|
|
"step": 970,
|
|
"valid_targets_mean": 10086.3,
|
|
"valid_targets_min": 2633
|
|
},
|
|
{
|
|
"epoch": 5.4793233082706765,
|
|
"grad_norm": 0.2101922545961769,
|
|
"learning_rate": 5.534699258313314e-06,
|
|
"loss": 0.3765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12793463468551636,
|
|
"step": 975,
|
|
"valid_targets_mean": 8901.5,
|
|
"valid_targets_min": 3394
|
|
},
|
|
{
|
|
"epoch": 5.507518796992481,
|
|
"grad_norm": 0.20752634763736152,
|
|
"learning_rate": 5.342593901042532e-06,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14083635807037354,
|
|
"step": 980,
|
|
"valid_targets_mean": 9925.1,
|
|
"valid_targets_min": 4286
|
|
},
|
|
{
|
|
"epoch": 5.535714285714286,
|
|
"grad_norm": 0.35335285262089283,
|
|
"learning_rate": 5.15336646261903e-06,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12494904547929764,
|
|
"step": 985,
|
|
"valid_targets_mean": 9939.7,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 5.56390977443609,
|
|
"grad_norm": 0.19133182123598222,
|
|
"learning_rate": 4.9670540970390636e-06,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396777480840683,
|
|
"step": 990,
|
|
"valid_targets_mean": 10060.9,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 5.592105263157895,
|
|
"grad_norm": 0.1964296720328508,
|
|
"learning_rate": 4.783693385936841e-06,
|
|
"loss": 0.3848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13299846649169922,
|
|
"step": 995,
|
|
"valid_targets_mean": 9927.0,
|
|
"valid_targets_min": 4932
|
|
},
|
|
{
|
|
"epoch": 5.620300751879699,
|
|
"grad_norm": 0.2292044326812199,
|
|
"learning_rate": 4.603320331401873e-06,
|
|
"loss": 0.3774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13259446620941162,
|
|
"step": 1000,
|
|
"valid_targets_mean": 9544.0,
|
|
"valid_targets_min": 3814
|
|
},
|
|
{
|
|
"epoch": 5.648496240601504,
|
|
"grad_norm": 0.19384689052380707,
|
|
"learning_rate": 4.425970348910118e-06,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12481251358985901,
|
|
"step": 1005,
|
|
"valid_targets_mean": 9298.8,
|
|
"valid_targets_min": 4568
|
|
},
|
|
{
|
|
"epoch": 5.676691729323308,
|
|
"grad_norm": 0.241070665980004,
|
|
"learning_rate": 4.251678260370282e-06,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310911476612091,
|
|
"step": 1010,
|
|
"valid_targets_mean": 9981.9,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 5.704887218045113,
|
|
"grad_norm": 0.20849322427978206,
|
|
"learning_rate": 4.080478287286711e-06,
|
|
"loss": 0.3776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14320482313632965,
|
|
"step": 1015,
|
|
"valid_targets_mean": 10985.3,
|
|
"valid_targets_min": 6200
|
|
},
|
|
{
|
|
"epoch": 5.7330827067669174,
|
|
"grad_norm": 0.2046835056413381,
|
|
"learning_rate": 3.912404044040146e-06,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12832486629486084,
|
|
"step": 1020,
|
|
"valid_targets_mean": 9840.4,
|
|
"valid_targets_min": 5349
|
|
},
|
|
{
|
|
"epoch": 5.761278195488722,
|
|
"grad_norm": 0.209012305730083,
|
|
"learning_rate": 3.747488531287662e-06,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327711045742035,
|
|
"step": 1025,
|
|
"valid_targets_mean": 9742.9,
|
|
"valid_targets_min": 2831
|
|
},
|
|
{
|
|
"epoch": 5.7894736842105265,
|
|
"grad_norm": 0.19674967780451014,
|
|
"learning_rate": 3.58576412948316e-06,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13531413674354553,
|
|
"step": 1030,
|
|
"valid_targets_mean": 10704.5,
|
|
"valid_targets_min": 5899
|
|
},
|
|
{
|
|
"epoch": 5.817669172932331,
|
|
"grad_norm": 0.24741818557781142,
|
|
"learning_rate": 3.4272625925195667e-06,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324344128370285,
|
|
"step": 1035,
|
|
"valid_targets_mean": 10045.7,
|
|
"valid_targets_min": 3420
|
|
},
|
|
{
|
|
"epoch": 5.845864661654136,
|
|
"grad_norm": 0.20550211199403412,
|
|
"learning_rate": 3.2720150414941366e-06,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14488618075847626,
|
|
"step": 1040,
|
|
"valid_targets_mean": 10401.9,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 5.87406015037594,
|
|
"grad_norm": 0.1945930539190225,
|
|
"learning_rate": 3.120051958597916e-06,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.115653857588768,
|
|
"step": 1045,
|
|
"valid_targets_mean": 8594.2,
|
|
"valid_targets_min": 4221
|
|
},
|
|
{
|
|
"epoch": 5.902255639097744,
|
|
"grad_norm": 0.19145738360862058,
|
|
"learning_rate": 2.971403181130734e-06,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12772971391677856,
|
|
"step": 1050,
|
|
"valid_targets_mean": 10064.6,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 5.930451127819548,
|
|
"grad_norm": 0.19566345119018624,
|
|
"learning_rate": 2.8260978956427388e-06,
|
|
"loss": 0.386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1241394504904747,
|
|
"step": 1055,
|
|
"valid_targets_mean": 9185.3,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 5.958646616541353,
|
|
"grad_norm": 0.21202924343158872,
|
|
"learning_rate": 2.6841646322037827e-06,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12480293959379196,
|
|
"step": 1060,
|
|
"valid_targets_mean": 9387.4,
|
|
"valid_targets_min": 3979
|
|
},
|
|
{
|
|
"epoch": 5.9868421052631575,
|
|
"grad_norm": 0.20254745835885732,
|
|
"learning_rate": 2.5456312588016285e-06,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1360769122838974,
|
|
"step": 1065,
|
|
"valid_targets_mean": 9964.2,
|
|
"valid_targets_min": 5163
|
|
},
|
|
{
|
|
"epoch": 6.011278195488722,
|
|
"grad_norm": 0.19454756606987048,
|
|
"learning_rate": 2.410524975870221e-06,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12437275052070618,
|
|
"step": 1070,
|
|
"valid_targets_mean": 9493.0,
|
|
"valid_targets_min": 3644
|
|
},
|
|
{
|
|
"epoch": 6.0394736842105265,
|
|
"grad_norm": 0.20219767320500046,
|
|
"learning_rate": 2.2788723109489675e-06,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13244961202144623,
|
|
"step": 1075,
|
|
"valid_targets_mean": 8873.8,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 6.067669172932331,
|
|
"grad_norm": 0.19808173432092233,
|
|
"learning_rate": 2.1506991134742017e-06,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11605939269065857,
|
|
"step": 1080,
|
|
"valid_targets_mean": 9045.4,
|
|
"valid_targets_min": 2608
|
|
},
|
|
{
|
|
"epoch": 6.095864661654136,
|
|
"grad_norm": 0.1993158153091752,
|
|
"learning_rate": 2.026030549703735e-06,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352459192276001,
|
|
"step": 1085,
|
|
"valid_targets_mean": 9781.2,
|
|
"valid_targets_min": 4811
|
|
},
|
|
{
|
|
"epoch": 6.12406015037594,
|
|
"grad_norm": 0.19751305555869267,
|
|
"learning_rate": 1.9048910977755675e-06,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11429759114980698,
|
|
"step": 1090,
|
|
"valid_targets_mean": 8647.2,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 6.152255639097745,
|
|
"grad_norm": 0.18493304636707505,
|
|
"learning_rate": 1.7873045429017356e-06,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12613776326179504,
|
|
"step": 1095,
|
|
"valid_targets_mean": 9668.5,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 6.180451127819548,
|
|
"grad_norm": 0.18776161177206288,
|
|
"learning_rate": 1.6732939726981645e-06,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11943809688091278,
|
|
"step": 1100,
|
|
"valid_targets_mean": 9483.2,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 6.208646616541353,
|
|
"grad_norm": 0.18838448317833534,
|
|
"learning_rate": 1.5628817726515166e-06,
|
|
"loss": 0.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12597958743572235,
|
|
"step": 1105,
|
|
"valid_targets_mean": 9460.0,
|
|
"valid_targets_min": 4144
|
|
},
|
|
{
|
|
"epoch": 6.2368421052631575,
|
|
"grad_norm": 0.2375705493410105,
|
|
"learning_rate": 1.4560896217239017e-06,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11778394877910614,
|
|
"step": 1110,
|
|
"valid_targets_mean": 8584.2,
|
|
"valid_targets_min": 4502
|
|
},
|
|
{
|
|
"epoch": 6.265037593984962,
|
|
"grad_norm": 0.18438660001412205,
|
|
"learning_rate": 1.3529384880963092e-06,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12522190809249878,
|
|
"step": 1115,
|
|
"valid_targets_mean": 9539.2,
|
|
"valid_targets_min": 4219
|
|
},
|
|
{
|
|
"epoch": 6.293233082706767,
|
|
"grad_norm": 0.1938567399119474,
|
|
"learning_rate": 1.2534486250515943e-06,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12083841115236282,
|
|
"step": 1120,
|
|
"valid_targets_mean": 9289.3,
|
|
"valid_targets_min": 5185
|
|
},
|
|
{
|
|
"epoch": 6.321428571428571,
|
|
"grad_norm": 0.18207638742287655,
|
|
"learning_rate": 1.1576395669978212e-06,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12208961695432663,
|
|
"step": 1125,
|
|
"valid_targets_mean": 9382.1,
|
|
"valid_targets_min": 2742
|
|
},
|
|
{
|
|
"epoch": 6.349624060150376,
|
|
"grad_norm": 0.19649914276382552,
|
|
"learning_rate": 1.0655301256327788e-06,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13968250155448914,
|
|
"step": 1130,
|
|
"valid_targets_mean": 9572.9,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 6.37781954887218,
|
|
"grad_norm": 0.1867429869043491,
|
|
"learning_rate": 9.771383862503847e-07,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13156625628471375,
|
|
"step": 1135,
|
|
"valid_targets_mean": 9659.4,
|
|
"valid_targets_min": 4569
|
|
},
|
|
{
|
|
"epoch": 6.406015037593985,
|
|
"grad_norm": 0.19929157079790816,
|
|
"learning_rate": 8.924817041897072e-07,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12250533699989319,
|
|
"step": 1140,
|
|
"valid_targets_mean": 9121.6,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 6.434210526315789,
|
|
"grad_norm": 0.2013351979879708,
|
|
"learning_rate": 8.115767014273213e-07,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.128327414393425,
|
|
"step": 1145,
|
|
"valid_targets_mean": 10044.7,
|
|
"valid_targets_min": 5153
|
|
},
|
|
{
|
|
"epoch": 6.462406015037594,
|
|
"grad_norm": 0.23209535963825284,
|
|
"learning_rate": 7.344392633136555e-07,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12083439528942108,
|
|
"step": 1150,
|
|
"valid_targets_mean": 8392.7,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 6.4906015037593985,
|
|
"grad_norm": 0.1840825719493825,
|
|
"learning_rate": 6.610845354539796e-07,
|
|
"loss": 0.3818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267203688621521,
|
|
"step": 1155,
|
|
"valid_targets_mean": 9207.0,
|
|
"valid_targets_min": 3549
|
|
},
|
|
{
|
|
"epoch": 6.518796992481203,
|
|
"grad_norm": 0.2134352841117307,
|
|
"learning_rate": 5.91526920734613e-07,
|
|
"loss": 0.3764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12298408150672913,
|
|
"step": 1160,
|
|
"valid_targets_mean": 9080.2,
|
|
"valid_targets_min": 3904
|
|
},
|
|
{
|
|
"epoch": 6.546992481203008,
|
|
"grad_norm": 0.19169087509874685,
|
|
"learning_rate": 5.257800764949972e-07,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12543432414531708,
|
|
"step": 1165,
|
|
"valid_targets_mean": 10044.2,
|
|
"valid_targets_min": 3035
|
|
},
|
|
{
|
|
"epoch": 6.575187969924812,
|
|
"grad_norm": 0.19277136539646209,
|
|
"learning_rate": 4.6385691184611894e-07,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11640805006027222,
|
|
"step": 1170,
|
|
"valid_targets_mean": 8799.6,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 6.603383458646617,
|
|
"grad_norm": 0.20398736100512838,
|
|
"learning_rate": 4.057695851358823e-07,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13924311101436615,
|
|
"step": 1175,
|
|
"valid_targets_mean": 9870.4,
|
|
"valid_targets_min": 4946
|
|
},
|
|
{
|
|
"epoch": 6.631578947368421,
|
|
"grad_norm": 0.18409054912360828,
|
|
"learning_rate": 3.5152950156184475e-07,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12219169735908508,
|
|
"step": 1180,
|
|
"valid_targets_mean": 10114.9,
|
|
"valid_targets_min": 4133
|
|
},
|
|
{
|
|
"epoch": 6.659774436090226,
|
|
"grad_norm": 0.18569484256245025,
|
|
"learning_rate": 3.0114731093187743e-07,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12492775917053223,
|
|
"step": 1185,
|
|
"valid_targets_mean": 9793.9,
|
|
"valid_targets_min": 3441
|
|
},
|
|
{
|
|
"epoch": 6.68796992481203,
|
|
"grad_norm": 0.18747732627298463,
|
|
"learning_rate": 2.5463290557310895e-07,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14373329281806946,
|
|
"step": 1190,
|
|
"valid_targets_mean": 10796.1,
|
|
"valid_targets_min": 5625
|
|
},
|
|
{
|
|
"epoch": 6.716165413533835,
|
|
"grad_norm": 0.21713007804547255,
|
|
"learning_rate": 2.1199541838961003e-07,
|
|
"loss": 0.3802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11859285831451416,
|
|
"step": 1195,
|
|
"valid_targets_mean": 8973.0,
|
|
"valid_targets_min": 1970
|
|
},
|
|
{
|
|
"epoch": 6.7443609022556394,
|
|
"grad_norm": 0.1977847982916674,
|
|
"learning_rate": 1.7324322106919033e-07,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11961568892002106,
|
|
"step": 1200,
|
|
"valid_targets_mean": 8944.6,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 6.772556390977444,
|
|
"grad_norm": 0.19875337528932926,
|
|
"learning_rate": 1.3838392243965548e-07,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1357158124446869,
|
|
"step": 1205,
|
|
"valid_targets_mean": 9618.5,
|
|
"valid_targets_min": 4026
|
|
},
|
|
{
|
|
"epoch": 6.8007518796992485,
|
|
"grad_norm": 0.18307191543836435,
|
|
"learning_rate": 1.0742436697483761e-07,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1133594959974289,
|
|
"step": 1210,
|
|
"valid_targets_mean": 9182.8,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 6.828947368421053,
|
|
"grad_norm": 0.18809350835162797,
|
|
"learning_rate": 8.03706334507215e-08,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13438230752944946,
|
|
"step": 1215,
|
|
"valid_targets_mean": 9513.1,
|
|
"valid_targets_min": 2890
|
|
},
|
|
{
|
|
"epoch": 6.857142857142857,
|
|
"grad_norm": 0.20793246089025288,
|
|
"learning_rate": 5.72280337518949e-08,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13449817895889282,
|
|
"step": 1220,
|
|
"valid_targets_mean": 10282.8,
|
|
"valid_targets_min": 3864
|
|
},
|
|
{
|
|
"epoch": 6.885338345864661,
|
|
"grad_norm": 0.19353297540771533,
|
|
"learning_rate": 3.8001111828593897e-08,
|
|
"loss": 0.3788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12465780973434448,
|
|
"step": 1225,
|
|
"valid_targets_mean": 9802.5,
|
|
"valid_targets_min": 5736
|
|
},
|
|
{
|
|
"epoch": 6.913533834586466,
|
|
"grad_norm": 0.1963382310528581,
|
|
"learning_rate": 2.2693642804505477e-08,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13435246050357819,
|
|
"step": 1230,
|
|
"valid_targets_mean": 10365.5,
|
|
"valid_targets_min": 3981
|
|
},
|
|
{
|
|
"epoch": 6.94172932330827,
|
|
"grad_norm": 0.19442832123497505,
|
|
"learning_rate": 1.1308632235547123e-08,
|
|
"loss": 0.3796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1150025874376297,
|
|
"step": 1235,
|
|
"valid_targets_mean": 8373.7,
|
|
"valid_targets_min": 2958
|
|
},
|
|
{
|
|
"epoch": 6.969924812030075,
|
|
"grad_norm": 0.20224428523142735,
|
|
"learning_rate": 3.848315519729973e-09,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252399981021881,
|
|
"step": 1240,
|
|
"valid_targets_mean": 8950.3,
|
|
"valid_targets_min": 4169
|
|
},
|
|
{
|
|
"epoch": 6.9981203007518795,
|
|
"grad_norm": 0.1923211103930577,
|
|
"learning_rate": 3.1415745826102185e-10,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14353609085083008,
|
|
"step": 1245,
|
|
"valid_targets_mean": 9890.2,
|
|
"valid_targets_min": 4361
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 1246,
|
|
"total_flos": 5.085080059986117e+18,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 1.1558,
|
|
"train_samples_per_second": 103019.289,
|
|
"train_steps_per_second": 1078.038
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 1246,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 5.085080059986117e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|