9596 lines
265 KiB
JSON
9596 lines
265 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4340,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008064516129032258,
|
|
"grad_norm": 22.153798311576903,
|
|
"learning_rate": 3.686635944700461e-07,
|
|
"loss": 1.0499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.0028603076934814,
|
|
"step": 5,
|
|
"valid_targets_mean": 2338.9,
|
|
"valid_targets_min": 149
|
|
},
|
|
{
|
|
"epoch": 0.016129032258064516,
|
|
"grad_norm": 24.313655699072513,
|
|
"learning_rate": 8.294930875576038e-07,
|
|
"loss": 1.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.0979466438293457,
|
|
"step": 10,
|
|
"valid_targets_mean": 2394.4,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 0.024193548387096774,
|
|
"grad_norm": 21.100939089574812,
|
|
"learning_rate": 1.2903225806451614e-06,
|
|
"loss": 1.0344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.0227618217468262,
|
|
"step": 15,
|
|
"valid_targets_mean": 2298.0,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 0.03225806451612903,
|
|
"grad_norm": 19.012426353924557,
|
|
"learning_rate": 1.751152073732719e-06,
|
|
"loss": 0.9675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9436134696006775,
|
|
"step": 20,
|
|
"valid_targets_mean": 2984.3,
|
|
"valid_targets_min": 177
|
|
},
|
|
{
|
|
"epoch": 0.04032258064516129,
|
|
"grad_norm": 11.978234364686644,
|
|
"learning_rate": 2.211981566820277e-06,
|
|
"loss": 0.8978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8549253344535828,
|
|
"step": 25,
|
|
"valid_targets_mean": 2548.9,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 0.04838709677419355,
|
|
"grad_norm": 6.876328979491469,
|
|
"learning_rate": 2.6728110599078343e-06,
|
|
"loss": 0.7635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7230551838874817,
|
|
"step": 30,
|
|
"valid_targets_mean": 2607.9,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 0.056451612903225805,
|
|
"grad_norm": 3.216264237135888,
|
|
"learning_rate": 3.1336405529953917e-06,
|
|
"loss": 0.7335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6459892988204956,
|
|
"step": 35,
|
|
"valid_targets_mean": 2942.9,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 0.06451612903225806,
|
|
"grad_norm": 2.033108895381906,
|
|
"learning_rate": 3.5944700460829495e-06,
|
|
"loss": 0.6942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6203195452690125,
|
|
"step": 40,
|
|
"valid_targets_mean": 3209.6,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.07258064516129033,
|
|
"grad_norm": 1.9428790990576104,
|
|
"learning_rate": 4.055299539170508e-06,
|
|
"loss": 0.692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6886614561080933,
|
|
"step": 45,
|
|
"valid_targets_mean": 2969.2,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 0.08064516129032258,
|
|
"grad_norm": 1.309751703713515,
|
|
"learning_rate": 4.516129032258065e-06,
|
|
"loss": 0.6484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5555537939071655,
|
|
"step": 50,
|
|
"valid_targets_mean": 3255.8,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 0.08870967741935484,
|
|
"grad_norm": 1.795407780726021,
|
|
"learning_rate": 4.976958525345623e-06,
|
|
"loss": 0.6663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7421262860298157,
|
|
"step": 55,
|
|
"valid_targets_mean": 2511.2,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.0967741935483871,
|
|
"grad_norm": 1.070066710355763,
|
|
"learning_rate": 5.43778801843318e-06,
|
|
"loss": 0.632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.628358006477356,
|
|
"step": 60,
|
|
"valid_targets_mean": 2810.1,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 0.10483870967741936,
|
|
"grad_norm": 1.0816003295877297,
|
|
"learning_rate": 5.8986175115207375e-06,
|
|
"loss": 0.6198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.670561671257019,
|
|
"step": 65,
|
|
"valid_targets_mean": 2344.2,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 0.11290322580645161,
|
|
"grad_norm": 1.212982213113322,
|
|
"learning_rate": 6.359447004608295e-06,
|
|
"loss": 0.6015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6335461139678955,
|
|
"step": 70,
|
|
"valid_targets_mean": 1688.5,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 0.12096774193548387,
|
|
"grad_norm": 0.8240001418853877,
|
|
"learning_rate": 6.820276497695853e-06,
|
|
"loss": 0.6014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5493898391723633,
|
|
"step": 75,
|
|
"valid_targets_mean": 3259.4,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 0.12903225806451613,
|
|
"grad_norm": 0.8857851113533175,
|
|
"learning_rate": 7.28110599078341e-06,
|
|
"loss": 0.5626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6091587543487549,
|
|
"step": 80,
|
|
"valid_targets_mean": 2665.1,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 0.13709677419354838,
|
|
"grad_norm": 0.9254530057559824,
|
|
"learning_rate": 7.741935483870968e-06,
|
|
"loss": 0.5814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6003361940383911,
|
|
"step": 85,
|
|
"valid_targets_mean": 2455.0,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 0.14516129032258066,
|
|
"grad_norm": 0.7870100101147485,
|
|
"learning_rate": 8.202764976958527e-06,
|
|
"loss": 0.5752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5859330892562866,
|
|
"step": 90,
|
|
"valid_targets_mean": 3165.4,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 0.1532258064516129,
|
|
"grad_norm": 0.8708736737561684,
|
|
"learning_rate": 8.663594470046084e-06,
|
|
"loss": 0.5657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5680344700813293,
|
|
"step": 95,
|
|
"valid_targets_mean": 2367.0,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 0.16129032258064516,
|
|
"grad_norm": 0.8540397501407886,
|
|
"learning_rate": 9.124423963133642e-06,
|
|
"loss": 0.5359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49768128991127014,
|
|
"step": 100,
|
|
"valid_targets_mean": 3016.1,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 0.1693548387096774,
|
|
"grad_norm": 0.8124824007674212,
|
|
"learning_rate": 9.5852534562212e-06,
|
|
"loss": 0.5093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49459367990493774,
|
|
"step": 105,
|
|
"valid_targets_mean": 2655.1,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 0.1774193548387097,
|
|
"grad_norm": 0.828601487529907,
|
|
"learning_rate": 1.0046082949308758e-05,
|
|
"loss": 0.5094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5794700384140015,
|
|
"step": 110,
|
|
"valid_targets_mean": 2543.2,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.18548387096774194,
|
|
"grad_norm": 0.8184404398781,
|
|
"learning_rate": 1.0506912442396313e-05,
|
|
"loss": 0.5273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.535675585269928,
|
|
"step": 115,
|
|
"valid_targets_mean": 2693.6,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 0.1935483870967742,
|
|
"grad_norm": 0.8468961613559854,
|
|
"learning_rate": 1.096774193548387e-05,
|
|
"loss": 0.527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5003695487976074,
|
|
"step": 120,
|
|
"valid_targets_mean": 2453.9,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 0.20161290322580644,
|
|
"grad_norm": 0.9495181975956839,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 0.4944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49977967143058777,
|
|
"step": 125,
|
|
"valid_targets_mean": 1848.8,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 0.20967741935483872,
|
|
"grad_norm": 0.9612288275918914,
|
|
"learning_rate": 1.1889400921658986e-05,
|
|
"loss": 0.5277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5501251220703125,
|
|
"step": 130,
|
|
"valid_targets_mean": 1938.6,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 0.21774193548387097,
|
|
"grad_norm": 0.9820067192497332,
|
|
"learning_rate": 1.2350230414746545e-05,
|
|
"loss": 0.4953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5261563658714294,
|
|
"step": 135,
|
|
"valid_targets_mean": 2140.9,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 0.22580645161290322,
|
|
"grad_norm": 0.7339689344800059,
|
|
"learning_rate": 1.2811059907834102e-05,
|
|
"loss": 0.5044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46617329120635986,
|
|
"step": 140,
|
|
"valid_targets_mean": 2844.9,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 0.23387096774193547,
|
|
"grad_norm": 0.7667433097072047,
|
|
"learning_rate": 1.327188940092166e-05,
|
|
"loss": 0.5029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48202306032180786,
|
|
"step": 145,
|
|
"valid_targets_mean": 2817.4,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 0.24193548387096775,
|
|
"grad_norm": 0.7623308074927395,
|
|
"learning_rate": 1.3732718894009217e-05,
|
|
"loss": 0.4783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4694739580154419,
|
|
"step": 150,
|
|
"valid_targets_mean": 3060.5,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 0.7059248929137352,
|
|
"learning_rate": 1.4193548387096776e-05,
|
|
"loss": 0.4711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4394400715827942,
|
|
"step": 155,
|
|
"valid_targets_mean": 3063.4,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 0.25806451612903225,
|
|
"grad_norm": 0.8140608892687763,
|
|
"learning_rate": 1.4654377880184335e-05,
|
|
"loss": 0.4766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4742165505886078,
|
|
"step": 160,
|
|
"valid_targets_mean": 2791.8,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.2661290322580645,
|
|
"grad_norm": 0.8570983881052114,
|
|
"learning_rate": 1.511520737327189e-05,
|
|
"loss": 0.4811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4933184087276459,
|
|
"step": 165,
|
|
"valid_targets_mean": 2616.5,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 0.27419354838709675,
|
|
"grad_norm": 0.6456903451641555,
|
|
"learning_rate": 1.5576036866359447e-05,
|
|
"loss": 0.4642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4399348795413971,
|
|
"step": 170,
|
|
"valid_targets_mean": 3509.4,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 0.28225806451612906,
|
|
"grad_norm": 0.8656206716791085,
|
|
"learning_rate": 1.6036866359447006e-05,
|
|
"loss": 0.4621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4377855658531189,
|
|
"step": 175,
|
|
"valid_targets_mean": 2538.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 0.2903225806451613,
|
|
"grad_norm": 0.92373810220119,
|
|
"learning_rate": 1.6497695852534564e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46523940563201904,
|
|
"step": 180,
|
|
"valid_targets_mean": 2507.2,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 0.29838709677419356,
|
|
"grad_norm": 0.7807086861493168,
|
|
"learning_rate": 1.695852534562212e-05,
|
|
"loss": 0.4588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47477734088897705,
|
|
"step": 185,
|
|
"valid_targets_mean": 3189.0,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 0.3064516129032258,
|
|
"grad_norm": 0.8051571386601775,
|
|
"learning_rate": 1.741935483870968e-05,
|
|
"loss": 0.4556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46097469329833984,
|
|
"step": 190,
|
|
"valid_targets_mean": 2955.4,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 0.31451612903225806,
|
|
"grad_norm": 0.9383905522147491,
|
|
"learning_rate": 1.7880184331797237e-05,
|
|
"loss": 0.4478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45917099714279175,
|
|
"step": 195,
|
|
"valid_targets_mean": 1947.2,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 0.3225806451612903,
|
|
"grad_norm": 0.7935710920096379,
|
|
"learning_rate": 1.8341013824884796e-05,
|
|
"loss": 0.4343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45032191276550293,
|
|
"step": 200,
|
|
"valid_targets_mean": 2611.2,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 0.33064516129032256,
|
|
"grad_norm": 0.7239525505374069,
|
|
"learning_rate": 1.880184331797235e-05,
|
|
"loss": 0.4293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4075087904930115,
|
|
"step": 205,
|
|
"valid_targets_mean": 3403.3,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 0.3387096774193548,
|
|
"grad_norm": 0.8681887257543291,
|
|
"learning_rate": 1.926267281105991e-05,
|
|
"loss": 0.4454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4293495714664459,
|
|
"step": 210,
|
|
"valid_targets_mean": 2276.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.3467741935483871,
|
|
"grad_norm": 0.8459756823945307,
|
|
"learning_rate": 1.9723502304147465e-05,
|
|
"loss": 0.4689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4373023509979248,
|
|
"step": 215,
|
|
"valid_targets_mean": 2690.1,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.3548387096774194,
|
|
"grad_norm": 2.259119486455417,
|
|
"learning_rate": 2.0184331797235024e-05,
|
|
"loss": 0.4402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.420354425907135,
|
|
"step": 220,
|
|
"valid_targets_mean": 3459.8,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.3629032258064516,
|
|
"grad_norm": 0.897688678006966,
|
|
"learning_rate": 2.0645161290322582e-05,
|
|
"loss": 0.4478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4796941578388214,
|
|
"step": 225,
|
|
"valid_targets_mean": 2300.6,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 0.3709677419354839,
|
|
"grad_norm": 0.8694935775294907,
|
|
"learning_rate": 2.110599078341014e-05,
|
|
"loss": 0.4499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42712506651878357,
|
|
"step": 230,
|
|
"valid_targets_mean": 2498.7,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 0.3790322580645161,
|
|
"grad_norm": 0.8939392929687752,
|
|
"learning_rate": 2.1566820276497696e-05,
|
|
"loss": 0.4551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45447176694869995,
|
|
"step": 235,
|
|
"valid_targets_mean": 2198.6,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.3870967741935484,
|
|
"grad_norm": 0.8004418393110241,
|
|
"learning_rate": 2.2027649769585255e-05,
|
|
"loss": 0.4593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4889240860939026,
|
|
"step": 240,
|
|
"valid_targets_mean": 3048.8,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 0.3951612903225806,
|
|
"grad_norm": 0.8593232184641156,
|
|
"learning_rate": 2.2488479262672814e-05,
|
|
"loss": 0.4652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4700632095336914,
|
|
"step": 245,
|
|
"valid_targets_mean": 2631.5,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.4032258064516129,
|
|
"grad_norm": 1.140404961170291,
|
|
"learning_rate": 2.2949308755760372e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38768091797828674,
|
|
"step": 250,
|
|
"valid_targets_mean": 2475.6,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 0.4112903225806452,
|
|
"grad_norm": 0.6485480850385038,
|
|
"learning_rate": 2.3410138248847928e-05,
|
|
"loss": 0.4075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3335469961166382,
|
|
"step": 255,
|
|
"valid_targets_mean": 3376.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.41935483870967744,
|
|
"grad_norm": 0.78715876715725,
|
|
"learning_rate": 2.3870967741935486e-05,
|
|
"loss": 0.4185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4031834006309509,
|
|
"step": 260,
|
|
"valid_targets_mean": 2965.1,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 0.4274193548387097,
|
|
"grad_norm": 0.7718408830172203,
|
|
"learning_rate": 2.4331797235023045e-05,
|
|
"loss": 0.4267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4061693251132965,
|
|
"step": 265,
|
|
"valid_targets_mean": 3494.8,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 0.43548387096774194,
|
|
"grad_norm": 0.9219338882404587,
|
|
"learning_rate": 2.4792626728110604e-05,
|
|
"loss": 0.443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.456881046295166,
|
|
"step": 270,
|
|
"valid_targets_mean": 2524.9,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.4435483870967742,
|
|
"grad_norm": 0.7343666066268928,
|
|
"learning_rate": 2.525345622119816e-05,
|
|
"loss": 0.4363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4578344225883484,
|
|
"step": 275,
|
|
"valid_targets_mean": 3219.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 0.45161290322580644,
|
|
"grad_norm": 0.7522794310132573,
|
|
"learning_rate": 2.5714285714285718e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.387943297624588,
|
|
"step": 280,
|
|
"valid_targets_mean": 3286.3,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.4596774193548387,
|
|
"grad_norm": 0.8808096316714892,
|
|
"learning_rate": 2.6175115207373277e-05,
|
|
"loss": 0.4454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4791191518306732,
|
|
"step": 285,
|
|
"valid_targets_mean": 2359.4,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 0.46774193548387094,
|
|
"grad_norm": 0.7072758630589446,
|
|
"learning_rate": 2.663594470046083e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4219507873058319,
|
|
"step": 290,
|
|
"valid_targets_mean": 3786.7,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 0.47580645161290325,
|
|
"grad_norm": 0.8373459567182849,
|
|
"learning_rate": 2.7096774193548387e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40138912200927734,
|
|
"step": 295,
|
|
"valid_targets_mean": 2658.4,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 0.4838709677419355,
|
|
"grad_norm": 0.725510530632353,
|
|
"learning_rate": 2.7557603686635946e-05,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41206637024879456,
|
|
"step": 300,
|
|
"valid_targets_mean": 3544.5,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.49193548387096775,
|
|
"grad_norm": 0.83051584026467,
|
|
"learning_rate": 2.8018433179723505e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35923537611961365,
|
|
"step": 305,
|
|
"valid_targets_mean": 2149.1,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.8142342134992904,
|
|
"learning_rate": 2.847926267281106e-05,
|
|
"loss": 0.4233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39805740118026733,
|
|
"step": 310,
|
|
"valid_targets_mean": 2615.1,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 0.5080645161290323,
|
|
"grad_norm": 0.7975067666076274,
|
|
"learning_rate": 2.894009216589862e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147194981575012,
|
|
"step": 315,
|
|
"valid_targets_mean": 2832.3,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 0.5161290322580645,
|
|
"grad_norm": 0.8331527506439725,
|
|
"learning_rate": 2.9400921658986177e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37615129351615906,
|
|
"step": 320,
|
|
"valid_targets_mean": 2367.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 0.5241935483870968,
|
|
"grad_norm": 0.9298714961552893,
|
|
"learning_rate": 2.9861751152073736e-05,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43610337376594543,
|
|
"step": 325,
|
|
"valid_targets_mean": 1960.0,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 0.532258064516129,
|
|
"grad_norm": 0.7803898122512177,
|
|
"learning_rate": 3.032258064516129e-05,
|
|
"loss": 0.3858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40581780672073364,
|
|
"step": 330,
|
|
"valid_targets_mean": 3651.7,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 0.5403225806451613,
|
|
"grad_norm": 0.8421136446549863,
|
|
"learning_rate": 3.078341013824885e-05,
|
|
"loss": 0.4193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40105634927749634,
|
|
"step": 335,
|
|
"valid_targets_mean": 3268.5,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 0.5483870967741935,
|
|
"grad_norm": 0.9209675895059894,
|
|
"learning_rate": 3.124423963133641e-05,
|
|
"loss": 0.4173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4191959798336029,
|
|
"step": 340,
|
|
"valid_targets_mean": 2238.6,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 0.5564516129032258,
|
|
"grad_norm": 0.8156862894197787,
|
|
"learning_rate": 3.170506912442397e-05,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4362891912460327,
|
|
"step": 345,
|
|
"valid_targets_mean": 2997.9,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 0.5645161290322581,
|
|
"grad_norm": 0.7899678562138566,
|
|
"learning_rate": 3.2165898617511526e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42016178369522095,
|
|
"step": 350,
|
|
"valid_targets_mean": 2863.1,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 0.5725806451612904,
|
|
"grad_norm": 0.8558684416782738,
|
|
"learning_rate": 3.2626728110599085e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4148564636707306,
|
|
"step": 355,
|
|
"valid_targets_mean": 2949.4,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 0.5806451612903226,
|
|
"grad_norm": 0.8746517054653995,
|
|
"learning_rate": 3.3087557603686637e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44954875111579895,
|
|
"step": 360,
|
|
"valid_targets_mean": 2254.4,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 0.5887096774193549,
|
|
"grad_norm": 0.808896433518699,
|
|
"learning_rate": 3.3548387096774195e-05,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3894229233264923,
|
|
"step": 365,
|
|
"valid_targets_mean": 2525.1,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 0.5967741935483871,
|
|
"grad_norm": 0.7576237738559274,
|
|
"learning_rate": 3.4009216589861754e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3938983976840973,
|
|
"step": 370,
|
|
"valid_targets_mean": 2815.8,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 0.6048387096774194,
|
|
"grad_norm": 0.8846305461431809,
|
|
"learning_rate": 3.447004608294931e-05,
|
|
"loss": 0.4375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41364169120788574,
|
|
"step": 375,
|
|
"valid_targets_mean": 2287.9,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 0.6129032258064516,
|
|
"grad_norm": 0.7210368111410277,
|
|
"learning_rate": 3.493087557603687e-05,
|
|
"loss": 0.4129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4009098708629608,
|
|
"step": 380,
|
|
"valid_targets_mean": 3262.9,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.6209677419354839,
|
|
"grad_norm": 0.8497932476194547,
|
|
"learning_rate": 3.539170506912443e-05,
|
|
"loss": 0.4,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44678089022636414,
|
|
"step": 385,
|
|
"valid_targets_mean": 3296.5,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 0.6290322580645161,
|
|
"grad_norm": 0.8026256428850127,
|
|
"learning_rate": 3.585253456221198e-05,
|
|
"loss": 0.4248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4670369029045105,
|
|
"step": 390,
|
|
"valid_targets_mean": 2745.7,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.6370967741935484,
|
|
"grad_norm": 0.7890932153260195,
|
|
"learning_rate": 3.631336405529954e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3878169655799866,
|
|
"step": 395,
|
|
"valid_targets_mean": 2648.2,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 0.6451612903225806,
|
|
"grad_norm": 0.792677971928404,
|
|
"learning_rate": 3.67741935483871e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41010338068008423,
|
|
"step": 400,
|
|
"valid_targets_mean": 2770.4,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 0.6532258064516129,
|
|
"grad_norm": 0.7077087950989933,
|
|
"learning_rate": 3.723502304147466e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36924007534980774,
|
|
"step": 405,
|
|
"valid_targets_mean": 3274.1,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 0.6612903225806451,
|
|
"grad_norm": 0.8147092929472229,
|
|
"learning_rate": 3.7695852534562217e-05,
|
|
"loss": 0.3928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37256717681884766,
|
|
"step": 410,
|
|
"valid_targets_mean": 2490.9,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 0.6693548387096774,
|
|
"grad_norm": 0.8726915645612878,
|
|
"learning_rate": 3.815668202764977e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4332878291606903,
|
|
"step": 415,
|
|
"valid_targets_mean": 2450.7,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 0.6774193548387096,
|
|
"grad_norm": 0.8877543315890436,
|
|
"learning_rate": 3.861751152073733e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40370655059814453,
|
|
"step": 420,
|
|
"valid_targets_mean": 2743.3,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 0.6854838709677419,
|
|
"grad_norm": 1.0020304389735728,
|
|
"learning_rate": 3.9078341013824886e-05,
|
|
"loss": 0.3747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4023987650871277,
|
|
"step": 425,
|
|
"valid_targets_mean": 1796.6,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 0.6935483870967742,
|
|
"grad_norm": 0.9017472064219423,
|
|
"learning_rate": 3.9539170506912445e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3981413245201111,
|
|
"step": 430,
|
|
"valid_targets_mean": 2609.2,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 0.7016129032258065,
|
|
"grad_norm": 0.7312844272905582,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.349636435508728,
|
|
"step": 435,
|
|
"valid_targets_mean": 2927.2,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 0.7096774193548387,
|
|
"grad_norm": 0.9604661456017165,
|
|
"learning_rate": 3.9999838275919404e-05,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4350871443748474,
|
|
"step": 440,
|
|
"valid_targets_mean": 2711.8,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 0.717741935483871,
|
|
"grad_norm": 0.8110891793954338,
|
|
"learning_rate": 3.9999353106293074e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37878522276878357,
|
|
"step": 445,
|
|
"valid_targets_mean": 2750.4,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 0.7258064516129032,
|
|
"grad_norm": 0.9731450376436603,
|
|
"learning_rate": 3.999854449896738e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.384330153465271,
|
|
"step": 450,
|
|
"valid_targets_mean": 2557.8,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 0.7338709677419355,
|
|
"grad_norm": 0.908414908463216,
|
|
"learning_rate": 3.999741246701944e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40037620067596436,
|
|
"step": 455,
|
|
"valid_targets_mean": 2684.3,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 0.7419354838709677,
|
|
"grad_norm": 0.769417013452383,
|
|
"learning_rate": 3.9995957028756935e-05,
|
|
"loss": 0.3923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3699696660041809,
|
|
"step": 460,
|
|
"valid_targets_mean": 2991.0,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 0.8560124738397724,
|
|
"learning_rate": 3.999417820771782e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43018868565559387,
|
|
"step": 465,
|
|
"valid_targets_mean": 2671.2,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 0.7580645161290323,
|
|
"grad_norm": 0.8655289922049345,
|
|
"learning_rate": 3.9992076032669905e-05,
|
|
"loss": 0.3753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38146650791168213,
|
|
"step": 470,
|
|
"valid_targets_mean": 2408.2,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 0.7661290322580645,
|
|
"grad_norm": 0.835728755739235,
|
|
"learning_rate": 3.998965053761042e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3994385898113251,
|
|
"step": 475,
|
|
"valid_targets_mean": 2967.5,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 0.7741935483870968,
|
|
"grad_norm": 0.7882208289569873,
|
|
"learning_rate": 3.998690176176547e-05,
|
|
"loss": 0.3913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37645983695983887,
|
|
"step": 480,
|
|
"valid_targets_mean": 3028.9,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 0.782258064516129,
|
|
"grad_norm": 0.8770093534666644,
|
|
"learning_rate": 3.9983829749589383e-05,
|
|
"loss": 0.3884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39846763014793396,
|
|
"step": 485,
|
|
"valid_targets_mean": 2504.9,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 0.7903225806451613,
|
|
"grad_norm": 0.6777415038972312,
|
|
"learning_rate": 3.998043455076398e-05,
|
|
"loss": 0.3624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3279041647911072,
|
|
"step": 490,
|
|
"valid_targets_mean": 3392.4,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 0.7983870967741935,
|
|
"grad_norm": 0.865670948650911,
|
|
"learning_rate": 3.9976716220197805e-05,
|
|
"loss": 0.3932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3613618314266205,
|
|
"step": 495,
|
|
"valid_targets_mean": 2237.6,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 0.8064516129032258,
|
|
"grad_norm": 0.9275763208426596,
|
|
"learning_rate": 3.997267481802522e-05,
|
|
"loss": 0.4042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41816750168800354,
|
|
"step": 500,
|
|
"valid_targets_mean": 1959.5,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.8145161290322581,
|
|
"grad_norm": 0.8866141318700316,
|
|
"learning_rate": 3.996831040960543e-05,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3982001841068268,
|
|
"step": 505,
|
|
"valid_targets_mean": 2453.3,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 0.8225806451612904,
|
|
"grad_norm": 0.9464209718213515,
|
|
"learning_rate": 3.9963623065521435e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3798317313194275,
|
|
"step": 510,
|
|
"valid_targets_mean": 1958.2,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 0.8306451612903226,
|
|
"grad_norm": 0.7413470456844349,
|
|
"learning_rate": 3.995861286157887e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31605035066604614,
|
|
"step": 515,
|
|
"valid_targets_mean": 3700.5,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 0.8387096774193549,
|
|
"grad_norm": 0.7280787716532399,
|
|
"learning_rate": 3.9953279878804786e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328852117061615,
|
|
"step": 520,
|
|
"valid_targets_mean": 2673.6,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 0.8467741935483871,
|
|
"grad_norm": 0.8061320946368317,
|
|
"learning_rate": 3.994762420344638e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39812400937080383,
|
|
"step": 525,
|
|
"valid_targets_mean": 2558.6,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 0.8548387096774194,
|
|
"grad_norm": 0.9945504162841292,
|
|
"learning_rate": 3.994164592696952e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30306100845336914,
|
|
"step": 530,
|
|
"valid_targets_mean": 1992.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.8629032258064516,
|
|
"grad_norm": 0.8212359708287054,
|
|
"learning_rate": 3.9935345146057354e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38747477531433105,
|
|
"step": 535,
|
|
"valid_targets_mean": 2337.0,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 0.8709677419354839,
|
|
"grad_norm": 0.8556620135579487,
|
|
"learning_rate": 3.992872196260866e-05,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43010061979293823,
|
|
"step": 540,
|
|
"valid_targets_mean": 2450.1,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 0.8790322580645161,
|
|
"grad_norm": 0.8037091952548979,
|
|
"learning_rate": 3.992177648373628e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3795507550239563,
|
|
"step": 545,
|
|
"valid_targets_mean": 2788.5,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 0.8870967741935484,
|
|
"grad_norm": 0.7714181278853671,
|
|
"learning_rate": 3.991450882176533e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35366809368133545,
|
|
"step": 550,
|
|
"valid_targets_mean": 2654.3,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.8951612903225806,
|
|
"grad_norm": 0.8313601556367838,
|
|
"learning_rate": 3.99069190942314e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3939402997493744,
|
|
"step": 555,
|
|
"valid_targets_mean": 2366.6,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 0.9032258064516129,
|
|
"grad_norm": 0.9459985636025808,
|
|
"learning_rate": 3.9899007423878656e-05,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4524783194065094,
|
|
"step": 560,
|
|
"valid_targets_mean": 2332.0,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 0.9112903225806451,
|
|
"grad_norm": 0.8389176396547706,
|
|
"learning_rate": 3.989077393865788e-05,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3362874984741211,
|
|
"step": 565,
|
|
"valid_targets_mean": 2266.6,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 0.9193548387096774,
|
|
"grad_norm": 0.9061819502243754,
|
|
"learning_rate": 3.9882218771724324e-05,
|
|
"loss": 0.3589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3696625530719757,
|
|
"step": 570,
|
|
"valid_targets_mean": 3400.4,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 0.9274193548387096,
|
|
"grad_norm": 0.8043796829530833,
|
|
"learning_rate": 3.9873342061435664e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37369734048843384,
|
|
"step": 575,
|
|
"valid_targets_mean": 2942.6,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.9354838709677419,
|
|
"grad_norm": 0.7336198126862365,
|
|
"learning_rate": 3.986414395134967e-05,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37671172618865967,
|
|
"step": 580,
|
|
"valid_targets_mean": 3066.2,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 0.9435483870967742,
|
|
"grad_norm": 0.9005419554905094,
|
|
"learning_rate": 3.985462459022193e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3866089880466461,
|
|
"step": 585,
|
|
"valid_targets_mean": 2411.2,
|
|
"valid_targets_min": 179
|
|
},
|
|
{
|
|
"epoch": 0.9516129032258065,
|
|
"grad_norm": 0.8308651954028964,
|
|
"learning_rate": 3.984478413200345e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3795483410358429,
|
|
"step": 590,
|
|
"valid_targets_mean": 3128.1,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.9596774193548387,
|
|
"grad_norm": 0.7702644905164495,
|
|
"learning_rate": 3.9834622735838114e-05,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36591237783432007,
|
|
"step": 595,
|
|
"valid_targets_mean": 3150.5,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 0.967741935483871,
|
|
"grad_norm": 0.7564650385737629,
|
|
"learning_rate": 3.9824140566060185e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3469301760196686,
|
|
"step": 600,
|
|
"valid_targets_mean": 2893.7,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 0.9758064516129032,
|
|
"grad_norm": 0.9229892699718916,
|
|
"learning_rate": 3.981333779219159e-05,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3691994547843933,
|
|
"step": 605,
|
|
"valid_targets_mean": 2212.1,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 0.9838709677419355,
|
|
"grad_norm": 0.7148111961746664,
|
|
"learning_rate": 3.980221458893919e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33804255723953247,
|
|
"step": 610,
|
|
"valid_targets_mean": 2840.1,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 0.9919354838709677,
|
|
"grad_norm": 0.8362152223027871,
|
|
"learning_rate": 3.979077113619196e-05,
|
|
"loss": 0.3666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42063724994659424,
|
|
"step": 615,
|
|
"valid_targets_mean": 2413.1,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.889453091568353,
|
|
"learning_rate": 3.97790076190181e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37052467465400696,
|
|
"step": 620,
|
|
"valid_targets_mean": 2083.5,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 1.0080645161290323,
|
|
"grad_norm": 0.7617332990666287,
|
|
"learning_rate": 3.976692422766201e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3392760753631592,
|
|
"step": 625,
|
|
"valid_targets_mean": 2838.1,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.0161290322580645,
|
|
"grad_norm": 0.8350829790217384,
|
|
"learning_rate": 3.975452115754121e-05,
|
|
"loss": 0.3405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3286127746105194,
|
|
"step": 630,
|
|
"valid_targets_mean": 2534.4,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.0241935483870968,
|
|
"grad_norm": 1.4292683679096823,
|
|
"learning_rate": 3.9741798609243234e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.337451696395874,
|
|
"step": 635,
|
|
"valid_targets_mean": 2048.8,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 1.032258064516129,
|
|
"grad_norm": 0.8236074672895594,
|
|
"learning_rate": 3.972875678852231e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32509103417396545,
|
|
"step": 640,
|
|
"valid_targets_mean": 2395.7,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 1.0403225806451613,
|
|
"grad_norm": 0.7423421704280885,
|
|
"learning_rate": 3.971539590629608e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34629684686660767,
|
|
"step": 645,
|
|
"valid_targets_mean": 3057.1,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 1.0483870967741935,
|
|
"grad_norm": 0.7829948431301426,
|
|
"learning_rate": 3.970171617864219e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3382974863052368,
|
|
"step": 650,
|
|
"valid_targets_mean": 2558.6,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 1.0564516129032258,
|
|
"grad_norm": 0.7432326362551297,
|
|
"learning_rate": 3.9687717826794785e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245026469230652,
|
|
"step": 655,
|
|
"valid_targets_mean": 2977.2,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.064516129032258,
|
|
"grad_norm": 0.779987723646763,
|
|
"learning_rate": 3.967340107714091e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098449110984802,
|
|
"step": 660,
|
|
"valid_targets_mean": 2869.6,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 1.0725806451612903,
|
|
"grad_norm": 0.663829751056487,
|
|
"learning_rate": 3.96587661612169e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037823438644409,
|
|
"step": 665,
|
|
"valid_targets_mean": 3344.3,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 1.0806451612903225,
|
|
"grad_norm": 0.6913018770364177,
|
|
"learning_rate": 3.964381331570457e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3325570225715637,
|
|
"step": 670,
|
|
"valid_targets_mean": 3265.3,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 1.0887096774193548,
|
|
"grad_norm": 0.7954468215386384,
|
|
"learning_rate": 3.962854278242745e-05,
|
|
"loss": 0.3391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30283841490745544,
|
|
"step": 675,
|
|
"valid_targets_mean": 2193.9,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 1.096774193548387,
|
|
"grad_norm": 0.830948986439798,
|
|
"learning_rate": 3.961295480834683e-05,
|
|
"loss": 0.3411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34882187843322754,
|
|
"step": 680,
|
|
"valid_targets_mean": 3173.1,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 1.1048387096774193,
|
|
"grad_norm": 0.6867297284265137,
|
|
"learning_rate": 3.959704964555778e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3616577982902527,
|
|
"step": 685,
|
|
"valid_targets_mean": 3532.1,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.1129032258064515,
|
|
"grad_norm": 0.969641437549168,
|
|
"learning_rate": 3.95808275512851e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3409084975719452,
|
|
"step": 690,
|
|
"valid_targets_mean": 1853.3,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 1.120967741935484,
|
|
"grad_norm": 0.855549875412275,
|
|
"learning_rate": 3.9564288787879104e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3443087339401245,
|
|
"step": 695,
|
|
"valid_targets_mean": 2837.9,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 1.129032258064516,
|
|
"grad_norm": 0.8342930155413376,
|
|
"learning_rate": 3.954743362281144e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3445117473602295,
|
|
"step": 700,
|
|
"valid_targets_mean": 2575.4,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 1.1370967741935485,
|
|
"grad_norm": 0.7639949649103195,
|
|
"learning_rate": 3.953026232867069e-05,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3205694556236267,
|
|
"step": 705,
|
|
"valid_targets_mean": 2733.9,
|
|
"valid_targets_min": 176
|
|
},
|
|
{
|
|
"epoch": 1.1451612903225807,
|
|
"grad_norm": 0.7815278720980775,
|
|
"learning_rate": 3.951277518315805e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32587891817092896,
|
|
"step": 710,
|
|
"valid_targets_mean": 3220.0,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 1.153225806451613,
|
|
"grad_norm": 0.8046494016697258,
|
|
"learning_rate": 3.9494972469082764e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3240455090999603,
|
|
"step": 715,
|
|
"valid_targets_mean": 2456.1,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 1.1612903225806452,
|
|
"grad_norm": 0.9424599194137768,
|
|
"learning_rate": 3.9476854474357596e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33007752895355225,
|
|
"step": 720,
|
|
"valid_targets_mean": 2186.3,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.1693548387096775,
|
|
"grad_norm": 0.7135703463884432,
|
|
"learning_rate": 3.9458421491994146e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003184199333191,
|
|
"step": 725,
|
|
"valid_targets_mean": 3533.6,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 1.1774193548387097,
|
|
"grad_norm": 0.802492873553148,
|
|
"learning_rate": 3.943967382009812e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34742555022239685,
|
|
"step": 730,
|
|
"valid_targets_mean": 2678.6,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 1.185483870967742,
|
|
"grad_norm": 0.7149816099817451,
|
|
"learning_rate": 3.942061176186452e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33808737993240356,
|
|
"step": 735,
|
|
"valid_targets_mean": 3043.1,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.1935483870967742,
|
|
"grad_norm": 0.6692019932869789,
|
|
"learning_rate": 3.940123562557275e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30320584774017334,
|
|
"step": 740,
|
|
"valid_targets_mean": 3257.3,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 1.2016129032258065,
|
|
"grad_norm": 0.7497792184533375,
|
|
"learning_rate": 3.938154572458156e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32157742977142334,
|
|
"step": 745,
|
|
"valid_targets_mean": 2866.8,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 1.2096774193548387,
|
|
"grad_norm": 0.7393029058739877,
|
|
"learning_rate": 3.936154237732409e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35701730847358704,
|
|
"step": 750,
|
|
"valid_targets_mean": 3347.3,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.217741935483871,
|
|
"grad_norm": 0.8320809328048984,
|
|
"learning_rate": 3.934122590730262e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3124454617500305,
|
|
"step": 755,
|
|
"valid_targets_mean": 2415.2,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 1.2258064516129032,
|
|
"grad_norm": 0.7609697031472477,
|
|
"learning_rate": 3.93205966430834e-05,
|
|
"loss": 0.3509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3307950794696808,
|
|
"step": 760,
|
|
"valid_targets_mean": 2804.5,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 1.2338709677419355,
|
|
"grad_norm": 0.6705883235181443,
|
|
"learning_rate": 3.929965491829131e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3480954170227051,
|
|
"step": 765,
|
|
"valid_targets_mean": 3360.4,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 1.2419354838709677,
|
|
"grad_norm": 0.7197835618285123,
|
|
"learning_rate": 3.927840107160446e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33156174421310425,
|
|
"step": 770,
|
|
"valid_targets_mean": 3182.1,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.926109224909292,
|
|
"learning_rate": 3.925683544674874e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3189659118652344,
|
|
"step": 775,
|
|
"valid_targets_mean": 2162.1,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 1.2580645161290323,
|
|
"grad_norm": 0.8114536492206651,
|
|
"learning_rate": 3.923495839249223e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32908767461776733,
|
|
"step": 780,
|
|
"valid_targets_mean": 2986.9,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 1.2661290322580645,
|
|
"grad_norm": 0.7712706079864282,
|
|
"learning_rate": 3.921277026263959e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3225595951080322,
|
|
"step": 785,
|
|
"valid_targets_mean": 2429.2,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 1.2741935483870968,
|
|
"grad_norm": 0.830084495170336,
|
|
"learning_rate": 3.9190271416026294e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3553479313850403,
|
|
"step": 790,
|
|
"valid_targets_mean": 2340.6,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 1.282258064516129,
|
|
"grad_norm": 0.6519979659668729,
|
|
"learning_rate": 3.916746221651288e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26097941398620605,
|
|
"step": 795,
|
|
"valid_targets_mean": 2931.9,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 1.2903225806451613,
|
|
"grad_norm": 0.765329468230946,
|
|
"learning_rate": 3.9144343032979026e-05,
|
|
"loss": 0.343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30156654119491577,
|
|
"step": 800,
|
|
"valid_targets_mean": 2520.7,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 1.2983870967741935,
|
|
"grad_norm": 0.7633539902410428,
|
|
"learning_rate": 3.9120914239317604e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32053524255752563,
|
|
"step": 805,
|
|
"valid_targets_mean": 2677.7,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.3064516129032258,
|
|
"grad_norm": 0.7597363016351406,
|
|
"learning_rate": 3.909717621442862e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3356117606163025,
|
|
"step": 810,
|
|
"valid_targets_mean": 2820.0,
|
|
"valid_targets_min": 168
|
|
},
|
|
{
|
|
"epoch": 1.314516129032258,
|
|
"grad_norm": 0.697933250664817,
|
|
"learning_rate": 3.907312934221311e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28037482500076294,
|
|
"step": 815,
|
|
"valid_targets_mean": 3155.6,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.3225806451612903,
|
|
"grad_norm": 0.7470526933388427,
|
|
"learning_rate": 3.9048774011566906e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3190610706806183,
|
|
"step": 820,
|
|
"valid_targets_mean": 3149.8,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 1.3306451612903225,
|
|
"grad_norm": 0.7170241067842804,
|
|
"learning_rate": 3.9024110616374334e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164803385734558,
|
|
"step": 825,
|
|
"valid_targets_mean": 3112.4,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 1.3387096774193548,
|
|
"grad_norm": 0.786390449474482,
|
|
"learning_rate": 3.8999139555501904e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36476385593414307,
|
|
"step": 830,
|
|
"valid_targets_mean": 3046.3,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 1.346774193548387,
|
|
"grad_norm": 0.8411900453301722,
|
|
"learning_rate": 3.897386123279179e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3298591077327728,
|
|
"step": 835,
|
|
"valid_targets_mean": 2520.5,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 1.3548387096774195,
|
|
"grad_norm": 0.7662586072168371,
|
|
"learning_rate": 3.894827605705535e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3178406357765198,
|
|
"step": 840,
|
|
"valid_targets_mean": 2750.5,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 1.3629032258064515,
|
|
"grad_norm": 0.7292599022713386,
|
|
"learning_rate": 3.892238444206648e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952648997306824,
|
|
"step": 845,
|
|
"valid_targets_mean": 2671.3,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 1.370967741935484,
|
|
"grad_norm": 0.8513928213675153,
|
|
"learning_rate": 3.889618680655495e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35951143503189087,
|
|
"step": 850,
|
|
"valid_targets_mean": 2184.1,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.379032258064516,
|
|
"grad_norm": 0.6913371700273704,
|
|
"learning_rate": 3.886968357419961e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285597562789917,
|
|
"step": 855,
|
|
"valid_targets_mean": 3467.1,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 1.3870967741935485,
|
|
"grad_norm": 0.7778282469168251,
|
|
"learning_rate": 3.884287517362154e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28004664182662964,
|
|
"step": 860,
|
|
"valid_targets_mean": 2534.9,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 1.3951612903225805,
|
|
"grad_norm": 0.9705025429620857,
|
|
"learning_rate": 3.881576203837714e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34265702962875366,
|
|
"step": 865,
|
|
"valid_targets_mean": 2044.5,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 1.403225806451613,
|
|
"grad_norm": 0.7579016105289186,
|
|
"learning_rate": 3.8788344606951104e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31639543175697327,
|
|
"step": 870,
|
|
"valid_targets_mean": 3047.8,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 1.4112903225806452,
|
|
"grad_norm": 0.7234818274088813,
|
|
"learning_rate": 3.876062332274931e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28561094403266907,
|
|
"step": 875,
|
|
"valid_targets_mean": 2983.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 1.4193548387096775,
|
|
"grad_norm": 0.798772027134817,
|
|
"learning_rate": 3.873259863409169e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3158986568450928,
|
|
"step": 880,
|
|
"valid_targets_mean": 3314.4,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 1.4274193548387097,
|
|
"grad_norm": 0.769879696847225,
|
|
"learning_rate": 3.870427099420493e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3215793967247009,
|
|
"step": 885,
|
|
"valid_targets_mean": 2866.9,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 1.435483870967742,
|
|
"grad_norm": 0.755533996916312,
|
|
"learning_rate": 3.867564086121519e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3253098130226135,
|
|
"step": 890,
|
|
"valid_targets_mean": 2511.7,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 1.4435483870967742,
|
|
"grad_norm": 0.8742322113608911,
|
|
"learning_rate": 3.864670869814066e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35710740089416504,
|
|
"step": 895,
|
|
"valid_targets_mean": 2052.2,
|
|
"valid_targets_min": 202
|
|
},
|
|
{
|
|
"epoch": 1.4516129032258065,
|
|
"grad_norm": 0.7720684346688318,
|
|
"learning_rate": 3.861747497288409e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29578348994255066,
|
|
"step": 900,
|
|
"valid_targets_mean": 2610.3,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 1.4596774193548387,
|
|
"grad_norm": 0.7878212479034656,
|
|
"learning_rate": 3.858794015822521e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33580535650253296,
|
|
"step": 905,
|
|
"valid_targets_mean": 2783.5,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 1.467741935483871,
|
|
"grad_norm": 0.6286582972004154,
|
|
"learning_rate": 3.8558104731813106e-05,
|
|
"loss": 0.3202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678036689758301,
|
|
"step": 910,
|
|
"valid_targets_mean": 3533.7,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 1.4758064516129032,
|
|
"grad_norm": 0.9984219615276483,
|
|
"learning_rate": 3.8527969176158455e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34154292941093445,
|
|
"step": 915,
|
|
"valid_targets_mean": 1635.4,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.4838709677419355,
|
|
"grad_norm": 0.7328620185291194,
|
|
"learning_rate": 3.849753397862577e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29498499631881714,
|
|
"step": 920,
|
|
"valid_targets_mean": 2979.5,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 1.4919354838709677,
|
|
"grad_norm": 0.9562407889006808,
|
|
"learning_rate": 3.8466799631425474e-05,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34355616569519043,
|
|
"step": 925,
|
|
"valid_targets_mean": 1970.5,
|
|
"valid_targets_min": 211
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 0.8232851294773023,
|
|
"learning_rate": 3.843576663160598e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30704474449157715,
|
|
"step": 930,
|
|
"valid_targets_mean": 2216.4,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 1.5080645161290323,
|
|
"grad_norm": 0.7220815648962998,
|
|
"learning_rate": 3.840443548104563e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3310515880584717,
|
|
"step": 935,
|
|
"valid_targets_mean": 3016.9,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 1.5161290322580645,
|
|
"grad_norm": 0.8503324657401694,
|
|
"learning_rate": 3.8372806686444556e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32475751638412476,
|
|
"step": 940,
|
|
"valid_targets_mean": 2588.5,
|
|
"valid_targets_min": 191
|
|
},
|
|
{
|
|
"epoch": 1.5241935483870968,
|
|
"grad_norm": 0.8317538443212328,
|
|
"learning_rate": 3.834088075931655e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30145543813705444,
|
|
"step": 945,
|
|
"valid_targets_mean": 2364.0,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 1.532258064516129,
|
|
"grad_norm": 0.8872911828648146,
|
|
"learning_rate": 3.830865821598073e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913435399532318,
|
|
"step": 950,
|
|
"valid_targets_mean": 1769.3,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 1.5403225806451613,
|
|
"grad_norm": 0.769297124187624,
|
|
"learning_rate": 3.82761395775532e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29973626136779785,
|
|
"step": 955,
|
|
"valid_targets_mean": 2746.4,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 1.5483870967741935,
|
|
"grad_norm": 0.8682068573643323,
|
|
"learning_rate": 3.8243325369938674e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29823970794677734,
|
|
"step": 960,
|
|
"valid_targets_mean": 2104.1,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 1.5564516129032258,
|
|
"grad_norm": 0.7394893590217415,
|
|
"learning_rate": 3.821021612382189e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34347981214523315,
|
|
"step": 965,
|
|
"valid_targets_mean": 2929.8,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 1.564516129032258,
|
|
"grad_norm": 0.8104043023765269,
|
|
"learning_rate": 3.817681237465909e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3732466995716095,
|
|
"step": 970,
|
|
"valid_targets_mean": 2559.1,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 1.5725806451612905,
|
|
"grad_norm": 0.8777364293507144,
|
|
"learning_rate": 3.814311466266934e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31848567724227905,
|
|
"step": 975,
|
|
"valid_targets_mean": 2609.3,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.5806451612903225,
|
|
"grad_norm": 0.7737312378222269,
|
|
"learning_rate": 3.8109123532825784e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34432709217071533,
|
|
"step": 980,
|
|
"valid_targets_mean": 2959.5,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 1.588709677419355,
|
|
"grad_norm": 0.802368079888405,
|
|
"learning_rate": 3.8074839534846856e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3101446032524109,
|
|
"step": 985,
|
|
"valid_targets_mean": 2500.3,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 1.596774193548387,
|
|
"grad_norm": 0.6599039201326624,
|
|
"learning_rate": 3.804026322318735e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29382163286209106,
|
|
"step": 990,
|
|
"valid_targets_mean": 3415.4,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 1.6048387096774195,
|
|
"grad_norm": 0.8028714348620065,
|
|
"learning_rate": 3.800539515702949e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32470858097076416,
|
|
"step": 995,
|
|
"valid_targets_mean": 2622.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.6129032258064515,
|
|
"grad_norm": 0.7004451556733231,
|
|
"learning_rate": 3.7970235900273874e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28437164425849915,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3426.9,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 1.620967741935484,
|
|
"grad_norm": 0.809927442164271,
|
|
"learning_rate": 3.793478602153034e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29896730184555054,
|
|
"step": 1005,
|
|
"valid_targets_mean": 2627.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 1.629032258064516,
|
|
"grad_norm": 0.7912703234534971,
|
|
"learning_rate": 3.78990460941088e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3189028203487396,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2726.3,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 1.6370967741935485,
|
|
"grad_norm": 0.7800876242212562,
|
|
"learning_rate": 3.7863016696009944e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34125587344169617,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2491.2,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 1.6451612903225805,
|
|
"grad_norm": 0.8425638099391318,
|
|
"learning_rate": 3.78266984099159e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836432158946991,
|
|
"step": 1020,
|
|
"valid_targets_mean": 2419.2,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 1.653225806451613,
|
|
"grad_norm": 0.8450262885751405,
|
|
"learning_rate": 3.779009182318081e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.345761775970459,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2448.4,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.661290322580645,
|
|
"grad_norm": 0.8494775317752005,
|
|
"learning_rate": 3.775319752782133e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31920838356018066,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2367.8,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 1.6693548387096775,
|
|
"grad_norm": 0.8896097208351357,
|
|
"learning_rate": 3.7716016120507066e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.319049596786499,
|
|
"step": 1035,
|
|
"valid_targets_mean": 2728.2,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 1.6774193548387095,
|
|
"grad_norm": 0.7305892742117013,
|
|
"learning_rate": 3.76785482025509e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27338457107543945,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3410.7,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 1.685483870967742,
|
|
"grad_norm": 0.8146617564852123,
|
|
"learning_rate": 3.7640794379899295e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31929486989974976,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2604.1,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 1.6935483870967742,
|
|
"grad_norm": 0.7717564124737906,
|
|
"learning_rate": 3.7602755263122486e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172432780265808,
|
|
"step": 1050,
|
|
"valid_targets_mean": 2824.6,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 1.7016129032258065,
|
|
"grad_norm": 0.7602965223285216,
|
|
"learning_rate": 3.756443146740457e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.323469877243042,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2858.6,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 1.7096774193548387,
|
|
"grad_norm": 0.8210353899887861,
|
|
"learning_rate": 3.752582361253363e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3094032406806946,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2479.8,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 1.717741935483871,
|
|
"grad_norm": 0.7542099551460979,
|
|
"learning_rate": 3.7486932322891646e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996753752231598,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2666.7,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 1.7258064516129032,
|
|
"grad_norm": 0.7481144820049127,
|
|
"learning_rate": 3.7447758227444416e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976187467575073,
|
|
"step": 1070,
|
|
"valid_targets_mean": 2465.6,
|
|
"valid_targets_min": 170
|
|
},
|
|
{
|
|
"epoch": 1.7338709677419355,
|
|
"grad_norm": 0.7125371207722736,
|
|
"learning_rate": 3.74083019597314e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28654929995536804,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3242.5,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 1.7419354838709677,
|
|
"grad_norm": 0.8504577180014085,
|
|
"learning_rate": 3.736856415785546e-05,
|
|
"loss": 0.3415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3364261984825134,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2045.4,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"grad_norm": 0.7323741138919478,
|
|
"learning_rate": 3.732854546447255e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29836729168891907,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3061.2,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 1.7580645161290323,
|
|
"grad_norm": 0.7967875974490015,
|
|
"learning_rate": 3.72882465267813e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31713539361953735,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2482.7,
|
|
"valid_targets_min": 170
|
|
},
|
|
{
|
|
"epoch": 1.7661290322580645,
|
|
"grad_norm": 0.7470450380912257,
|
|
"learning_rate": 3.724766799651258e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073997497558594,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2560.1,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 1.7741935483870968,
|
|
"grad_norm": 0.7489455355715164,
|
|
"learning_rate": 3.7206810529918935e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869497239589691,
|
|
"step": 1100,
|
|
"valid_targets_mean": 2734.8,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.782258064516129,
|
|
"grad_norm": 0.7243586019613993,
|
|
"learning_rate": 3.716567478776399e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28402721881866455,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3091.9,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 1.7903225806451613,
|
|
"grad_norm": 0.7240944277025194,
|
|
"learning_rate": 3.712426143531176e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3084307909011841,
|
|
"step": 1110,
|
|
"valid_targets_mean": 3116.0,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 1.7983870967741935,
|
|
"grad_norm": 0.9378709981299327,
|
|
"learning_rate": 3.708257114231587e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3133944869041443,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2010.8,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.8064516129032258,
|
|
"grad_norm": 0.7387829151879912,
|
|
"learning_rate": 3.7040604583008756e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566390633583069,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3128.1,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 1.814516129032258,
|
|
"grad_norm": 0.7255070820708734,
|
|
"learning_rate": 3.6998362436090736e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25356805324554443,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2791.9,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 1.8225806451612905,
|
|
"grad_norm": 0.6737217278182145,
|
|
"learning_rate": 3.695584538471905e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24737226963043213,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2939.9,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 1.8306451612903225,
|
|
"grad_norm": 0.8544928418320776,
|
|
"learning_rate": 3.6913054116496797e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007391095161438,
|
|
"step": 1135,
|
|
"valid_targets_mean": 2489.9,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 1.838709677419355,
|
|
"grad_norm": 0.9317851244496647,
|
|
"learning_rate": 3.686998932346184e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3622196316719055,
|
|
"step": 1140,
|
|
"valid_targets_mean": 2150.3,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.846774193548387,
|
|
"grad_norm": 0.891845852295561,
|
|
"learning_rate": 3.6826651702075574e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3285204768180847,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2223.3,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 1.8548387096774195,
|
|
"grad_norm": 0.8025384148751605,
|
|
"learning_rate": 3.67830419532117e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830648124217987,
|
|
"step": 1150,
|
|
"valid_targets_mean": 2618.3,
|
|
"valid_targets_min": 176
|
|
},
|
|
{
|
|
"epoch": 1.8629032258064515,
|
|
"grad_norm": 0.8056644759392779,
|
|
"learning_rate": 3.673916078214487e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630714476108551,
|
|
"step": 1155,
|
|
"valid_targets_mean": 2436.2,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 1.870967741935484,
|
|
"grad_norm": 0.7009422522613835,
|
|
"learning_rate": 3.6695008898539295e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817384898662567,
|
|
"step": 1160,
|
|
"valid_targets_mean": 2902.9,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 1.879032258064516,
|
|
"grad_norm": 0.8429797303944103,
|
|
"learning_rate": 3.6650587016437246e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838149070739746,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3219.1,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.8870967741935485,
|
|
"grad_norm": 0.78878578635476,
|
|
"learning_rate": 3.6605895854247534e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33269816637039185,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2777.4,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 1.8951612903225805,
|
|
"grad_norm": 0.75861503829167,
|
|
"learning_rate": 3.656093613473386e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073751926422119,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3003.5,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 1.903225806451613,
|
|
"grad_norm": 0.7724536337023623,
|
|
"learning_rate": 3.6515708585003165e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32818832993507385,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3083.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.911290322580645,
|
|
"grad_norm": 0.761289588780475,
|
|
"learning_rate": 3.6470213936493834e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3342047333717346,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3032.6,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.9193548387096775,
|
|
"grad_norm": 0.7625234160940279,
|
|
"learning_rate": 3.642445292496389e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634608745574951,
|
|
"step": 1190,
|
|
"valid_targets_mean": 2614.5,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 1.9274193548387095,
|
|
"grad_norm": 0.8043589670386713,
|
|
"learning_rate": 3.637842629047908e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3091210126876831,
|
|
"step": 1195,
|
|
"valid_targets_mean": 2990.5,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 1.935483870967742,
|
|
"grad_norm": 0.6890264579201089,
|
|
"learning_rate": 3.633213477740092e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32466626167297363,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3549.2,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 1.9435483870967742,
|
|
"grad_norm": 0.8460465650126942,
|
|
"learning_rate": 3.6285579134374655e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3282429277896881,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2670.2,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.9516129032258065,
|
|
"grad_norm": 0.8409000115895613,
|
|
"learning_rate": 3.623876011431714e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29395943880081177,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2202.6,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 1.9596774193548387,
|
|
"grad_norm": 0.9031015831868221,
|
|
"learning_rate": 3.6191678474404656e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30777186155319214,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2239.8,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 1.967741935483871,
|
|
"grad_norm": 1.0360133297570613,
|
|
"learning_rate": 3.6144334976060716e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33510613441467285,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2122.0,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 1.9758064516129032,
|
|
"grad_norm": 0.744884156263577,
|
|
"learning_rate": 3.609673038494369e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30243536829948425,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2993.6,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 1.9838709677419355,
|
|
"grad_norm": 0.7936083243359146,
|
|
"learning_rate": 3.604886547093444e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3229416608810425,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2611.6,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 1.9919354838709677,
|
|
"grad_norm": 0.7639450986581604,
|
|
"learning_rate": 3.600074100812391e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898416817188263,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2731.6,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.745300113653654,
|
|
"learning_rate": 3.5952357774800526e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32822301983833313,
|
|
"step": 1240,
|
|
"valid_targets_mean": 2878.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.0080645161290325,
|
|
"grad_norm": 0.8298990901024914,
|
|
"learning_rate": 3.590371655343769e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610350549221039,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2671.3,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 2.0161290322580645,
|
|
"grad_norm": 0.8188069531813381,
|
|
"learning_rate": 3.585481813068109e-05,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295757532119751,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2992.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 2.024193548387097,
|
|
"grad_norm": 0.6971508227764184,
|
|
"learning_rate": 3.5805663297335955e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22992394864559174,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3354.1,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 2.032258064516129,
|
|
"grad_norm": 0.8548316683471872,
|
|
"learning_rate": 3.575625284835432e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2411145716905594,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2455.6,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 2.0403225806451615,
|
|
"grad_norm": 0.7038729829319951,
|
|
"learning_rate": 3.570658758282213e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642463445663452,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3234.4,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 2.0483870967741935,
|
|
"grad_norm": 0.9085594147461656,
|
|
"learning_rate": 3.565666830394632e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28095555305480957,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2052.5,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 2.056451612903226,
|
|
"grad_norm": 0.7022803344949544,
|
|
"learning_rate": 3.560649581904184e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28833383321762085,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4055.1,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 2.064516129032258,
|
|
"grad_norm": 0.7420477515126025,
|
|
"learning_rate": 3.555607093951859e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25743263959884644,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2819.7,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 2.0725806451612905,
|
|
"grad_norm": 0.8020073234709156,
|
|
"learning_rate": 3.550539448086829e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27234992384910583,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2448.7,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 2.0806451612903225,
|
|
"grad_norm": 0.8867306328737811,
|
|
"learning_rate": 3.545446726265133e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699108123779297,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2369.5,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 2.088709677419355,
|
|
"grad_norm": 0.9216845089288095,
|
|
"learning_rate": 3.540329010848344e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29844406247138977,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2879.4,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.096774193548387,
|
|
"grad_norm": 0.8309280102030538,
|
|
"learning_rate": 3.535186384602245e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27994275093078613,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3053.6,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 2.1048387096774195,
|
|
"grad_norm": 1.0066720718810707,
|
|
"learning_rate": 3.530018930695486e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270887166261673,
|
|
"step": 1305,
|
|
"valid_targets_mean": 1606.0,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.1129032258064515,
|
|
"grad_norm": 0.7127631351791698,
|
|
"learning_rate": 3.524826732698241e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22675874829292297,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3567.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 2.120967741935484,
|
|
"grad_norm": 0.8602488116734004,
|
|
"learning_rate": 3.519609874580854e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27167990803718567,
|
|
"step": 1315,
|
|
"valid_targets_mean": 2422.5,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 2.129032258064516,
|
|
"grad_norm": 0.8393575391552871,
|
|
"learning_rate": 3.514368440712483e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24156935513019562,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2386.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 2.1370967741935485,
|
|
"grad_norm": 0.8725788884924897,
|
|
"learning_rate": 3.5091025158597367e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2941593825817108,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2612.1,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 2.1451612903225805,
|
|
"grad_norm": 0.7720095917306824,
|
|
"learning_rate": 3.5038121851852995e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27245259284973145,
|
|
"step": 1330,
|
|
"valid_targets_mean": 2822.8,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.153225806451613,
|
|
"grad_norm": 0.7437748669846534,
|
|
"learning_rate": 3.498497534246557e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2749680280685425,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3058.0,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 2.161290322580645,
|
|
"grad_norm": 0.850328843437496,
|
|
"learning_rate": 3.4931586489942144e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582993805408478,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2566.1,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 2.1693548387096775,
|
|
"grad_norm": 0.8635092037606357,
|
|
"learning_rate": 3.4877956157709024e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27229222655296326,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2379.1,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 2.1774193548387095,
|
|
"grad_norm": 0.7539855333025892,
|
|
"learning_rate": 3.482408521309782e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250728577375412,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3173.8,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 2.185483870967742,
|
|
"grad_norm": 0.7882271678541567,
|
|
"learning_rate": 3.476997452733144e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818753123283386,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3194.8,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 2.193548387096774,
|
|
"grad_norm": 0.7446355833198072,
|
|
"learning_rate": 3.4715624975509965e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24863293766975403,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3068.8,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 2.2016129032258065,
|
|
"grad_norm": 0.6399029009386605,
|
|
"learning_rate": 3.4661037436596526e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526560127735138,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5169.6,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 2.2096774193548385,
|
|
"grad_norm": 0.7881118673563908,
|
|
"learning_rate": 3.460621279340309e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24836188554763794,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3235.6,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 2.217741935483871,
|
|
"grad_norm": 0.857296364904341,
|
|
"learning_rate": 3.455115193257614e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2274957001209259,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2718.6,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 2.225806451612903,
|
|
"grad_norm": 0.8100345744662037,
|
|
"learning_rate": 3.44958557445824e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2548145651817322,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2790.9,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 2.2338709677419355,
|
|
"grad_norm": 0.7798880316926826,
|
|
"learning_rate": 3.444032512369438e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866021692752838,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2659.1,
|
|
"valid_targets_min": 162
|
|
},
|
|
{
|
|
"epoch": 2.241935483870968,
|
|
"grad_norm": 1.038248496855875,
|
|
"learning_rate": 3.4384560967975944e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579859495162964,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2569.9,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"grad_norm": 0.8751644009557498,
|
|
"learning_rate": 3.432856417926777e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28950878977775574,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2299.8,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 2.258064516129032,
|
|
"grad_norm": 0.8706373069908244,
|
|
"learning_rate": 3.4272335663172773e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768270969390869,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2368.0,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 2.2661290322580645,
|
|
"grad_norm": 0.93400450561681,
|
|
"learning_rate": 3.421587632904147e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24980685114860535,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2017.8,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 2.274193548387097,
|
|
"grad_norm": 0.7868730743718847,
|
|
"learning_rate": 3.415918708995724e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24551303684711456,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3131.8,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 2.282258064516129,
|
|
"grad_norm": 0.8885072568935852,
|
|
"learning_rate": 3.410226886272159e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733041048049927,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2230.9,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 2.2903225806451615,
|
|
"grad_norm": 0.7714004150477628,
|
|
"learning_rate": 3.4045122567839325e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2788838744163513,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2960.6,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 2.2983870967741935,
|
|
"grad_norm": 0.9031548851564962,
|
|
"learning_rate": 3.398774912950365e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643152177333832,
|
|
"step": 1425,
|
|
"valid_targets_mean": 2416.4,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 2.306451612903226,
|
|
"grad_norm": 0.8442433722483331,
|
|
"learning_rate": 3.39301494755812e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2394760549068451,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3043.4,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 2.314516129032258,
|
|
"grad_norm": 0.7659434668327061,
|
|
"learning_rate": 3.3872324537597104e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2231777310371399,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3173.4,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 2.3225806451612905,
|
|
"grad_norm": 0.9610566598576283,
|
|
"learning_rate": 3.381427525071984e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2249530553817749,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2290.4,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.3306451612903225,
|
|
"grad_norm": 0.7849727982920722,
|
|
"learning_rate": 3.3756002553746175e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24936802685260773,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2541.6,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 2.338709677419355,
|
|
"grad_norm": 0.7303815642952538,
|
|
"learning_rate": 3.369750738908593e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19507169723510742,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3326.1,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 2.346774193548387,
|
|
"grad_norm": 0.925359147082696,
|
|
"learning_rate": 3.3638790702746793e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24659791588783264,
|
|
"step": 1455,
|
|
"valid_targets_mean": 1993.8,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 2.3548387096774195,
|
|
"grad_norm": 0.8091800968690972,
|
|
"learning_rate": 3.357985344431897e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29042160511016846,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2629.8,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 2.3629032258064515,
|
|
"grad_norm": 0.7213141778097782,
|
|
"learning_rate": 3.3520696566959845e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1928853690624237,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3548.3,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 2.370967741935484,
|
|
"grad_norm": 0.9064355602381016,
|
|
"learning_rate": 3.346132102737859e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26201021671295166,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3104.6,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 2.379032258064516,
|
|
"grad_norm": 0.8003709033609432,
|
|
"learning_rate": 3.3401727785820644e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21827691793441772,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2726.4,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 2.3870967741935485,
|
|
"grad_norm": 0.8938611631834128,
|
|
"learning_rate": 3.3341917806052246e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25817686319351196,
|
|
"step": 1480,
|
|
"valid_targets_mean": 2709.6,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 2.3951612903225805,
|
|
"grad_norm": 0.7118928931894681,
|
|
"learning_rate": 3.328189205534479e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540888786315918,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3103.5,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 2.403225806451613,
|
|
"grad_norm": 1.0517668057964693,
|
|
"learning_rate": 3.322165150445919e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2775261402130127,
|
|
"step": 1490,
|
|
"valid_targets_mean": 1799.1,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 2.411290322580645,
|
|
"grad_norm": 0.9198770088219779,
|
|
"learning_rate": 3.3161197127630256e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670985162258148,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2359.5,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 2.4193548387096775,
|
|
"grad_norm": 0.8111872240529917,
|
|
"learning_rate": 3.3100529902550806e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25475049018859863,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2807.1,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 2.4274193548387095,
|
|
"grad_norm": 0.7769108410231289,
|
|
"learning_rate": 3.303965081035597e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26501262187957764,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3040.3,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 2.435483870967742,
|
|
"grad_norm": 0.8141889482291088,
|
|
"learning_rate": 3.297856083560728e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1932304948568344,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3202.5,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 2.443548387096774,
|
|
"grad_norm": 0.9599868110996262,
|
|
"learning_rate": 3.291726096627671e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259549617767334,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2464.2,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.4516129032258065,
|
|
"grad_norm": 0.8250290638846852,
|
|
"learning_rate": 3.285575219373079e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22651734948158264,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2752.6,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.4596774193548385,
|
|
"grad_norm": 0.814114938565853,
|
|
"learning_rate": 3.279403551271447e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25240465998649597,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3135.8,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 2.467741935483871,
|
|
"grad_norm": 0.8707020279697332,
|
|
"learning_rate": 3.273211192133511e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27748358249664307,
|
|
"step": 1530,
|
|
"valid_targets_mean": 2607.1,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 2.475806451612903,
|
|
"grad_norm": 0.8243592885174454,
|
|
"learning_rate": 3.266998242104629e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26760369539260864,
|
|
"step": 1535,
|
|
"valid_targets_mean": 2997.1,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 2.4838709677419355,
|
|
"grad_norm": 0.9866820573851856,
|
|
"learning_rate": 3.260764801663165e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24638138711452484,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2944.6,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 2.491935483870968,
|
|
"grad_norm": 0.7235906040292996,
|
|
"learning_rate": 3.25451097161886e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673245370388031,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3518.2,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.9055698352228431,
|
|
"learning_rate": 3.248236853111207e-05,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24379627406597137,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2076.4,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 2.508064516129032,
|
|
"grad_norm": 0.7116698657636277,
|
|
"learning_rate": 3.24194254760781e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25199657678604126,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3354.8,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 2.5161290322580645,
|
|
"grad_norm": 0.631533937849814,
|
|
"learning_rate": 3.2356281569027466e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22752869129180908,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3588.9,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 2.524193548387097,
|
|
"grad_norm": 0.9595436390050573,
|
|
"learning_rate": 3.229293783114918e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.225123792886734,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3132.4,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 2.532258064516129,
|
|
"grad_norm": 0.8693956299774712,
|
|
"learning_rate": 3.2229395286864045e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22287040948867798,
|
|
"step": 1570,
|
|
"valid_targets_mean": 2696.6,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 2.540322580645161,
|
|
"grad_norm": 0.8246049717858901,
|
|
"learning_rate": 3.2165654963808e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28304338455200195,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3154.3,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 2.5483870967741935,
|
|
"grad_norm": 0.7919795320141321,
|
|
"learning_rate": 3.2101717892815564e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23847147822380066,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2950.5,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 2.556451612903226,
|
|
"grad_norm": 0.9889567014877007,
|
|
"learning_rate": 3.2037585107903146e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261405885219574,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2835.9,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 2.564516129032258,
|
|
"grad_norm": 0.8095649295839071,
|
|
"learning_rate": 3.19732576462523e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22092658281326294,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2961.8,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 2.5725806451612905,
|
|
"grad_norm": 0.7999230845687979,
|
|
"learning_rate": 3.1908736548193e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21227002143859863,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2924.9,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 2.5806451612903225,
|
|
"grad_norm": 1.1759496331594432,
|
|
"learning_rate": 3.184402285718676e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27379560470581055,
|
|
"step": 1600,
|
|
"valid_targets_mean": 2301.4,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.588709677419355,
|
|
"grad_norm": 0.8135710268959665,
|
|
"learning_rate": 3.17791176198098e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2617451250553131,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2755.7,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.596774193548387,
|
|
"grad_norm": 0.9020659189514568,
|
|
"learning_rate": 3.171402188573611e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679401636123657,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2695.8,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.6048387096774195,
|
|
"grad_norm": 0.8096812076554731,
|
|
"learning_rate": 3.164873670772046e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25376781821250916,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3185.5,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 2.6129032258064515,
|
|
"grad_norm": 0.8698813760839395,
|
|
"learning_rate": 3.158326314158139e-05,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24048633873462677,
|
|
"step": 1620,
|
|
"valid_targets_mean": 2917.1,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 2.620967741935484,
|
|
"grad_norm": 0.6010269401724435,
|
|
"learning_rate": 3.151760224618413e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129538506269455,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4879.6,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 2.629032258064516,
|
|
"grad_norm": 0.8353041947161233,
|
|
"learning_rate": 3.1451755083423475e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260596603155136,
|
|
"step": 1630,
|
|
"valid_targets_mean": 2132.6,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 2.6370967741935485,
|
|
"grad_norm": 0.8097435649736887,
|
|
"learning_rate": 3.138572271820661e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21300700306892395,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2630.6,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 2.6451612903225805,
|
|
"grad_norm": 0.8449992122779407,
|
|
"learning_rate": 3.131950621843588e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24731802940368652,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2228.6,
|
|
"valid_targets_min": 194
|
|
},
|
|
{
|
|
"epoch": 2.653225806451613,
|
|
"grad_norm": 0.8259852980899384,
|
|
"learning_rate": 3.125310665499156e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22202807664871216,
|
|
"step": 1645,
|
|
"valid_targets_mean": 2339.2,
|
|
"valid_targets_min": 191
|
|
},
|
|
{
|
|
"epoch": 2.661290322580645,
|
|
"grad_norm": 0.816493918299217,
|
|
"learning_rate": 3.1186525101714466e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24829751253128052,
|
|
"step": 1650,
|
|
"valid_targets_mean": 2508.4,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.6693548387096775,
|
|
"grad_norm": 0.8390688799622861,
|
|
"learning_rate": 3.111976263538866e-05,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587744891643524,
|
|
"step": 1655,
|
|
"valid_targets_mean": 2534.8,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 2.6774193548387095,
|
|
"grad_norm": 0.755351181891341,
|
|
"learning_rate": 3.105282033572398e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24163603782653809,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3305.6,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 2.685483870967742,
|
|
"grad_norm": 0.9333693939630984,
|
|
"learning_rate": 3.098569928533862e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24809277057647705,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2465.0,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.693548387096774,
|
|
"grad_norm": 0.7914698402520541,
|
|
"learning_rate": 3.091840056974159e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22854290902614594,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3027.0,
|
|
"valid_targets_min": 162
|
|
},
|
|
{
|
|
"epoch": 2.7016129032258065,
|
|
"grad_norm": 0.9555980254857154,
|
|
"learning_rate": 3.0850925277315193e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20246204733848572,
|
|
"step": 1675,
|
|
"valid_targets_mean": 1834.5,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 2.709677419354839,
|
|
"grad_norm": 0.6906406451897702,
|
|
"learning_rate": 3.078327449929738e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22112296521663666,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4248.9,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 2.717741935483871,
|
|
"grad_norm": 0.8774515346941294,
|
|
"learning_rate": 3.071544932976414e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25591331720352173,
|
|
"step": 1685,
|
|
"valid_targets_mean": 2948.6,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 2.725806451612903,
|
|
"grad_norm": 0.9496133885960026,
|
|
"learning_rate": 3.064745086561179e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2266722470521927,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3001.8,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 2.7338709677419355,
|
|
"grad_norm": 0.8062537535116742,
|
|
"learning_rate": 3.057928020653925e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645246982574463,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3215.7,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 2.741935483870968,
|
|
"grad_norm": 0.7343246237372493,
|
|
"learning_rate": 3.0510938455030233e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18695321679115295,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3150.8,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"grad_norm": 1.0296140356269057,
|
|
"learning_rate": 3.044242671633542e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25386279821395874,
|
|
"step": 1705,
|
|
"valid_targets_mean": 1929.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 2.758064516129032,
|
|
"grad_norm": 0.7045657201227054,
|
|
"learning_rate": 3.0373746098454617e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2057691514492035,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3697.8,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 2.7661290322580645,
|
|
"grad_norm": 0.8694195037643573,
|
|
"learning_rate": 3.0304897712118807e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27011603116989136,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2705.1,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 2.774193548387097,
|
|
"grad_norm": 0.7582115148968539,
|
|
"learning_rate": 3.023588267077217e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27019011974334717,
|
|
"step": 1720,
|
|
"valid_targets_mean": 2892.5,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 2.782258064516129,
|
|
"grad_norm": 0.810921783814966,
|
|
"learning_rate": 3.0166702090554137e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21895082294940948,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3321.3,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 2.790322580645161,
|
|
"grad_norm": 0.7763111517034101,
|
|
"learning_rate": 3.0097357090281267e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21651533246040344,
|
|
"step": 1730,
|
|
"valid_targets_mean": 2793.8,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 2.7983870967741935,
|
|
"grad_norm": 0.9466318984258744,
|
|
"learning_rate": 3.0027848791429207e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22231006622314453,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2668.2,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 2.806451612903226,
|
|
"grad_norm": 0.8674292896489737,
|
|
"learning_rate": 2.995817831811453e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20861703157424927,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2371.1,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 2.814516129032258,
|
|
"grad_norm": 0.942144555345226,
|
|
"learning_rate": 2.9888346797076562e-05,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378596067428589,
|
|
"step": 1745,
|
|
"valid_targets_mean": 2634.9,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 2.8225806451612905,
|
|
"grad_norm": 0.9017772972818529,
|
|
"learning_rate": 2.9818355357659146e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25357794761657715,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2541.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 2.8306451612903225,
|
|
"grad_norm": 0.96832093843113,
|
|
"learning_rate": 2.9748205131792412e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23954816162586212,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2244.4,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.838709677419355,
|
|
"grad_norm": 0.6629496328395067,
|
|
"learning_rate": 2.9677897253974434e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24393409490585327,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3514.8,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 2.846774193548387,
|
|
"grad_norm": 0.7861618771345119,
|
|
"learning_rate": 2.960743286125291e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26197177171707153,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2930.6,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 2.8548387096774195,
|
|
"grad_norm": 0.9613029903469441,
|
|
"learning_rate": 2.9536813093206744e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25926387310028076,
|
|
"step": 1770,
|
|
"valid_targets_mean": 2477.4,
|
|
"valid_targets_min": 168
|
|
},
|
|
{
|
|
"epoch": 2.8629032258064515,
|
|
"grad_norm": 0.8872597279019357,
|
|
"learning_rate": 2.9466039091927638e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23096643388271332,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2987.4,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 2.870967741935484,
|
|
"grad_norm": 0.7461375299179586,
|
|
"learning_rate": 2.939511200200163e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20537687838077545,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3140.8,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 2.879032258064516,
|
|
"grad_norm": 0.8476011616410694,
|
|
"learning_rate": 2.9324032970490547e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22208863496780396,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3059.9,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 2.8870967741935485,
|
|
"grad_norm": 0.6926362153711592,
|
|
"learning_rate": 2.9252803146913515e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2011837363243103,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4129.8,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 2.8951612903225805,
|
|
"grad_norm": 0.9131152687677379,
|
|
"learning_rate": 2.918142368322829e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24615013599395752,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2343.6,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 2.903225806451613,
|
|
"grad_norm": 0.8906259836863368,
|
|
"learning_rate": 2.910989573381268e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23596858978271484,
|
|
"step": 1800,
|
|
"valid_targets_mean": 2236.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 2.911290322580645,
|
|
"grad_norm": 0.8395186092996292,
|
|
"learning_rate": 2.9038220455445886e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22551089525222778,
|
|
"step": 1805,
|
|
"valid_targets_mean": 2666.2,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 2.9193548387096775,
|
|
"grad_norm": 0.825096337455636,
|
|
"learning_rate": 2.896639900728975e-05,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24492347240447998,
|
|
"step": 1810,
|
|
"valid_targets_mean": 2459.6,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 2.9274193548387095,
|
|
"grad_norm": 0.8043480123579212,
|
|
"learning_rate": 2.8894432550870046e-05,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24770645797252655,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3397.1,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 2.935483870967742,
|
|
"grad_norm": 0.9270210103573074,
|
|
"learning_rate": 2.8822322250057665e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21392768621444702,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2329.7,
|
|
"valid_targets_min": 149
|
|
},
|
|
{
|
|
"epoch": 2.943548387096774,
|
|
"grad_norm": 0.9566729654241561,
|
|
"learning_rate": 2.8750069271049814e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22934722900390625,
|
|
"step": 1825,
|
|
"valid_targets_mean": 2614.5,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 2.9516129032258065,
|
|
"grad_norm": 0.9062539004736491,
|
|
"learning_rate": 2.8677674782351164e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549256980419159,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2815.6,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 2.959677419354839,
|
|
"grad_norm": 0.966334872758629,
|
|
"learning_rate": 2.8605139954754923e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22865012288093567,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2427.0,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 2.967741935483871,
|
|
"grad_norm": 0.8937496661192146,
|
|
"learning_rate": 2.8532465961323922e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24133840203285217,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2510.4,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 2.975806451612903,
|
|
"grad_norm": 0.9356594116988157,
|
|
"learning_rate": 2.845965397737164e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22419969737529755,
|
|
"step": 1845,
|
|
"valid_targets_mean": 2553.6,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 2.9838709677419355,
|
|
"grad_norm": 0.8882655910670298,
|
|
"learning_rate": 2.8386705180443175e-05,
|
|
"loss": 0.2049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19150173664093018,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2592.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.991935483870968,
|
|
"grad_norm": 1.020245504023328,
|
|
"learning_rate": 2.8313620750296266e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23064059019088745,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2314.6,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.7898572933465332,
|
|
"learning_rate": 2.824040186888213e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544041574001312,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2863.7,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 3.0080645161290325,
|
|
"grad_norm": 0.9534897094748244,
|
|
"learning_rate": 2.8167049720326387e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22409482300281525,
|
|
"step": 1865,
|
|
"valid_targets_mean": 2411.8,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 3.0161290322580645,
|
|
"grad_norm": 0.7374737406048381,
|
|
"learning_rate": 2.809356549090992e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23597770929336548,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3534.6,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 3.024193548387097,
|
|
"grad_norm": 0.8722700645784663,
|
|
"learning_rate": 2.801995036904968e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20545664429664612,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3000.0,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 3.032258064516129,
|
|
"grad_norm": 0.8788859146998531,
|
|
"learning_rate": 2.794620554527945e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18698415160179138,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2358.8,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 3.0403225806451615,
|
|
"grad_norm": 0.9602484570261894,
|
|
"learning_rate": 2.7872332212230612e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2208431363105774,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2309.4,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 3.0483870967741935,
|
|
"grad_norm": 0.9273577858171649,
|
|
"learning_rate": 2.7798331564612856e-05,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1678655594587326,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2861.1,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 3.056451612903226,
|
|
"grad_norm": 0.9051561147472552,
|
|
"learning_rate": 2.7724204799194846e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22073279321193695,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3054.1,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 3.064516129032258,
|
|
"grad_norm": 0.8070147641711335,
|
|
"learning_rate": 2.764995311478489e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1582649052143097,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2764.9,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 3.0725806451612905,
|
|
"grad_norm": 0.9114545153406952,
|
|
"learning_rate": 2.7575577712211524e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2086646854877472,
|
|
"step": 1905,
|
|
"valid_targets_mean": 2314.6,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 3.0806451612903225,
|
|
"grad_norm": 0.7922195540505073,
|
|
"learning_rate": 2.7501079794304102e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17211270332336426,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3543.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 3.088709677419355,
|
|
"grad_norm": 0.9763578816994177,
|
|
"learning_rate": 2.742646056587336e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19247817993164062,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2094.7,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 3.096774193548387,
|
|
"grad_norm": 0.8588682892525076,
|
|
"learning_rate": 2.7351721233691906e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1990266740322113,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2593.0,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.1048387096774195,
|
|
"grad_norm": 0.8528299386276436,
|
|
"learning_rate": 2.7276863006474715e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20858368277549744,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3191.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 3.1129032258064515,
|
|
"grad_norm": 0.8593930859631348,
|
|
"learning_rate": 2.7201887094859588e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16805998980998993,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2786.5,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 3.120967741935484,
|
|
"grad_norm": 1.0202261065016798,
|
|
"learning_rate": 2.7126794711387566e-05,
|
|
"loss": 0.2134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2062893509864807,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2353.9,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 3.129032258064516,
|
|
"grad_norm": 0.8097332477846493,
|
|
"learning_rate": 2.7051587070483307e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2042836844921112,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3530.8,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 3.1370967741935485,
|
|
"grad_norm": 0.90091698943588,
|
|
"learning_rate": 2.6976265388435475e-05,
|
|
"loss": 0.1811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1659466028213501,
|
|
"step": 1945,
|
|
"valid_targets_mean": 2494.7,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 3.1451612903225805,
|
|
"grad_norm": 0.7852846721132106,
|
|
"learning_rate": 2.6900830883377044e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20571090281009674,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3104.9,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 3.153225806451613,
|
|
"grad_norm": 0.8830153152019784,
|
|
"learning_rate": 2.682528477526562e-05,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23456811904907227,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2453.4,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 3.161290322580645,
|
|
"grad_norm": 0.8231207099243653,
|
|
"learning_rate": 2.6749628285863688e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19091397523880005,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2809.3,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 3.1693548387096775,
|
|
"grad_norm": 0.6654756671657087,
|
|
"learning_rate": 2.6673862638718856e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17690053582191467,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4336.9,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 3.1774193548387095,
|
|
"grad_norm": 0.9569640993319863,
|
|
"learning_rate": 2.6597989059144098e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19659510254859924,
|
|
"step": 1970,
|
|
"valid_targets_mean": 2791.4,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 3.185483870967742,
|
|
"grad_norm": 0.9637699341160658,
|
|
"learning_rate": 2.6522008774197902e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19137433171272278,
|
|
"step": 1975,
|
|
"valid_targets_mean": 2510.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 3.193548387096774,
|
|
"grad_norm": 0.7597222910475057,
|
|
"learning_rate": 2.6445923012664437e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20159660279750824,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3181.9,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 3.2016129032258065,
|
|
"grad_norm": 0.9383137355124391,
|
|
"learning_rate": 2.6369733005033693e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21307817101478577,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2544.0,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 3.2096774193548385,
|
|
"grad_norm": 0.7566507682047693,
|
|
"learning_rate": 2.629343998348155e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16361549496650696,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3088.2,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 3.217741935483871,
|
|
"grad_norm": 0.8513524558760817,
|
|
"learning_rate": 2.6217045181849906e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20082426071166992,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3377.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 3.225806451612903,
|
|
"grad_norm": 0.9661078532137528,
|
|
"learning_rate": 2.6140549835626645e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17874982953071594,
|
|
"step": 2000,
|
|
"valid_targets_mean": 2210.6,
|
|
"valid_targets_min": 179
|
|
},
|
|
{
|
|
"epoch": 3.2338709677419355,
|
|
"grad_norm": 1.0029811530888848,
|
|
"learning_rate": 2.6063955181925736e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21504683792591095,
|
|
"step": 2005,
|
|
"valid_targets_mean": 2418.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 3.241935483870968,
|
|
"grad_norm": 0.740328148940221,
|
|
"learning_rate": 2.5987262459467168e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1947890818119049,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3269.2,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"grad_norm": 0.9564294077785219,
|
|
"learning_rate": 2.5910472908556933e-05,
|
|
"loss": 0.1875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18946582078933716,
|
|
"step": 2015,
|
|
"valid_targets_mean": 2550.6,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 3.258064516129032,
|
|
"grad_norm": 0.8614872467627219,
|
|
"learning_rate": 2.5833587771067e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21252858638763428,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2838.5,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 3.2661290322580645,
|
|
"grad_norm": 1.0344025276091713,
|
|
"learning_rate": 2.5756608290415185e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20546601712703705,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2222.2,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 3.274193548387097,
|
|
"grad_norm": 0.971722056807594,
|
|
"learning_rate": 2.5679535711545053e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19733572006225586,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2713.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 3.282258064516129,
|
|
"grad_norm": 0.7248468692882492,
|
|
"learning_rate": 2.56023712809058e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18412882089614868,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3494.7,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 3.2903225806451615,
|
|
"grad_norm": 0.7677047493428087,
|
|
"learning_rate": 2.552511624643209e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20295536518096924,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2958.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.2983870967741935,
|
|
"grad_norm": 0.8848516236640487,
|
|
"learning_rate": 2.5447771857523868e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18372538685798645,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3044.1,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 3.306451612903226,
|
|
"grad_norm": 0.8776931117788924,
|
|
"learning_rate": 2.5370339365026145e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15434062480926514,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2974.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 3.314516129032258,
|
|
"grad_norm": 0.9669142889375918,
|
|
"learning_rate": 2.5292820021208794e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16768679022789001,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2212.3,
|
|
"valid_targets_min": 162
|
|
},
|
|
{
|
|
"epoch": 3.3225806451612905,
|
|
"grad_norm": 0.9213260899962259,
|
|
"learning_rate": 2.5215215079746268e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16212767362594604,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2776.0,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 3.3306451612903225,
|
|
"grad_norm": 0.8582376416098897,
|
|
"learning_rate": 2.5137525795697356e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1788593977689743,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2802.6,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 3.338709677419355,
|
|
"grad_norm": 0.8937478030413571,
|
|
"learning_rate": 2.5059753425484858e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19318518042564392,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2988.1,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 3.346774193548387,
|
|
"grad_norm": 0.9864069360790093,
|
|
"learning_rate": 2.4981899226875274e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19566687941551208,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2498.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 3.3548387096774195,
|
|
"grad_norm": 0.9408224206977921,
|
|
"learning_rate": 2.490396445895849e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18655025959014893,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2473.8,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 3.3629032258064515,
|
|
"grad_norm": 0.7550861749072267,
|
|
"learning_rate": 2.4825950382127356e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1983252614736557,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3727.8,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 3.370967741935484,
|
|
"grad_norm": 1.0033507667264645,
|
|
"learning_rate": 2.4747858258057365e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16870397329330444,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2409.5,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 3.379032258064516,
|
|
"grad_norm": 0.9385609794781297,
|
|
"learning_rate": 2.4669689349686224e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19624167680740356,
|
|
"step": 2095,
|
|
"valid_targets_mean": 2369.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 3.3870967741935485,
|
|
"grad_norm": 0.903084755532927,
|
|
"learning_rate": 2.4591444921193396e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18280445039272308,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2695.1,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 3.3951612903225805,
|
|
"grad_norm": 0.7788473027521549,
|
|
"learning_rate": 2.4513126237979723e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19049371778964996,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2789.4,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 3.403225806451613,
|
|
"grad_norm": 0.9689335047615265,
|
|
"learning_rate": 2.4434734566646903e-05,
|
|
"loss": 0.1999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2175639420747757,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2601.1,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.411290322580645,
|
|
"grad_norm": 1.0354117280139254,
|
|
"learning_rate": 2.435627117497703e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22621501982212067,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2305.3,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 3.4193548387096775,
|
|
"grad_norm": 0.9839717792896802,
|
|
"learning_rate": 2.4277737331912104e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2071266919374466,
|
|
"step": 2120,
|
|
"valid_targets_mean": 2344.4,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 3.4274193548387095,
|
|
"grad_norm": 1.0772950034693336,
|
|
"learning_rate": 2.419913430753347e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2085425853729248,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2838.8,
|
|
"valid_targets_min": 179
|
|
},
|
|
{
|
|
"epoch": 3.435483870967742,
|
|
"grad_norm": 0.971668327387246,
|
|
"learning_rate": 2.412046337304131e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20770132541656494,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2340.5,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 3.443548387096774,
|
|
"grad_norm": 0.8314074706199648,
|
|
"learning_rate": 2.404172580073409e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18518871068954468,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3152.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.4516129032258065,
|
|
"grad_norm": 0.9437766720043008,
|
|
"learning_rate": 2.3962922863987956e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21757878363132477,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2376.1,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 3.4596774193548385,
|
|
"grad_norm": 0.9096819186271492,
|
|
"learning_rate": 2.388405583723615e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2121957242488861,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2802.9,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 3.467741935483871,
|
|
"grad_norm": 0.9545719417678955,
|
|
"learning_rate": 2.3805125995948422e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19185297191143036,
|
|
"step": 2150,
|
|
"valid_targets_mean": 2206.1,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 3.475806451612903,
|
|
"grad_norm": 0.9934829200272315,
|
|
"learning_rate": 2.3726134616610366e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22792363166809082,
|
|
"step": 2155,
|
|
"valid_targets_mean": 2449.7,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 3.4838709677419355,
|
|
"grad_norm": 0.8987024317793929,
|
|
"learning_rate": 2.3647082976702805e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19362598657608032,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2890.2,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.491935483870968,
|
|
"grad_norm": 1.0048177851858588,
|
|
"learning_rate": 2.3567972354681113e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1506122648715973,
|
|
"step": 2165,
|
|
"valid_targets_mean": 2496.9,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"grad_norm": 0.8984785785355409,
|
|
"learning_rate": 2.348880402995456e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2214203178882599,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2721.9,
|
|
"valid_targets_min": 168
|
|
},
|
|
{
|
|
"epoch": 3.508064516129032,
|
|
"grad_norm": 1.2049098361133423,
|
|
"learning_rate": 2.3409579282865592e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14519715309143066,
|
|
"step": 2175,
|
|
"valid_targets_mean": 2268.5,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 3.5161290322580645,
|
|
"grad_norm": 1.091697578616202,
|
|
"learning_rate": 2.3330299394669144e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1759229302406311,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2581.4,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 3.524193548387097,
|
|
"grad_norm": 0.9271981066443962,
|
|
"learning_rate": 2.325096564751193e-05,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20771491527557373,
|
|
"step": 2185,
|
|
"valid_targets_mean": 2512.8,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 3.532258064516129,
|
|
"grad_norm": 0.8937137079195363,
|
|
"learning_rate": 2.317157932441167e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18069633841514587,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2611.8,
|
|
"valid_targets_min": 173
|
|
},
|
|
{
|
|
"epoch": 3.540322580645161,
|
|
"grad_norm": 1.0297894552097773,
|
|
"learning_rate": 2.3092141709236388e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18333375453948975,
|
|
"step": 2195,
|
|
"valid_targets_mean": 2214.9,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 3.5483870967741935,
|
|
"grad_norm": 0.8288567586178099,
|
|
"learning_rate": 2.3012654086683605e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17873722314834595,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3754.8,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 3.556451612903226,
|
|
"grad_norm": 0.7421628361720626,
|
|
"learning_rate": 2.293311774225958e-05,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1540219783782959,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3208.2,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 3.564516129032258,
|
|
"grad_norm": 0.9467491696705914,
|
|
"learning_rate": 2.2853533962258547e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15216803550720215,
|
|
"step": 2210,
|
|
"valid_targets_mean": 2432.2,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 3.5725806451612905,
|
|
"grad_norm": 0.9113348265831415,
|
|
"learning_rate": 2.277390403374186e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18010471761226654,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3006.2,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 3.5806451612903225,
|
|
"grad_norm": 0.8751941483576071,
|
|
"learning_rate": 2.2694229244517226e-05,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18321330845355988,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2741.6,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 3.588709677419355,
|
|
"grad_norm": 0.7674248086127234,
|
|
"learning_rate": 2.2614510883117834e-05,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1977037489414215,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4118.6,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 3.596774193548387,
|
|
"grad_norm": 1.090115502700135,
|
|
"learning_rate": 2.2534750238781566e-05,
|
|
"loss": 0.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17552846670150757,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2655.6,
|
|
"valid_targets_min": 185
|
|
},
|
|
{
|
|
"epoch": 3.6048387096774195,
|
|
"grad_norm": 0.8291330830337824,
|
|
"learning_rate": 2.2454948601430112e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158736914396286,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3359.7,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 3.6129032258064515,
|
|
"grad_norm": 0.9780479579592944,
|
|
"learning_rate": 2.2375107261648102e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1751236617565155,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2478.9,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.620967741935484,
|
|
"grad_norm": 0.9808513975303134,
|
|
"learning_rate": 2.229522751066228e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16816920042037964,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2377.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 3.629032258064516,
|
|
"grad_norm": 0.7616802773706063,
|
|
"learning_rate": 2.2215310640320555e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20205530524253845,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3998.6,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 3.6370967741935485,
|
|
"grad_norm": 0.760427852993998,
|
|
"learning_rate": 2.213535794307118e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20245593786239624,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3613.8,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 3.6451612903225805,
|
|
"grad_norm": 1.3939312222087392,
|
|
"learning_rate": 2.2055370711941797e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684255301952362,
|
|
"step": 2260,
|
|
"valid_targets_mean": 1981.3,
|
|
"valid_targets_min": 177
|
|
},
|
|
{
|
|
"epoch": 3.653225806451613,
|
|
"grad_norm": 0.7787065285506174,
|
|
"learning_rate": 2.1975350240518542e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17972789704799652,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3220.9,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 3.661290322580645,
|
|
"grad_norm": 0.904004001790419,
|
|
"learning_rate": 2.1895297822925138e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20349371433258057,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2432.5,
|
|
"valid_targets_min": 214
|
|
},
|
|
{
|
|
"epoch": 3.6693548387096775,
|
|
"grad_norm": 0.8971620709202416,
|
|
"learning_rate": 2.1815214753801944e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16575834155082703,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.6774193548387095,
|
|
"grad_norm": 0.9923053405017076,
|
|
"learning_rate": 2.173510232828504e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18248391151428223,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2636.6,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 3.685483870967742,
|
|
"grad_norm": 0.7937177157490851,
|
|
"learning_rate": 2.1654961841985256e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1414434015750885,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2846.7,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 3.693548387096774,
|
|
"grad_norm": 1.0781458555165373,
|
|
"learning_rate": 2.157479459096724e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19631700217723846,
|
|
"step": 2290,
|
|
"valid_targets_mean": 2468.4,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 3.7016129032258065,
|
|
"grad_norm": 0.9443566131745327,
|
|
"learning_rate": 2.149460187172849e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260238379240036,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3034.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 3.709677419354839,
|
|
"grad_norm": 0.9201775098877073,
|
|
"learning_rate": 2.1414384981178377e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15513110160827637,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3192.1,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 3.717741935483871,
|
|
"grad_norm": 0.8862845459040455,
|
|
"learning_rate": 2.13341452166172e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15558579564094543,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2466.9,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 3.725806451612903,
|
|
"grad_norm": 0.9730504692851871,
|
|
"learning_rate": 2.125388387571517e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1859060823917389,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3241.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 3.7338709677419355,
|
|
"grad_norm": 0.9806915956145613,
|
|
"learning_rate": 2.1173602256491438e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17069466412067413,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2448.5,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 3.741935483870968,
|
|
"grad_norm": 0.9186741972263072,
|
|
"learning_rate": 2.109330165729311e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1644703447818756,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2511.8,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.9780766439134044,
|
|
"learning_rate": 2.1012983376774255e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15557001531124115,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2074.3,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 3.758064516129032,
|
|
"grad_norm": 0.9402765490215035,
|
|
"learning_rate": 2.0932648713874873e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16606613993644714,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2039.1,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 3.7661290322580645,
|
|
"grad_norm": 0.953144594728451,
|
|
"learning_rate": 2.0852298967799915e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14848968386650085,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3320.4,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 3.774193548387097,
|
|
"grad_norm": 0.9513406439960583,
|
|
"learning_rate": 2.0771935437998256e-05,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18800488114356995,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2408.1,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 3.782258064516129,
|
|
"grad_norm": 1.1110665348627655,
|
|
"learning_rate": 2.0691559424141694e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2009063959121704,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2128.6,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.790322580645161,
|
|
"grad_norm": 0.8202676986615219,
|
|
"learning_rate": 2.0611172226103936e-05,
|
|
"loss": 0.1905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18475189805030823,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2913.4,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 3.7983870967741935,
|
|
"grad_norm": 0.8720509305927525,
|
|
"learning_rate": 2.0530775143939536e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18788838386535645,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3561.0,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 3.806451612903226,
|
|
"grad_norm": 1.0076000451202138,
|
|
"learning_rate": 2.0450369477862922e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1766311377286911,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2529.4,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 3.814516129032258,
|
|
"grad_norm": 0.9439834230873386,
|
|
"learning_rate": 2.036995652822734e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17991912364959717,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2873.6,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 3.8225806451612905,
|
|
"grad_norm": 0.960271797917251,
|
|
"learning_rate": 2.028953759550381e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1966097056865692,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2673.1,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 3.8306451612903225,
|
|
"grad_norm": 0.9676694617928986,
|
|
"learning_rate": 2.0209113980260146e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2254253327846527,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2678.8,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 3.838709677419355,
|
|
"grad_norm": 0.9709025313710058,
|
|
"learning_rate": 2.012868698313985e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15920354425907135,
|
|
"step": 2380,
|
|
"valid_targets_mean": 2462.2,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 3.846774193548387,
|
|
"grad_norm": 0.7971481849819358,
|
|
"learning_rate": 2.0048257904841165e-05,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1681784987449646,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3441.0,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 3.8548387096774195,
|
|
"grad_norm": 0.882203199287911,
|
|
"learning_rate": 1.9967828046095945e-05,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17107778787612915,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2388.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.8629032258064515,
|
|
"grad_norm": 0.990121988816984,
|
|
"learning_rate": 1.988739870764869e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2109382152557373,
|
|
"step": 2395,
|
|
"valid_targets_mean": 2354.2,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 3.870967741935484,
|
|
"grad_norm": 0.909797611805979,
|
|
"learning_rate": 1.9806971190235485e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1638723909854889,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2547.6,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 3.879032258064516,
|
|
"grad_norm": 0.9046926257274428,
|
|
"learning_rate": 1.972654679456295e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15158769488334656,
|
|
"step": 2405,
|
|
"valid_targets_mean": 2900.1,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 3.8870967741935485,
|
|
"grad_norm": 0.9262562242765543,
|
|
"learning_rate": 1.9646126821287245e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18479233980178833,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2655.8,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 3.8951612903225805,
|
|
"grad_norm": 0.9062335666560299,
|
|
"learning_rate": 1.9565712570992988e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16763897240161896,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2485.1,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 3.903225806451613,
|
|
"grad_norm": 1.0026482765115579,
|
|
"learning_rate": 1.948530534417225e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1325940042734146,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2590.0,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 3.911290322580645,
|
|
"grad_norm": 0.7670726828552455,
|
|
"learning_rate": 1.9404906441203512e-05,
|
|
"loss": 0.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17119964957237244,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3634.2,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.9193548387096775,
|
|
"grad_norm": 0.8858960406520359,
|
|
"learning_rate": 1.932451716233064e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18451717495918274,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3279.6,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 3.9274193548387095,
|
|
"grad_norm": 0.8702500919389756,
|
|
"learning_rate": 1.9244138807641858e-05,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16101489961147308,
|
|
"step": 2435,
|
|
"valid_targets_mean": 2583.7,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 3.935483870967742,
|
|
"grad_norm": 0.8074574574602585,
|
|
"learning_rate": 1.9163772677048716e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1586969792842865,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3215.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 3.943548387096774,
|
|
"grad_norm": 0.8579554464034049,
|
|
"learning_rate": 1.9083420070265065e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15047268569469452,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3209.3,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 3.9516129032258065,
|
|
"grad_norm": 0.8851002197404696,
|
|
"learning_rate": 1.9003082286786056e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1710921674966812,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3043.4,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 3.959677419354839,
|
|
"grad_norm": 0.9589554291297794,
|
|
"learning_rate": 1.8922760625867114e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19913877546787262,
|
|
"step": 2455,
|
|
"valid_targets_mean": 2623.2,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.967741935483871,
|
|
"grad_norm": 1.0039056755447395,
|
|
"learning_rate": 1.8842456386502907e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18878231942653656,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2495.0,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 3.975806451612903,
|
|
"grad_norm": 0.9443229433869169,
|
|
"learning_rate": 1.8762170867406366e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14608219265937805,
|
|
"step": 2465,
|
|
"valid_targets_mean": 2906.6,
|
|
"valid_targets_min": 168
|
|
},
|
|
{
|
|
"epoch": 3.9838709677419355,
|
|
"grad_norm": 0.8830925384899825,
|
|
"learning_rate": 1.868190536698766e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684223711490631,
|
|
"step": 2470,
|
|
"valid_targets_mean": 2887.8,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 3.991935483870968,
|
|
"grad_norm": 0.9438057833472185,
|
|
"learning_rate": 1.860166118333323e-05,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1783444881439209,
|
|
"step": 2475,
|
|
"valid_targets_mean": 2905.9,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.8549347840294532,
|
|
"learning_rate": 1.852143961418474e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15983647108078003,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3175.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.008064516129032,
|
|
"grad_norm": 0.8213954710177551,
|
|
"learning_rate": 1.844124195691816e-05,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16196708381175995,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3476.1,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 4.016129032258065,
|
|
"grad_norm": 0.9205252414911493,
|
|
"learning_rate": 1.8361069508522716e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17692270874977112,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2647.0,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 4.024193548387097,
|
|
"grad_norm": 0.9583964752110224,
|
|
"learning_rate": 1.828092356557996e-05,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15441617369651794,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2488.5,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 4.032258064516129,
|
|
"grad_norm": 0.8519190550024878,
|
|
"learning_rate": 1.820080542424278e-05,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14468294382095337,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3474.1,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.040322580645161,
|
|
"grad_norm": 1.174285564619348,
|
|
"learning_rate": 1.812071638021447e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14380963146686554,
|
|
"step": 2505,
|
|
"valid_targets_mean": 2758.0,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 4.048387096774194,
|
|
"grad_norm": 0.6933221366785827,
|
|
"learning_rate": 1.8040657728727714e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10148196667432785,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3707.6,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 4.056451612903226,
|
|
"grad_norm": 1.0045424776080378,
|
|
"learning_rate": 1.7960630764523703e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15910252928733826,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2545.7,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.064516129032258,
|
|
"grad_norm": 0.9645647150971115,
|
|
"learning_rate": 1.7880636781831148e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12726779282093048,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2431.1,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 4.07258064516129,
|
|
"grad_norm": 1.0650584847710014,
|
|
"learning_rate": 1.7800677074345387e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1669185906648636,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2255.6,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 4.080645161290323,
|
|
"grad_norm": 0.8014420580492201,
|
|
"learning_rate": 1.7720752935207437e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13192100822925568,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2956.8,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.088709677419355,
|
|
"grad_norm": 1.0528253062571888,
|
|
"learning_rate": 1.7640865656983084e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1590539664030075,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2633.7,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 4.096774193548387,
|
|
"grad_norm": 0.819825926972137,
|
|
"learning_rate": 1.7561016531642e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12090647220611572,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3194.6,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 4.104838709677419,
|
|
"grad_norm": 0.8710203692236937,
|
|
"learning_rate": 1.748120685053681e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09878993034362793,
|
|
"step": 2545,
|
|
"valid_targets_mean": 2640.0,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 4.112903225806452,
|
|
"grad_norm": 0.9475084790745687,
|
|
"learning_rate": 1.7401437904382252e-05,
|
|
"loss": 0.1489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14847058057785034,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2831.2,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 4.120967741935484,
|
|
"grad_norm": 0.8944145879033736,
|
|
"learning_rate": 1.7321710983234278e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12117449939250946,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4198.2,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 4.129032258064516,
|
|
"grad_norm": 1.0346783644721311,
|
|
"learning_rate": 1.7242027376469183e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13216251134872437,
|
|
"step": 2560,
|
|
"valid_targets_mean": 2612.6,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 4.137096774193548,
|
|
"grad_norm": 0.8311454135218885,
|
|
"learning_rate": 1.7162388372762775e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16681070625782013,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3763.9,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.145161290322581,
|
|
"grad_norm": 0.8504990995462252,
|
|
"learning_rate": 1.7082795260069515e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14639855921268463,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3181.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.153225806451613,
|
|
"grad_norm": 0.8626500268722279,
|
|
"learning_rate": 1.7003249325601712e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11330239474773407,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3376.2,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 4.161290322580645,
|
|
"grad_norm": 0.9725118345608886,
|
|
"learning_rate": 1.6923751855808664e-05,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442527025938034,
|
|
"step": 2580,
|
|
"valid_targets_mean": 2627.1,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.169354838709677,
|
|
"grad_norm": 0.8243176172096316,
|
|
"learning_rate": 1.6844304136355894e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14019308984279633,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3090.4,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 4.17741935483871,
|
|
"grad_norm": 1.0032005840119445,
|
|
"learning_rate": 1.6764907452104352e-05,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15119823813438416,
|
|
"step": 2590,
|
|
"valid_targets_mean": 2699.6,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 4.185483870967742,
|
|
"grad_norm": 1.0662008558504592,
|
|
"learning_rate": 1.6685563087089597e-05,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15488022565841675,
|
|
"step": 2595,
|
|
"valid_targets_mean": 2234.5,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 4.193548387096774,
|
|
"grad_norm": 0.7658883255399783,
|
|
"learning_rate": 1.6606272324501087e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12042025476694107,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3504.8,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 4.201612903225806,
|
|
"grad_norm": 0.9561036374269946,
|
|
"learning_rate": 1.6527036446661396e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14777009189128876,
|
|
"step": 2605,
|
|
"valid_targets_mean": 2596.0,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 4.209677419354839,
|
|
"grad_norm": 0.8451545374872436,
|
|
"learning_rate": 1.644785673500546e-05,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11389363557100296,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2920.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 4.217741935483871,
|
|
"grad_norm": 0.8400836779802142,
|
|
"learning_rate": 1.6368734470059902e-05,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15508580207824707,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3347.5,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 4.225806451612903,
|
|
"grad_norm": 0.6551631671777359,
|
|
"learning_rate": 1.628967093142226e-05,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14567065238952637,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4150.4,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 4.233870967741935,
|
|
"grad_norm": 0.8808699597452085,
|
|
"learning_rate": 1.6210667397740357e-05,
|
|
"loss": 0.1511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11268693208694458,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2671.8,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.241935483870968,
|
|
"grad_norm": 0.9882784553304603,
|
|
"learning_rate": 1.613172514669157e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1811019778251648,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2270.1,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"grad_norm": 0.8993444666787753,
|
|
"learning_rate": 1.6052845454962195e-05,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350913643836975,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2924.2,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 4.258064516129032,
|
|
"grad_norm": 1.023494377705266,
|
|
"learning_rate": 1.5974029598226796e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555602252483368,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2274.1,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 4.266129032258064,
|
|
"grad_norm": 1.1751275058513142,
|
|
"learning_rate": 1.589527885112758e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14619585871696472,
|
|
"step": 2645,
|
|
"valid_targets_mean": 1915.4,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 4.274193548387097,
|
|
"grad_norm": 0.901822828413539,
|
|
"learning_rate": 1.5816594487253752e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16659660637378693,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2559.7,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 4.282258064516129,
|
|
"grad_norm": 0.9399828906723822,
|
|
"learning_rate": 1.5737977779120957e-05,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12059611082077026,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2591.1,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 4.290322580645161,
|
|
"grad_norm": 0.8885139807180427,
|
|
"learning_rate": 1.5659429998150676e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14536958932876587,
|
|
"step": 2660,
|
|
"valid_targets_mean": 2856.2,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 4.298387096774194,
|
|
"grad_norm": 0.8996449209525608,
|
|
"learning_rate": 1.5580952414649683e-05,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14367900788784027,
|
|
"step": 2665,
|
|
"valid_targets_mean": 2825.9,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.306451612903226,
|
|
"grad_norm": 0.9624290809212261,
|
|
"learning_rate": 1.550254629778947e-05,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14275847375392914,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2852.9,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 4.314516129032258,
|
|
"grad_norm": 0.8606298336940839,
|
|
"learning_rate": 1.5424212915585766e-05,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456950306892395,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3170.6,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 4.32258064516129,
|
|
"grad_norm": 0.9782182456927098,
|
|
"learning_rate": 1.5345953534877986e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12120696902275085,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2613.8,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 4.330645161290323,
|
|
"grad_norm": 1.1256145540413764,
|
|
"learning_rate": 1.5267769421308765e-05,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12931227684020996,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2138.4,
|
|
"valid_targets_min": 209
|
|
},
|
|
{
|
|
"epoch": 4.338709677419355,
|
|
"grad_norm": 0.7892910336204383,
|
|
"learning_rate": 1.5189661839303493e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1283065676689148,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3242.8,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.346774193548387,
|
|
"grad_norm": 0.9469082380458769,
|
|
"learning_rate": 1.5111632052049872e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405557543039322,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2069.8,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 4.354838709677419,
|
|
"grad_norm": 1.1477030727242157,
|
|
"learning_rate": 1.5033681321477445e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569700837135315,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2357.4,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 4.362903225806452,
|
|
"grad_norm": 0.8992719758367996,
|
|
"learning_rate": 1.4955810908237247e-05,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14563268423080444,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3360.2,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 4.370967741935484,
|
|
"grad_norm": 0.7982492596339266,
|
|
"learning_rate": 1.4878022071681368e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14491286873817444,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3405.6,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.379032258064516,
|
|
"grad_norm": 0.9097059873613562,
|
|
"learning_rate": 1.4800316069842623e-05,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1494056135416031,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3112.3,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 4.387096774193548,
|
|
"grad_norm": 0.8700056199670083,
|
|
"learning_rate": 1.4722694159414176e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480676531791687,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3123.4,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 4.395161290322581,
|
|
"grad_norm": 0.9994684741785218,
|
|
"learning_rate": 1.4645157595729247e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16081808507442474,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2589.9,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 4.403225806451613,
|
|
"grad_norm": 0.9960867940413308,
|
|
"learning_rate": 1.4567707632740773e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1463235318660736,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2630.8,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 4.411290322580645,
|
|
"grad_norm": 0.7989369789223338,
|
|
"learning_rate": 1.4490345523001155e-05,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12822982668876648,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3307.9,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 4.419354838709677,
|
|
"grad_norm": 0.8835188942777478,
|
|
"learning_rate": 1.4413072517642013e-05,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14991280436515808,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2808.2,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 4.42741935483871,
|
|
"grad_norm": 1.0051998052202122,
|
|
"learning_rate": 1.433588986635392e-05,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15139266848564148,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2624.9,
|
|
"valid_targets_min": 162
|
|
},
|
|
{
|
|
"epoch": 4.435483870967742,
|
|
"grad_norm": 1.0913273065776055,
|
|
"learning_rate": 1.4258798817366202e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13491889834403992,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2588.6,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 4.443548387096774,
|
|
"grad_norm": 0.7577825742645623,
|
|
"learning_rate": 1.418180061742677e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09989809989929199,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3960.6,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 4.451612903225806,
|
|
"grad_norm": 0.9872315622450911,
|
|
"learning_rate": 1.4104896511781916e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1825554370880127,
|
|
"step": 2760,
|
|
"valid_targets_mean": 2845.2,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 4.459677419354839,
|
|
"grad_norm": 0.9145317372334784,
|
|
"learning_rate": 1.4028087744156239e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15647068619728088,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2918.1,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 4.467741935483871,
|
|
"grad_norm": 0.8764408514254786,
|
|
"learning_rate": 1.3951375556732459e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14826920628547668,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3184.9,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 4.475806451612903,
|
|
"grad_norm": 0.9307968404855765,
|
|
"learning_rate": 1.3874761190131371e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213061511516571,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3271.5,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 4.483870967741936,
|
|
"grad_norm": 1.0386284307663567,
|
|
"learning_rate": 1.3798245883391788e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734910011291504,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2518.5,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 4.491935483870968,
|
|
"grad_norm": 0.6593811211510962,
|
|
"learning_rate": 1.3721830873950457e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213173195719719,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4040.8,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"grad_norm": 1.0453195488640104,
|
|
"learning_rate": 1.3645517397622104e-05,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17029725015163422,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3622.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 4.508064516129032,
|
|
"grad_norm": 1.0248219933806566,
|
|
"learning_rate": 1.356930668857941e-05,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11269605159759521,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2761.5,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 4.516129032258064,
|
|
"grad_norm": 0.8426178221212375,
|
|
"learning_rate": 1.3493199979333053e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396656334400177,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3315.2,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 4.524193548387097,
|
|
"grad_norm": 0.84853751489602,
|
|
"learning_rate": 1.3417198500711801e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13605958223342896,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3250.6,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 4.532258064516129,
|
|
"grad_norm": 0.979265616079346,
|
|
"learning_rate": 1.3341303481842566e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12708471715450287,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2495.8,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.540322580645161,
|
|
"grad_norm": 0.8455562265406171,
|
|
"learning_rate": 1.3265516150130577e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17497974634170532,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2865.9,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 4.548387096774194,
|
|
"grad_norm": 0.8692275108359037,
|
|
"learning_rate": 1.318983773123948e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375136822462082,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3004.0,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 4.556451612903226,
|
|
"grad_norm": 0.9274103008458302,
|
|
"learning_rate": 1.311426944907154e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.140288844704628,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2565.2,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 4.564516129032258,
|
|
"grad_norm": 1.0428919865551005,
|
|
"learning_rate": 1.3038812525747859e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16872337460517883,
|
|
"step": 2830,
|
|
"valid_targets_mean": 2502.6,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 4.57258064516129,
|
|
"grad_norm": 1.0738911446354682,
|
|
"learning_rate": 1.2963468181588602e-05,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12904733419418335,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2745.4,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.580645161290323,
|
|
"grad_norm": 1.1034555984717644,
|
|
"learning_rate": 1.2888237635093233e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13974744081497192,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2827.8,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 4.588709677419355,
|
|
"grad_norm": 0.8748502733328836,
|
|
"learning_rate": 1.2813122102920859e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14511331915855408,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3048.0,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 4.596774193548387,
|
|
"grad_norm": 0.8937982702862034,
|
|
"learning_rate": 1.273812279987051e-05,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14839908480644226,
|
|
"step": 2850,
|
|
"valid_targets_mean": 2812.6,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 4.604838709677419,
|
|
"grad_norm": 0.8627802279047316,
|
|
"learning_rate": 1.2663240938861526e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13329030573368073,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3413.9,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 4.612903225806452,
|
|
"grad_norm": 0.9178859835097213,
|
|
"learning_rate": 1.2588477730913912e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14521445333957672,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2701.4,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 4.620967741935484,
|
|
"grad_norm": 0.8242408007845785,
|
|
"learning_rate": 1.2513834385128783e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13470721244812012,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3023.8,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 4.629032258064516,
|
|
"grad_norm": 1.0244904157196844,
|
|
"learning_rate": 1.2439312108668782e-05,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13942378759384155,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2818.9,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 4.637096774193548,
|
|
"grad_norm": 0.9261243052981046,
|
|
"learning_rate": 1.2364912106738567e-05,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12044207751750946,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2450.1,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 4.645161290322581,
|
|
"grad_norm": 0.8971595082157878,
|
|
"learning_rate": 1.2290635582565334e-05,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10077952593564987,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2810.8,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 4.653225806451613,
|
|
"grad_norm": 0.9046732617565613,
|
|
"learning_rate": 1.221648373737935e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13093318045139313,
|
|
"step": 2885,
|
|
"valid_targets_mean": 2955.6,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 4.661290322580645,
|
|
"grad_norm": 1.191900804216905,
|
|
"learning_rate": 1.21424577703945e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17283648252487183,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2750.8,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 4.669354838709677,
|
|
"grad_norm": 0.8361037069511664,
|
|
"learning_rate": 1.2068558878788941e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17283347249031067,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3264.1,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 4.67741935483871,
|
|
"grad_norm": 1.036690889533898,
|
|
"learning_rate": 1.1994788257685693e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15280653536319733,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2159.6,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 4.685483870967742,
|
|
"grad_norm": 1.3094336822666033,
|
|
"learning_rate": 1.192114710013335e-05,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1487935483455658,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2235.9,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.693548387096774,
|
|
"grad_norm": 1.1120148613819532,
|
|
"learning_rate": 1.1847636597086759e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350301057100296,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2306.8,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.701612903225806,
|
|
"grad_norm": 0.7630472425622755,
|
|
"learning_rate": 1.1774257937387774e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10234333574771881,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3734.6,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 4.709677419354839,
|
|
"grad_norm": 1.0596157936877553,
|
|
"learning_rate": 1.1701012307746021e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13332626223564148,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2067.9,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 4.717741935483871,
|
|
"grad_norm": 1.0116204077022857,
|
|
"learning_rate": 1.1627900892719706e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16812828183174133,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2588.6,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 4.725806451612903,
|
|
"grad_norm": 0.9568935328469356,
|
|
"learning_rate": 1.1554924874696471e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16433778405189514,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2922.1,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 4.733870967741936,
|
|
"grad_norm": 1.1175413857451626,
|
|
"learning_rate": 1.1482085433874264e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356509029865265,
|
|
"step": 2935,
|
|
"valid_targets_mean": 2655.2,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 4.741935483870968,
|
|
"grad_norm": 0.9032472081646458,
|
|
"learning_rate": 1.1409383748242246e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13084343075752258,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2926.8,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"grad_norm": 0.8094116345499048,
|
|
"learning_rate": 1.133682099356173e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18918533623218536,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3222.8,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 4.758064516129032,
|
|
"grad_norm": 0.9896356810322104,
|
|
"learning_rate": 1.12643983433472e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13156412541866302,
|
|
"step": 2950,
|
|
"valid_targets_mean": 2578.1,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 4.766129032258064,
|
|
"grad_norm": 0.8183368636424956,
|
|
"learning_rate": 1.1192116968847313e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352168470621109,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3542.2,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 4.774193548387097,
|
|
"grad_norm": 1.1243081541090518,
|
|
"learning_rate": 1.1119978039025959e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11422336101531982,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2281.4,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 4.782258064516129,
|
|
"grad_norm": 1.00605209541701,
|
|
"learning_rate": 1.1047982720543326e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13918514549732208,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2584.7,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.790322580645161,
|
|
"grad_norm": 0.9669496802462436,
|
|
"learning_rate": 1.0976132177737098e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15626245737075806,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2903.9,
|
|
"valid_targets_min": 400
|
|
},
|
|
{
|
|
"epoch": 4.798387096774194,
|
|
"grad_norm": 1.0009643462892552,
|
|
"learning_rate": 1.090442757260357e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13716918230056763,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2316.2,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.806451612903226,
|
|
"grad_norm": 1.0143551890956948,
|
|
"learning_rate": 1.083287006477888e-05,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13639025390148163,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2333.2,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 4.814516129032258,
|
|
"grad_norm": 0.876787928006797,
|
|
"learning_rate": 1.0761460811520236e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11864929646253586,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2796.3,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 4.82258064516129,
|
|
"grad_norm": 1.0697959150521756,
|
|
"learning_rate": 1.0690200967687234e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1397312581539154,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3083.6,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 4.830645161290323,
|
|
"grad_norm": 0.9569030063211383,
|
|
"learning_rate": 1.0619091685723132e-05,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13941988348960876,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2889.4,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 4.838709677419355,
|
|
"grad_norm": 1.142830124442297,
|
|
"learning_rate": 1.0548134115636262e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14603787660598755,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2144.6,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 4.846774193548387,
|
|
"grad_norm": 0.9972533718413399,
|
|
"learning_rate": 1.04773294049814e-05,
|
|
"loss": 0.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15350191295146942,
|
|
"step": 3005,
|
|
"valid_targets_mean": 2431.2,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 4.854838709677419,
|
|
"grad_norm": 0.8785541724579881,
|
|
"learning_rate": 1.0406678698841231e-05,
|
|
"loss": 0.1444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12709422409534454,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3114.4,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.862903225806452,
|
|
"grad_norm": 0.9922768726165682,
|
|
"learning_rate": 1.0336183139807783e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1540890634059906,
|
|
"step": 3015,
|
|
"valid_targets_mean": 2374.8,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 4.870967741935484,
|
|
"grad_norm": 1.1281054167136748,
|
|
"learning_rate": 1.0265843867964014e-05,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375105082988739,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2244.9,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 4.879032258064516,
|
|
"grad_norm": 0.8656217542566792,
|
|
"learning_rate": 1.0195662020865333e-05,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12996995449066162,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2717.6,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 4.887096774193548,
|
|
"grad_norm": 0.8384362922352775,
|
|
"learning_rate": 1.0125638733521209e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11156944930553436,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3053.3,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 4.895161290322581,
|
|
"grad_norm": 1.0550935292799568,
|
|
"learning_rate": 1.0055775138376816e-05,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18390315771102905,
|
|
"step": 3035,
|
|
"valid_targets_mean": 2199.6,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 4.903225806451613,
|
|
"grad_norm": 1.0592844575544045,
|
|
"learning_rate": 9.986072365294731e-06,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14451473951339722,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2701.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 4.911290322580645,
|
|
"grad_norm": 0.8683549243436681,
|
|
"learning_rate": 9.91653154153663e-06,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12277504801750183,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3131.6,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 4.919354838709677,
|
|
"grad_norm": 1.1335074094848654,
|
|
"learning_rate": 9.8471537917451e-06,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15363723039627075,
|
|
"step": 3050,
|
|
"valid_targets_mean": 2425.8,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 4.92741935483871,
|
|
"grad_norm": 0.8763804930456096,
|
|
"learning_rate": 9.777940237925427e-06,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14299733936786652,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3248.1,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 4.935483870967742,
|
|
"grad_norm": 0.9520777306681445,
|
|
"learning_rate": 9.70889199942743e-06,
|
|
"loss": 0.1519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16239605844020844,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2762.4,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 4.943548387096774,
|
|
"grad_norm": 0.9130528375340291,
|
|
"learning_rate": 9.640010192927407e-06,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17796222865581512,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2997.4,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 4.951612903225806,
|
|
"grad_norm": 1.0970565900242906,
|
|
"learning_rate": 9.57129593241004e-06,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1678534299135208,
|
|
"step": 3070,
|
|
"valid_targets_mean": 2449.4,
|
|
"valid_targets_min": 168
|
|
},
|
|
{
|
|
"epoch": 4.959677419354839,
|
|
"grad_norm": 0.9951086921040369,
|
|
"learning_rate": 9.502750329150391e-06,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11366388201713562,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3055.8,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 4.967741935483871,
|
|
"grad_norm": 0.8634018739524523,
|
|
"learning_rate": 9.434374491695927e-06,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10599691420793533,
|
|
"step": 3080,
|
|
"valid_targets_mean": 2899.1,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 4.975806451612903,
|
|
"grad_norm": 1.3462156495792064,
|
|
"learning_rate": 9.366169525848591e-06,
|
|
"loss": 0.1408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15738080441951752,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2238.2,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 4.983870967741936,
|
|
"grad_norm": 1.0644507921921302,
|
|
"learning_rate": 9.29813653464693e-06,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15163150429725647,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2299.8,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 4.991935483870968,
|
|
"grad_norm": 0.9865419382410165,
|
|
"learning_rate": 9.230276618348224e-06,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1459825485944748,
|
|
"step": 3095,
|
|
"valid_targets_mean": 2484.3,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.7325923064738777,
|
|
"learning_rate": 9.16259087441074e-06,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08983122557401657,
|
|
"step": 3100,
|
|
"valid_targets_mean": 3105.4,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 5.008064516129032,
|
|
"grad_norm": 0.7928278633371083,
|
|
"learning_rate": 9.095080397475952e-06,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09464974701404572,
|
|
"step": 3105,
|
|
"valid_targets_mean": 2793.9,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 5.016129032258065,
|
|
"grad_norm": 0.9106623076306375,
|
|
"learning_rate": 9.027746279350832e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11713890731334686,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2453.6,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 5.024193548387097,
|
|
"grad_norm": 0.7843370038062453,
|
|
"learning_rate": 8.960589608990211e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08030445873737335,
|
|
"step": 3115,
|
|
"valid_targets_mean": 2694.6,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 5.032258064516129,
|
|
"grad_norm": 0.9855372684140141,
|
|
"learning_rate": 8.893611472479189e-06,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10840868204832077,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2391.9,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 5.040322580645161,
|
|
"grad_norm": 1.0057170683788865,
|
|
"learning_rate": 8.826812953015498e-06,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11926726996898651,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2439.3,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 5.048387096774194,
|
|
"grad_norm": 0.8891189355412932,
|
|
"learning_rate": 8.76019513089206e-06,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1101326122879982,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2574.8,
|
|
"valid_targets_min": 190
|
|
},
|
|
{
|
|
"epoch": 5.056451612903226,
|
|
"grad_norm": 0.7129673307742821,
|
|
"learning_rate": 8.69375908347948e-06,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10742997378110886,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3204.2,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 5.064516129032258,
|
|
"grad_norm": 1.2782536487033205,
|
|
"learning_rate": 8.627505885208631e-06,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12972065806388855,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2501.4,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 5.07258064516129,
|
|
"grad_norm": 1.0312647400534445,
|
|
"learning_rate": 8.561436607553261e-06,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13556832075119019,
|
|
"step": 3145,
|
|
"valid_targets_mean": 2094.0,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 5.080645161290323,
|
|
"grad_norm": 0.8000351895162175,
|
|
"learning_rate": 8.495552319012692e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11800212413072586,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3395.9,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 5.088709677419355,
|
|
"grad_norm": 0.7927563763938764,
|
|
"learning_rate": 8.42985408509453e-06,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11252060532569885,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2698.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 5.096774193548387,
|
|
"grad_norm": 0.9409562374511837,
|
|
"learning_rate": 8.36434296829741e-06,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13446341454982758,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2823.1,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 5.104838709677419,
|
|
"grad_norm": 0.884885419006514,
|
|
"learning_rate": 8.299020028093844e-06,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310967206954956,
|
|
"step": 3165,
|
|
"valid_targets_mean": 2735.3,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 5.112903225806452,
|
|
"grad_norm": 1.129382395256545,
|
|
"learning_rate": 8.2338863209131e-06,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10772794485092163,
|
|
"step": 3170,
|
|
"valid_targets_mean": 2318.9,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 5.120967741935484,
|
|
"grad_norm": 0.991324476171588,
|
|
"learning_rate": 8.168942900124046e-06,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12066034227609634,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3152.9,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 5.129032258064516,
|
|
"grad_norm": 0.7995724713320561,
|
|
"learning_rate": 8.104190816018191e-06,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11466895043849945,
|
|
"step": 3180,
|
|
"valid_targets_mean": 2708.6,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 5.137096774193548,
|
|
"grad_norm": 0.8698545292927409,
|
|
"learning_rate": 8.039631115792663e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1032571941614151,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3525.1,
|
|
"valid_targets_min": 162
|
|
},
|
|
{
|
|
"epoch": 5.145161290322581,
|
|
"grad_norm": 1.115340593437082,
|
|
"learning_rate": 7.975264843533283e-06,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10125815123319626,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2270.2,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 5.153225806451613,
|
|
"grad_norm": 0.6383798436928889,
|
|
"learning_rate": 7.911093040197662e-06,
|
|
"loss": 0.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09034892916679382,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3671.6,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 5.161290322580645,
|
|
"grad_norm": 0.9414346802890253,
|
|
"learning_rate": 7.847116743598388e-06,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13133779168128967,
|
|
"step": 3200,
|
|
"valid_targets_mean": 2723.1,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 5.169354838709677,
|
|
"grad_norm": 1.1627526331775735,
|
|
"learning_rate": 7.783336988386252e-06,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1512688398361206,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2178.1,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 5.17741935483871,
|
|
"grad_norm": 0.8560133002146495,
|
|
"learning_rate": 7.719754806033455e-06,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10853614658117294,
|
|
"step": 3210,
|
|
"valid_targets_mean": 2936.6,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 5.185483870967742,
|
|
"grad_norm": 0.8187312714298294,
|
|
"learning_rate": 7.656371224817019e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09285955876111984,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3854.4,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 5.193548387096774,
|
|
"grad_norm": 0.9658744733454105,
|
|
"learning_rate": 7.59318726980208e-06,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13732989132404327,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2494.6,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 5.201612903225806,
|
|
"grad_norm": 1.0457393244558184,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11538533866405487,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2029.3,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 5.209677419354839,
|
|
"grad_norm": 1.1689131832905681,
|
|
"learning_rate": 7.4674223224785196e-06,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13994917273521423,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2839.6,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 5.217741935483871,
|
|
"grad_norm": 0.906368252515845,
|
|
"learning_rate": 7.404843364091951e-06,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14213156700134277,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3901.3,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 5.225806451612903,
|
|
"grad_norm": 1.0712505781127002,
|
|
"learning_rate": 7.342468099718083e-06,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13916999101638794,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2504.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 5.233870967741935,
|
|
"grad_norm": 0.8765318879123596,
|
|
"learning_rate": 7.280297538115131e-06,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11804334819316864,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3022.4,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 5.241935483870968,
|
|
"grad_norm": 1.517447186885499,
|
|
"learning_rate": 7.218332684730793e-06,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1128285750746727,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2019.0,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 5.25,
|
|
"grad_norm": 1.133127639267407,
|
|
"learning_rate": 7.156574541685972e-06,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11529646813869476,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2626.7,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 5.258064516129032,
|
|
"grad_norm": 0.9239799193135049,
|
|
"learning_rate": 7.095024107758535e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1463707685470581,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3747.7,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 5.266129032258064,
|
|
"grad_norm": 0.765422067603741,
|
|
"learning_rate": 7.033682378367239e-06,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08645801991224289,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3449.4,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 5.274193548387097,
|
|
"grad_norm": 1.1792075771750488,
|
|
"learning_rate": 6.97255034555556e-06,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12287145853042603,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2421.2,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 5.282258064516129,
|
|
"grad_norm": 1.0433695093077782,
|
|
"learning_rate": 6.911628997975666e-06,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1689334511756897,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2478.1,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 5.290322580645161,
|
|
"grad_norm": 1.1234819875725308,
|
|
"learning_rate": 6.8509193208724555e-06,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10509350150823593,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3410.9,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 5.298387096774194,
|
|
"grad_norm": 0.9651530026732231,
|
|
"learning_rate": 6.790422296067601e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11354875564575195,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2223.8,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 5.306451612903226,
|
|
"grad_norm": 0.8605662963221766,
|
|
"learning_rate": 6.730138901943682e-06,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10352926701307297,
|
|
"step": 3290,
|
|
"valid_targets_mean": 2982.8,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 5.314516129032258,
|
|
"grad_norm": 0.8501813160605611,
|
|
"learning_rate": 6.670070113428329e-06,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1277409940958023,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3541.8,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 5.32258064516129,
|
|
"grad_norm": 0.8269469833011988,
|
|
"learning_rate": 6.6102169019785145e-06,
|
|
"loss": 0.1098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1069549173116684,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3094.0,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 5.330645161290323,
|
|
"grad_norm": 1.0929104671332337,
|
|
"learning_rate": 6.550580235564794e-06,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10499197244644165,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2186.1,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 5.338709677419355,
|
|
"grad_norm": 0.9720001700984059,
|
|
"learning_rate": 6.491161078655672e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14998789131641388,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3238.7,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 5.346774193548387,
|
|
"grad_norm": 0.964248106187236,
|
|
"learning_rate": 6.431960392202004e-06,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15501153469085693,
|
|
"step": 3315,
|
|
"valid_targets_mean": 2685.1,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 5.354838709677419,
|
|
"grad_norm": 0.9415303014351648,
|
|
"learning_rate": 6.3729791336214505e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11437273025512695,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2692.8,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 5.362903225806452,
|
|
"grad_norm": 0.9633369114489742,
|
|
"learning_rate": 6.314218256782984e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09257033467292786,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2583.5,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 5.370967741935484,
|
|
"grad_norm": 0.7302032273062459,
|
|
"learning_rate": 6.255678711991486e-06,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11280353367328644,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3679.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 5.379032258064516,
|
|
"grad_norm": 0.7620183984672544,
|
|
"learning_rate": 6.1973614459723675e-06,
|
|
"loss": 0.1123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.066011942923069,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2995.1,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 5.387096774193548,
|
|
"grad_norm": 1.022109949583127,
|
|
"learning_rate": 6.1392674018562525e-06,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07422903180122375,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2870.6,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 5.395161290322581,
|
|
"grad_norm": 0.8999472321109041,
|
|
"learning_rate": 6.081397519163716e-06,
|
|
"loss": 0.1079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12761415541172028,
|
|
"step": 3345,
|
|
"valid_targets_mean": 2708.4,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 5.403225806451613,
|
|
"grad_norm": 1.161213540142583,
|
|
"learning_rate": 6.023752733790124e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15303024649620056,
|
|
"step": 3350,
|
|
"valid_targets_mean": 1880.2,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 5.411290322580645,
|
|
"grad_norm": 0.9993750412392302,
|
|
"learning_rate": 5.96633397799047e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10079734027385712,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3001.9,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 5.419354838709677,
|
|
"grad_norm": 0.8849282289133072,
|
|
"learning_rate": 5.909142180364298e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1247493177652359,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3336.6,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 5.42741935483871,
|
|
"grad_norm": 0.973531663035612,
|
|
"learning_rate": 5.8521782658407e-06,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12807932496070862,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2054.2,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 5.435483870967742,
|
|
"grad_norm": 1.0282079346680015,
|
|
"learning_rate": 5.795443155663354e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11028341948986053,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2192.7,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 5.443548387096774,
|
|
"grad_norm": 0.9687696134717432,
|
|
"learning_rate": 5.738937767375596e-06,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15361368656158447,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3151.4,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 5.451612903225806,
|
|
"grad_norm": 0.7542879054647976,
|
|
"learning_rate": 5.682663014805631e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09807437658309937,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3528.6,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 5.459677419354839,
|
|
"grad_norm": 0.9034561175052493,
|
|
"learning_rate": 5.626619808051725e-06,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11302371323108673,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2797.6,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.467741935483871,
|
|
"grad_norm": 0.8573242206328966,
|
|
"learning_rate": 5.5708090534674874e-06,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10703709721565247,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3023.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 5.475806451612903,
|
|
"grad_norm": 0.9107840387353426,
|
|
"learning_rate": 5.5152316536472065e-06,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13341592252254486,
|
|
"step": 3395,
|
|
"valid_targets_mean": 2714.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.483870967741936,
|
|
"grad_norm": 1.0254919185400357,
|
|
"learning_rate": 5.459888507411275e-06,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10588261485099792,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2477.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 5.491935483870968,
|
|
"grad_norm": 0.7953648905645644,
|
|
"learning_rate": 5.4047805097916385e-06,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11812490969896317,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3436.8,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 5.5,
|
|
"grad_norm": 0.6525808900476495,
|
|
"learning_rate": 5.349908552017323e-06,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07666683197021484,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3541.9,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 5.508064516129032,
|
|
"grad_norm": 0.8197004548576328,
|
|
"learning_rate": 5.295273521500017e-06,
|
|
"loss": 0.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10779140889644623,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3296.8,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 5.516129032258064,
|
|
"grad_norm": 1.1010874404590432,
|
|
"learning_rate": 5.240876301819737e-06,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1362469643354416,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2429.2,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 5.524193548387097,
|
|
"grad_norm": 1.0304334054280224,
|
|
"learning_rate": 5.186717772710508e-06,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0989312157034874,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2264.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 5.532258064516129,
|
|
"grad_norm": 0.9736748769348844,
|
|
"learning_rate": 5.132798810046162e-06,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1279708445072174,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2790.2,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 5.540322580645161,
|
|
"grad_norm": 0.7071037244831279,
|
|
"learning_rate": 5.079120285826176e-06,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07910849153995514,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3304.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.548387096774194,
|
|
"grad_norm": 1.0817636050821438,
|
|
"learning_rate": 5.025683068161533e-06,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16724850237369537,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2449.6,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 5.556451612903226,
|
|
"grad_norm": 1.0453386561639684,
|
|
"learning_rate": 4.972488021260733e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12520018219947815,
|
|
"step": 3445,
|
|
"valid_targets_mean": 2364.2,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.564516129032258,
|
|
"grad_norm": 1.038399159967357,
|
|
"learning_rate": 4.919536005415775e-06,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09334912151098251,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2693.5,
|
|
"valid_targets_min": 177
|
|
},
|
|
{
|
|
"epoch": 5.57258064516129,
|
|
"grad_norm": 0.7754877896148306,
|
|
"learning_rate": 4.866827876988274e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1045357882976532,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3459.1,
|
|
"valid_targets_min": 198
|
|
},
|
|
{
|
|
"epoch": 5.580645161290323,
|
|
"grad_norm": 1.0734073871018261,
|
|
"learning_rate": 4.814364488395584e-06,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13502070307731628,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2391.6,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 5.588709677419355,
|
|
"grad_norm": 1.0561914690631717,
|
|
"learning_rate": 4.762146688097038e-06,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12528647482395172,
|
|
"step": 3465,
|
|
"valid_targets_mean": 2223.9,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 5.596774193548387,
|
|
"grad_norm": 0.9414920082689894,
|
|
"learning_rate": 4.710175320580215e-06,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11818397045135498,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2299.8,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 5.604838709677419,
|
|
"grad_norm": 0.9153655150207458,
|
|
"learning_rate": 4.658451226347267e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271030604839325,
|
|
"step": 3475,
|
|
"valid_targets_mean": 2952.7,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 5.612903225806452,
|
|
"grad_norm": 0.9375013837195536,
|
|
"learning_rate": 4.606975241901354e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12373068928718567,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2437.5,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 5.620967741935484,
|
|
"grad_norm": 0.9279385615257469,
|
|
"learning_rate": 4.555748199733117e-06,
|
|
"loss": 0.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10733592510223389,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3035.9,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 5.629032258064516,
|
|
"grad_norm": 0.8467842321874853,
|
|
"learning_rate": 4.504770928307163e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08826883137226105,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3058.3,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 5.637096774193548,
|
|
"grad_norm": 0.8932167560733536,
|
|
"learning_rate": 4.454044252048735e-06,
|
|
"loss": 0.1017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09955582767724991,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3409.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 5.645161290322581,
|
|
"grad_norm": 1.0783532930506496,
|
|
"learning_rate": 4.403568991330356e-06,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13407951593399048,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2204.5,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 5.653225806451613,
|
|
"grad_norm": 1.2050520658392365,
|
|
"learning_rate": 4.353345962458519e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14499002695083618,
|
|
"step": 3505,
|
|
"valid_targets_mean": 2068.6,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 5.661290322580645,
|
|
"grad_norm": 0.810193611294997,
|
|
"learning_rate": 4.303375977660553e-06,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0819336324930191,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3164.6,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 5.669354838709677,
|
|
"grad_norm": 1.1375123108203136,
|
|
"learning_rate": 4.253659845071436e-06,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12206903845071793,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2571.2,
|
|
"valid_targets_min": 202
|
|
},
|
|
{
|
|
"epoch": 5.67741935483871,
|
|
"grad_norm": 0.770492454360222,
|
|
"learning_rate": 4.204198368720762e-06,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07708331197500229,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3127.5,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 5.685483870967742,
|
|
"grad_norm": 1.0056395165725616,
|
|
"learning_rate": 4.154992348519698e-06,
|
|
"loss": 0.101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08381040394306183,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2315.6,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 5.693548387096774,
|
|
"grad_norm": 0.8650356965717153,
|
|
"learning_rate": 4.106042580248084e-06,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12917710840702057,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2781.4,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 5.701612903225806,
|
|
"grad_norm": 0.9881804060681364,
|
|
"learning_rate": 4.057349855541557e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11400888860225677,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2776.8,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 5.709677419354839,
|
|
"grad_norm": 0.9821141795540693,
|
|
"learning_rate": 4.00891496187871e-06,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292840838432312,
|
|
"step": 3540,
|
|
"valid_targets_mean": 2789.3,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 5.717741935483871,
|
|
"grad_norm": 1.1921938138691928,
|
|
"learning_rate": 3.9607386825684256e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11726173013448715,
|
|
"step": 3545,
|
|
"valid_targets_mean": 2273.4,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 5.725806451612903,
|
|
"grad_norm": 0.9571900848570131,
|
|
"learning_rate": 3.9128217967371515e-06,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08216507732868195,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2508.5,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 5.733870967741936,
|
|
"grad_norm": 0.9780051810348152,
|
|
"learning_rate": 3.865165079316308e-06,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10140609741210938,
|
|
"step": 3555,
|
|
"valid_targets_mean": 2780.0,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 5.741935483870968,
|
|
"grad_norm": 1.0179367941340487,
|
|
"learning_rate": 3.817769301029781e-06,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14184284210205078,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2581.3,
|
|
"valid_targets_min": 318
|
|
},
|
|
{
|
|
"epoch": 5.75,
|
|
"grad_norm": 0.9373610589114464,
|
|
"learning_rate": 3.7706352283814387e-06,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12259986996650696,
|
|
"step": 3565,
|
|
"valid_targets_mean": 2606.8,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 5.758064516129032,
|
|
"grad_norm": 1.2014203218717068,
|
|
"learning_rate": 3.7237636236427397e-06,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11834845691919327,
|
|
"step": 3570,
|
|
"valid_targets_mean": 1936.8,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 5.766129032258064,
|
|
"grad_norm": 0.9394301296767549,
|
|
"learning_rate": 3.6771552448403935e-06,
|
|
"loss": 0.1092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12579339742660522,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2930.5,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 5.774193548387097,
|
|
"grad_norm": 0.7547992264928276,
|
|
"learning_rate": 3.630810845744128e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10460078716278076,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3377.2,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 5.782258064516129,
|
|
"grad_norm": 0.9485662229652074,
|
|
"learning_rate": 3.584731175854479e-06,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12333004921674728,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2426.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 5.790322580645161,
|
|
"grad_norm": 1.069212843970159,
|
|
"learning_rate": 3.5389169803906566e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323833167552948,
|
|
"step": 3590,
|
|
"valid_targets_mean": 2114.5,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 5.798387096774194,
|
|
"grad_norm": 0.9747630612186717,
|
|
"learning_rate": 3.4933690002785414e-06,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12368382513523102,
|
|
"step": 3595,
|
|
"valid_targets_mean": 2971.9,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 5.806451612903226,
|
|
"grad_norm": 1.1047309223314667,
|
|
"learning_rate": 3.448087972138654e-06,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1193995475769043,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2382.1,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 5.814516129032258,
|
|
"grad_norm": 0.9343162170369842,
|
|
"learning_rate": 3.4030746282742455e-06,
|
|
"loss": 0.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11362332105636597,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3065.6,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 5.82258064516129,
|
|
"grad_norm": 1.077402531437745,
|
|
"learning_rate": 3.3583296966594904e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13400667905807495,
|
|
"step": 3610,
|
|
"valid_targets_mean": 2511.6,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 5.830645161290323,
|
|
"grad_norm": 0.862783767803568,
|
|
"learning_rate": 3.313853900927679e-06,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07515303045511246,
|
|
"step": 3615,
|
|
"valid_targets_mean": 2678.0,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.838709677419355,
|
|
"grad_norm": 0.9430780004274277,
|
|
"learning_rate": 3.269647960359532e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10815194994211197,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3063.9,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 5.846774193548387,
|
|
"grad_norm": 0.9607012461747731,
|
|
"learning_rate": 3.2257125898715547e-06,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11267538368701935,
|
|
"step": 3625,
|
|
"valid_targets_mean": 2931.1,
|
|
"valid_targets_min": 170
|
|
},
|
|
{
|
|
"epoch": 5.854838709677419,
|
|
"grad_norm": 0.9335699523556695,
|
|
"learning_rate": 3.1820485000044867e-06,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256910264492035,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2649.9,
|
|
"valid_targets_min": 162
|
|
},
|
|
{
|
|
"epoch": 5.862903225806452,
|
|
"grad_norm": 0.8997973008251309,
|
|
"learning_rate": 3.1386563969118076e-06,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11587494611740112,
|
|
"step": 3635,
|
|
"valid_targets_mean": 2300.8,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 5.870967741935484,
|
|
"grad_norm": 1.08473090763604,
|
|
"learning_rate": 3.0955369823483173e-06,
|
|
"loss": 0.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12131787836551666,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2429.9,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 5.879032258064516,
|
|
"grad_norm": 1.0475071527101079,
|
|
"learning_rate": 3.0526909536587813e-06,
|
|
"loss": 0.1004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09543988108634949,
|
|
"step": 3645,
|
|
"valid_targets_mean": 2795.1,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 5.887096774193548,
|
|
"grad_norm": 0.9637105907051934,
|
|
"learning_rate": 3.010119003766665e-06,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0938195288181305,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2354.1,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 5.895161290322581,
|
|
"grad_norm": 0.8634791774905152,
|
|
"learning_rate": 2.967821821162904e-06,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0677330493927002,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2358.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 5.903225806451613,
|
|
"grad_norm": 0.6981920539391867,
|
|
"learning_rate": 2.925800089894801e-06,
|
|
"loss": 0.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056576624512672424,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3303.3,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 5.911290322580645,
|
|
"grad_norm": 0.8731369435316495,
|
|
"learning_rate": 2.8840544895549396e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11164005845785141,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3083.4,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 5.919354838709677,
|
|
"grad_norm": 0.9502883110731841,
|
|
"learning_rate": 2.8425856952702103e-06,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11415612697601318,
|
|
"step": 3670,
|
|
"valid_targets_mean": 2329.6,
|
|
"valid_targets_min": 226
|
|
},
|
|
{
|
|
"epoch": 5.92741935483871,
|
|
"grad_norm": 1.0718832040488573,
|
|
"learning_rate": 2.801394377690865e-06,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12924499809741974,
|
|
"step": 3675,
|
|
"valid_targets_mean": 2604.1,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 5.935483870967742,
|
|
"grad_norm": 0.8880100279189946,
|
|
"learning_rate": 2.7604812029797057e-06,
|
|
"loss": 0.1061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11932685971260071,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3256.9,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.943548387096774,
|
|
"grad_norm": 0.8455562147687403,
|
|
"learning_rate": 2.719846832801287e-06,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11557763814926147,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2976.0,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 5.951612903225806,
|
|
"grad_norm": 0.9799061326211842,
|
|
"learning_rate": 2.679491924311226e-06,
|
|
"loss": 0.092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10677024722099304,
|
|
"step": 3690,
|
|
"valid_targets_mean": 2568.4,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 5.959677419354839,
|
|
"grad_norm": 0.7912210244894068,
|
|
"learning_rate": 2.6394171301455717e-06,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10999710857868195,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3541.1,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 5.967741935483871,
|
|
"grad_norm": 0.9793435876080762,
|
|
"learning_rate": 2.599623098410251e-06,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1141715794801712,
|
|
"step": 3700,
|
|
"valid_targets_mean": 2764.5,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 5.975806451612903,
|
|
"grad_norm": 0.8730841957738629,
|
|
"learning_rate": 2.5601104726705737e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08468244224786758,
|
|
"step": 3705,
|
|
"valid_targets_mean": 2779.4,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 5.983870967741936,
|
|
"grad_norm": 1.1304774153044528,
|
|
"learning_rate": 2.5208798919408527e-06,
|
|
"loss": 0.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09466509521007538,
|
|
"step": 3710,
|
|
"valid_targets_mean": 2970.1,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.991935483870968,
|
|
"grad_norm": 0.956092418461868,
|
|
"learning_rate": 2.481931990674047e-06,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11746788024902344,
|
|
"step": 3715,
|
|
"valid_targets_mean": 2507.4,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.8880681843106466,
|
|
"learning_rate": 2.4432673987515123e-06,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10720624029636383,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2967.3,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 6.008064516129032,
|
|
"grad_norm": 0.653709945113357,
|
|
"learning_rate": 2.4048867414728004e-06,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04550575464963913,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3180.4,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 6.016129032258065,
|
|
"grad_norm": 1.0514272334258081,
|
|
"learning_rate": 2.3667906395455663e-06,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08894272148609161,
|
|
"step": 3730,
|
|
"valid_targets_mean": 1810.0,
|
|
"valid_targets_min": 170
|
|
},
|
|
{
|
|
"epoch": 6.024193548387097,
|
|
"grad_norm": 1.0903745714809925,
|
|
"learning_rate": 2.328979709075516e-06,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11905939877033234,
|
|
"step": 3735,
|
|
"valid_targets_mean": 2205.4,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 6.032258064516129,
|
|
"grad_norm": 0.8119635286081439,
|
|
"learning_rate": 2.2914545615564454e-06,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12603063881397247,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3519.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 6.040322580645161,
|
|
"grad_norm": 0.9518485269950627,
|
|
"learning_rate": 2.254215803860351e-06,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12654322385787964,
|
|
"step": 3745,
|
|
"valid_targets_mean": 2507.4,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 6.048387096774194,
|
|
"grad_norm": 0.8469612348658618,
|
|
"learning_rate": 2.2172640382276267e-06,
|
|
"loss": 0.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10160379111766815,
|
|
"step": 3750,
|
|
"valid_targets_mean": 2764.6,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 6.056451612903226,
|
|
"grad_norm": 0.881645847011186,
|
|
"learning_rate": 2.180599862257291e-06,
|
|
"loss": 0.0964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10982454568147659,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3121.2,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 6.064516129032258,
|
|
"grad_norm": 0.8686719486218358,
|
|
"learning_rate": 2.1442238688973682e-06,
|
|
"loss": 0.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09625842422246933,
|
|
"step": 3760,
|
|
"valid_targets_mean": 2910.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 6.07258064516129,
|
|
"grad_norm": 0.9398421939855708,
|
|
"learning_rate": 2.1081366464352614e-06,
|
|
"loss": 0.1052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08295704424381256,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2366.5,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 6.080645161290323,
|
|
"grad_norm": 0.7546796049746096,
|
|
"learning_rate": 2.0723387784882674e-06,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1132131814956665,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3513.7,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 6.088709677419355,
|
|
"grad_norm": 0.8284220646739447,
|
|
"learning_rate": 2.036830843994102e-06,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060089051723480225,
|
|
"step": 3775,
|
|
"valid_targets_mean": 2583.0,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 6.096774193548387,
|
|
"grad_norm": 0.8809734945848332,
|
|
"learning_rate": 2.0016134172015755e-06,
|
|
"loss": 0.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09762517362833023,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3083.7,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 6.104838709677419,
|
|
"grad_norm": 1.0004192931661953,
|
|
"learning_rate": 1.9666870676612883e-06,
|
|
"loss": 0.1059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11415060609579086,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3116.8,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 6.112903225806452,
|
|
"grad_norm": 0.8978740757571727,
|
|
"learning_rate": 1.9320523602164145e-06,
|
|
"loss": 0.0836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08282393217086792,
|
|
"step": 3790,
|
|
"valid_targets_mean": 2849.4,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 6.120967741935484,
|
|
"grad_norm": 0.8248072863951421,
|
|
"learning_rate": 1.8977098549935745e-06,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09873765707015991,
|
|
"step": 3795,
|
|
"valid_targets_mean": 2941.9,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 6.129032258064516,
|
|
"grad_norm": 0.9178353627116477,
|
|
"learning_rate": 1.8636601073937855e-06,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14534446597099304,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2892.6,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 6.137096774193548,
|
|
"grad_norm": 0.8268503541565092,
|
|
"learning_rate": 1.8299036680834459e-06,
|
|
"loss": 0.0868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07433824986219406,
|
|
"step": 3805,
|
|
"valid_targets_mean": 2245.9,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 6.145161290322581,
|
|
"grad_norm": 1.2051322706821608,
|
|
"learning_rate": 1.796441082985476e-06,
|
|
"loss": 0.1016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10102617740631104,
|
|
"step": 3810,
|
|
"valid_targets_mean": 1585.5,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 6.153225806451613,
|
|
"grad_norm": 0.8055537601440061,
|
|
"learning_rate": 1.763272893270458e-06,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08175353705883026,
|
|
"step": 3815,
|
|
"valid_targets_mean": 2450.9,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 6.161290322580645,
|
|
"grad_norm": 1.1555176799721592,
|
|
"learning_rate": 1.7303996353478837e-06,
|
|
"loss": 0.1086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12794934213161469,
|
|
"step": 3820,
|
|
"valid_targets_mean": 2041.8,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 6.169354838709677,
|
|
"grad_norm": 0.9072433215793436,
|
|
"learning_rate": 1.6978218408574943e-06,
|
|
"loss": 0.0898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0873514786362648,
|
|
"step": 3825,
|
|
"valid_targets_mean": 2325.8,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 6.17741935483871,
|
|
"grad_norm": 0.7365683666375045,
|
|
"learning_rate": 1.6655400366606867e-06,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09240084886550903,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3508.6,
|
|
"valid_targets_min": 149
|
|
},
|
|
{
|
|
"epoch": 6.185483870967742,
|
|
"grad_norm": 1.049695657102972,
|
|
"learning_rate": 1.6335547448319622e-06,
|
|
"loss": 0.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12014150619506836,
|
|
"step": 3835,
|
|
"valid_targets_mean": 2273.7,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 6.193548387096774,
|
|
"grad_norm": 1.1406857065393055,
|
|
"learning_rate": 1.6018664826505114e-06,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10282783210277557,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2082.6,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 6.201612903225806,
|
|
"grad_norm": 0.9571517853320788,
|
|
"learning_rate": 1.5704757625918454e-06,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430928111076355,
|
|
"step": 3845,
|
|
"valid_targets_mean": 2492.4,
|
|
"valid_targets_min": 187
|
|
},
|
|
{
|
|
"epoch": 6.209677419354839,
|
|
"grad_norm": 0.8438716100705801,
|
|
"learning_rate": 1.539383092319502e-06,
|
|
"loss": 0.1008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08023163676261902,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2707.0,
|
|
"valid_targets_min": 181
|
|
},
|
|
{
|
|
"epoch": 6.217741935483871,
|
|
"grad_norm": 0.8344507995164966,
|
|
"learning_rate": 1.5085889746768211e-06,
|
|
"loss": 0.0997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152617305517197,
|
|
"step": 3855,
|
|
"valid_targets_mean": 2995.1,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 6.225806451612903,
|
|
"grad_norm": 0.7589693262791405,
|
|
"learning_rate": 1.478093907678848e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0859871357679367,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3330.4,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 6.233870967741935,
|
|
"grad_norm": 1.1039283348358595,
|
|
"learning_rate": 1.4478983845042493e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11936347186565399,
|
|
"step": 3865,
|
|
"valid_targets_mean": 1997.6,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 6.241935483870968,
|
|
"grad_norm": 1.0563726016947035,
|
|
"learning_rate": 1.4180028934873402e-06,
|
|
"loss": 0.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11746098101139069,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2180.2,
|
|
"valid_targets_min": 179
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"grad_norm": 0.7623701413409668,
|
|
"learning_rate": 1.3884079181102062e-06,
|
|
"loss": 0.0905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07550927251577377,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3458.4,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 6.258064516129032,
|
|
"grad_norm": 0.7742124957219088,
|
|
"learning_rate": 1.3591139369948692e-06,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11610357463359833,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3673.0,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 6.266129032258064,
|
|
"grad_norm": 0.8637524241224023,
|
|
"learning_rate": 1.3301214238955384e-06,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12006796151399612,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3102.5,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 6.274193548387097,
|
|
"grad_norm": 1.0178770431430397,
|
|
"learning_rate": 1.3014308476909698e-06,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10940100252628326,
|
|
"step": 3890,
|
|
"valid_targets_mean": 2604.4,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.282258064516129,
|
|
"grad_norm": 0.8982302298683263,
|
|
"learning_rate": 1.2730426723768673e-06,
|
|
"loss": 0.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09276868402957916,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3116.1,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 6.290322580645161,
|
|
"grad_norm": 1.0780746248518926,
|
|
"learning_rate": 1.244957357058394e-06,
|
|
"loss": 0.1064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1320962905883789,
|
|
"step": 3900,
|
|
"valid_targets_mean": 2351.1,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 6.298387096774194,
|
|
"grad_norm": 0.8556200206477135,
|
|
"learning_rate": 1.2171753559427168e-06,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07612516731023788,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2255.8,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 6.306451612903226,
|
|
"grad_norm": 1.337707707465035,
|
|
"learning_rate": 1.189697118331703e-06,
|
|
"loss": 0.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13546554744243622,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2060.2,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 6.314516129032258,
|
|
"grad_norm": 0.814352087793429,
|
|
"learning_rate": 1.1625230886146245e-06,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10286865383386612,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3382.2,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 6.32258064516129,
|
|
"grad_norm": 0.7550853367967078,
|
|
"learning_rate": 1.1356537062609706e-06,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07276131957769394,
|
|
"step": 3920,
|
|
"valid_targets_mean": 2782.2,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 6.330645161290323,
|
|
"grad_norm": 0.8378986431535546,
|
|
"learning_rate": 1.109089405813366e-06,
|
|
"loss": 0.0924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07647045701742172,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2851.2,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 6.338709677419355,
|
|
"grad_norm": 0.9315121710552545,
|
|
"learning_rate": 1.0828306168805148e-06,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08981286734342575,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3160.8,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 6.346774193548387,
|
|
"grad_norm": 0.8079433257124655,
|
|
"learning_rate": 1.0568777641302663e-06,
|
|
"loss": 0.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08632143586874008,
|
|
"step": 3935,
|
|
"valid_targets_mean": 2958.3,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 6.354838709677419,
|
|
"grad_norm": 1.001893881041316,
|
|
"learning_rate": 1.0312312672827417e-06,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12303528189659119,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2596.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 6.362903225806452,
|
|
"grad_norm": 0.9836231862846007,
|
|
"learning_rate": 1.0058915411035586e-06,
|
|
"loss": 0.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11340214312076569,
|
|
"step": 3945,
|
|
"valid_targets_mean": 2857.9,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 6.370967741935484,
|
|
"grad_norm": 0.9618343233101341,
|
|
"learning_rate": 9.808589953971092e-07,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10142101347446442,
|
|
"step": 3950,
|
|
"valid_targets_mean": 2454.5,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 6.379032258064516,
|
|
"grad_norm": 1.0186561589481962,
|
|
"learning_rate": 9.561340349999315e-07,
|
|
"loss": 0.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1158541589975357,
|
|
"step": 3955,
|
|
"valid_targets_mean": 2507.4,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 6.387096774193548,
|
|
"grad_norm": 0.986884571453095,
|
|
"learning_rate": 9.317170597741798e-07,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1477774828672409,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2678.1,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 6.395161290322581,
|
|
"grad_norm": 0.6516412438561562,
|
|
"learning_rate": 9.076084646011418e-07,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08538518846035004,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3736.5,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 6.403225806451613,
|
|
"grad_norm": 0.8894786919196126,
|
|
"learning_rate": 8.83808639374848e-07,
|
|
"loss": 0.114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1639164686203003,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3082.1,
|
|
"valid_targets_min": 177
|
|
},
|
|
{
|
|
"epoch": 6.411290322580645,
|
|
"grad_norm": 0.8022056045579493,
|
|
"learning_rate": 8.60317968995792e-07,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10185885429382324,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3011.6,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 6.419354838709677,
|
|
"grad_norm": 0.8855039623998457,
|
|
"learning_rate": 8.371368333646823e-07,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152577817440033,
|
|
"step": 3980,
|
|
"valid_targets_mean": 2967.3,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 6.42741935483871,
|
|
"grad_norm": 1.110316259112958,
|
|
"learning_rate": 8.142656073762944e-07,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13550147414207458,
|
|
"step": 3985,
|
|
"valid_targets_mean": 2586.4,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 6.435483870967742,
|
|
"grad_norm": 0.8375751587045906,
|
|
"learning_rate": 7.917046609134349e-07,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07988212257623672,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3246.0,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 6.443548387096774,
|
|
"grad_norm": 0.9547161642712011,
|
|
"learning_rate": 7.694543588409353e-07,
|
|
"loss": 0.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.169877827167511,
|
|
"step": 3995,
|
|
"valid_targets_mean": 2686.7,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 6.451612903225806,
|
|
"grad_norm": 0.9860055048941169,
|
|
"learning_rate": 7.475150609997595e-07,
|
|
"loss": 0.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09808245301246643,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2322.4,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 6.459677419354839,
|
|
"grad_norm": 0.9016172065093397,
|
|
"learning_rate": 7.258871222011832e-07,
|
|
"loss": 0.0924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09027966856956482,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2839.9,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 6.467741935483871,
|
|
"grad_norm": 0.9186800535476021,
|
|
"learning_rate": 7.045708922210615e-07,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07926385849714279,
|
|
"step": 4010,
|
|
"valid_targets_mean": 2187.3,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 6.475806451612903,
|
|
"grad_norm": 0.8766197021578271,
|
|
"learning_rate": 6.835667157941594e-07,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10170367360115051,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3418.7,
|
|
"valid_targets_min": 476
|
|
},
|
|
{
|
|
"epoch": 6.483870967741936,
|
|
"grad_norm": 1.0962468498466758,
|
|
"learning_rate": 6.628749326085926e-07,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13843509554862976,
|
|
"step": 4020,
|
|
"valid_targets_mean": 2540.9,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.491935483870968,
|
|
"grad_norm": 0.8845495218294658,
|
|
"learning_rate": 6.424958773003198e-07,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13085493445396423,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3274.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"grad_norm": 1.0581926243994473,
|
|
"learning_rate": 6.224298794477434e-07,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09745995700359344,
|
|
"step": 4030,
|
|
"valid_targets_mean": 2170.9,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 6.508064516129032,
|
|
"grad_norm": 0.8393928040387246,
|
|
"learning_rate": 6.026772635663647e-07,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11127416789531708,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3440.8,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 6.516129032258064,
|
|
"grad_norm": 0.9859888851485699,
|
|
"learning_rate": 5.832383491035499e-07,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12367809563875198,
|
|
"step": 4040,
|
|
"valid_targets_mean": 2740.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 6.524193548387097,
|
|
"grad_norm": 0.9345302060024089,
|
|
"learning_rate": 5.641134504333546e-07,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11320258677005768,
|
|
"step": 4045,
|
|
"valid_targets_mean": 2442.6,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 6.532258064516129,
|
|
"grad_norm": 0.9587860926591898,
|
|
"learning_rate": 5.453028768514457e-07,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13761839270591736,
|
|
"step": 4050,
|
|
"valid_targets_mean": 2639.1,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 6.540322580645161,
|
|
"grad_norm": 0.8764086050788349,
|
|
"learning_rate": 5.268069325700942e-07,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09688013792037964,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3010.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 6.548387096774194,
|
|
"grad_norm": 0.9895352862839494,
|
|
"learning_rate": 5.08625916713259e-07,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10071180760860443,
|
|
"step": 4060,
|
|
"valid_targets_mean": 2243.3,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 6.556451612903226,
|
|
"grad_norm": 1.225561163735873,
|
|
"learning_rate": 4.907601233117465e-07,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308695375919342,
|
|
"step": 4065,
|
|
"valid_targets_mean": 1915.1,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 6.564516129032258,
|
|
"grad_norm": 0.8613716492442585,
|
|
"learning_rate": 4.732098412984565e-07,
|
|
"loss": 0.0933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09116493165493011,
|
|
"step": 4070,
|
|
"valid_targets_mean": 2935.2,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 6.57258064516129,
|
|
"grad_norm": 1.0429557956780533,
|
|
"learning_rate": 4.559753545037171e-07,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11084151268005371,
|
|
"step": 4075,
|
|
"valid_targets_mean": 2394.5,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 6.580645161290323,
|
|
"grad_norm": 0.8280999620783632,
|
|
"learning_rate": 4.3905694165067735e-07,
|
|
"loss": 0.0988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0851370170712471,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3130.6,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 6.588709677419355,
|
|
"grad_norm": 1.0636597228631615,
|
|
"learning_rate": 4.2245487635081075e-07,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11703017354011536,
|
|
"step": 4085,
|
|
"valid_targets_mean": 2354.5,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 6.596774193548387,
|
|
"grad_norm": 0.9493770719786321,
|
|
"learning_rate": 4.061694270994965e-07,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10230499505996704,
|
|
"step": 4090,
|
|
"valid_targets_mean": 2629.2,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 6.604838709677419,
|
|
"grad_norm": 0.9309865748244619,
|
|
"learning_rate": 3.9020085727166536e-07,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07763160765171051,
|
|
"step": 4095,
|
|
"valid_targets_mean": 2500.9,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 6.612903225806452,
|
|
"grad_norm": 0.8504995553279323,
|
|
"learning_rate": 3.74549425117543e-07,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09736315906047821,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3161.8,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 6.620967741935484,
|
|
"grad_norm": 0.9769376080548845,
|
|
"learning_rate": 3.5921538375847776e-07,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10059460252523422,
|
|
"step": 4105,
|
|
"valid_targets_mean": 2439.7,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 6.629032258064516,
|
|
"grad_norm": 1.0031493088214463,
|
|
"learning_rate": 3.441989811828417e-07,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13304896652698517,
|
|
"step": 4110,
|
|
"valid_targets_mean": 2510.5,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 6.637096774193548,
|
|
"grad_norm": 0.8738157859279624,
|
|
"learning_rate": 3.2950046024202485e-07,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08905614167451859,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3830.5,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 6.645161290322581,
|
|
"grad_norm": 0.8083246638581202,
|
|
"learning_rate": 3.1512005864650973e-07,
|
|
"loss": 0.094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06182112544775009,
|
|
"step": 4120,
|
|
"valid_targets_mean": 2673.1,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 6.653225806451613,
|
|
"grad_norm": 1.1000684173945874,
|
|
"learning_rate": 3.010580089620163e-07,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13314281404018402,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2829.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 6.661290322580645,
|
|
"grad_norm": 0.7876079558521055,
|
|
"learning_rate": 2.8731453860575185e-07,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0726052075624466,
|
|
"step": 4130,
|
|
"valid_targets_mean": 2874.2,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 6.669354838709677,
|
|
"grad_norm": 0.952065629031024,
|
|
"learning_rate": 2.738898698427228e-07,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14465448260307312,
|
|
"step": 4135,
|
|
"valid_targets_mean": 2976.6,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 6.67741935483871,
|
|
"grad_norm": 1.1150962684856347,
|
|
"learning_rate": 2.607842197821553e-07,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12224879860877991,
|
|
"step": 4140,
|
|
"valid_targets_mean": 2284.4,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 6.685483870967742,
|
|
"grad_norm": 0.8996422434504083,
|
|
"learning_rate": 2.479978003739669e-07,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08970175683498383,
|
|
"step": 4145,
|
|
"valid_targets_mean": 2480.2,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 6.693548387096774,
|
|
"grad_norm": 1.0718859225574244,
|
|
"learning_rate": 2.3553081840535396e-07,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15989378094673157,
|
|
"step": 4150,
|
|
"valid_targets_mean": 2743.2,
|
|
"valid_targets_min": 238
|
|
},
|
|
{
|
|
"epoch": 6.701612903225806,
|
|
"grad_norm": 0.9929707720004761,
|
|
"learning_rate": 2.2338347549742956e-07,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10933834314346313,
|
|
"step": 4155,
|
|
"valid_targets_mean": 2495.9,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 6.709677419354839,
|
|
"grad_norm": 0.9702416447647217,
|
|
"learning_rate": 2.115559681019863e-07,
|
|
"loss": 0.0967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08554413914680481,
|
|
"step": 4160,
|
|
"valid_targets_mean": 2559.2,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 6.717741935483871,
|
|
"grad_norm": 0.7510724262873237,
|
|
"learning_rate": 2.0004848749829663e-07,
|
|
"loss": 0.1078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08912431448698044,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3407.3,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 6.725806451612903,
|
|
"grad_norm": 1.0808508906045105,
|
|
"learning_rate": 1.8886121979003302e-07,
|
|
"loss": 0.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11145492643117905,
|
|
"step": 4170,
|
|
"valid_targets_mean": 2143.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 6.733870967741936,
|
|
"grad_norm": 1.051703233896765,
|
|
"learning_rate": 1.7799434590225484e-07,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10442635416984558,
|
|
"step": 4175,
|
|
"valid_targets_mean": 2232.4,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 6.741935483870968,
|
|
"grad_norm": 0.898718039781069,
|
|
"learning_rate": 1.6744804157848183e-07,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08237645030021667,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3253.7,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 6.75,
|
|
"grad_norm": 0.9433363778764026,
|
|
"learning_rate": 1.5722247737784968e-07,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06485513597726822,
|
|
"step": 4185,
|
|
"valid_targets_mean": 2735.6,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 6.758064516129032,
|
|
"grad_norm": 0.803638621586576,
|
|
"learning_rate": 1.473178186723545e-07,
|
|
"loss": 0.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10735434293746948,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3508.1,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 6.766129032258064,
|
|
"grad_norm": 1.0430823175912833,
|
|
"learning_rate": 1.3773422564417939e-07,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11260170489549637,
|
|
"step": 4195,
|
|
"valid_targets_mean": 2348.9,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 6.774193548387097,
|
|
"grad_norm": 0.8947232478850037,
|
|
"learning_rate": 1.2847185328310087e-07,
|
|
"loss": 0.0931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09149382263422012,
|
|
"step": 4200,
|
|
"valid_targets_mean": 2356.5,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 6.782258064516129,
|
|
"grad_norm": 0.8675678935967469,
|
|
"learning_rate": 1.1953085138398656e-07,
|
|
"loss": 0.0872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08889655023813248,
|
|
"step": 4205,
|
|
"valid_targets_mean": 2513.7,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 6.790322580645161,
|
|
"grad_norm": 1.047169671643998,
|
|
"learning_rate": 1.1091136454436602e-07,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14637883007526398,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3166.8,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 6.798387096774194,
|
|
"grad_norm": 0.8320768157874395,
|
|
"learning_rate": 1.0261353216209691e-07,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0842428207397461,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3135.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 6.806451612903226,
|
|
"grad_norm": 0.926953315368151,
|
|
"learning_rate": 9.463748843311138e-08,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13603878021240234,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3156.4,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 6.814516129032258,
|
|
"grad_norm": 0.7935105257195403,
|
|
"learning_rate": 8.698336234924442e-08,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09205318242311478,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3285.7,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 6.82258064516129,
|
|
"grad_norm": 1.0866511624648105,
|
|
"learning_rate": 7.965127769614667e-08,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1254270076751709,
|
|
"step": 4230,
|
|
"valid_targets_mean": 2613.4,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 6.830645161290323,
|
|
"grad_norm": 0.9518667593302784,
|
|
"learning_rate": 7.264135305127929e-08,
|
|
"loss": 0.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11524783819913864,
|
|
"step": 4235,
|
|
"valid_targets_mean": 2723.8,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 6.838709677419355,
|
|
"grad_norm": 1.1026534070291392,
|
|
"learning_rate": 6.595370178200666e-08,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11620059609413147,
|
|
"step": 4240,
|
|
"valid_targets_mean": 2064.8,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 6.846774193548387,
|
|
"grad_norm": 0.959632490190956,
|
|
"learning_rate": 5.958843204375564e-08,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12032244354486465,
|
|
"step": 4245,
|
|
"valid_targets_mean": 2925.9,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 6.854838709677419,
|
|
"grad_norm": 0.9307954329615019,
|
|
"learning_rate": 5.3545646778263575e-08,
|
|
"loss": 0.0994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10153885185718536,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3046.6,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 6.862903225806452,
|
|
"grad_norm": 0.854016573699672,
|
|
"learning_rate": 4.782544371191966e-08,
|
|
"loss": 0.1001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1235848069190979,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3194.2,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 6.870967741935484,
|
|
"grad_norm": 0.8619940108883142,
|
|
"learning_rate": 4.242791535418178e-08,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11621841043233871,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3302.3,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 6.879032258064516,
|
|
"grad_norm": 0.9144680544879583,
|
|
"learning_rate": 3.7353148996084334e-08,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09738455712795258,
|
|
"step": 4265,
|
|
"valid_targets_mean": 2701.0,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 6.887096774193548,
|
|
"grad_norm": 0.9012952866898348,
|
|
"learning_rate": 3.260122670881494e-08,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11877933889627457,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3084.2,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 6.895161290322581,
|
|
"grad_norm": 0.9121244437311394,
|
|
"learning_rate": 2.817222534240438e-08,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10560651123523712,
|
|
"step": 4275,
|
|
"valid_targets_mean": 2815.1,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 6.903225806451613,
|
|
"grad_norm": 0.9801343619000624,
|
|
"learning_rate": 2.4066216524465392e-08,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11139892041683197,
|
|
"step": 4280,
|
|
"valid_targets_mean": 2843.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 6.911290322580645,
|
|
"grad_norm": 1.0161604288610293,
|
|
"learning_rate": 2.0283266659051338e-08,
|
|
"loss": 0.0795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10963751375675201,
|
|
"step": 4285,
|
|
"valid_targets_mean": 2503.1,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 6.919354838709677,
|
|
"grad_norm": 0.9430494000115871,
|
|
"learning_rate": 1.6823436925572646e-08,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08313114941120148,
|
|
"step": 4290,
|
|
"valid_targets_mean": 2415.2,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 6.92741935483871,
|
|
"grad_norm": 0.8300093658878771,
|
|
"learning_rate": 1.368678327780204e-08,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11185728758573532,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3194.9,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 6.935483870967742,
|
|
"grad_norm": 1.0756260925161554,
|
|
"learning_rate": 1.0873356442986371e-08,
|
|
"loss": 0.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08312395960092545,
|
|
"step": 4300,
|
|
"valid_targets_mean": 2797.4,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 6.943548387096774,
|
|
"grad_norm": 1.084230379715312,
|
|
"learning_rate": 8.383201921011719e-09,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12866733968257904,
|
|
"step": 4305,
|
|
"valid_targets_mean": 2527.2,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 6.951612903225806,
|
|
"grad_norm": 0.9358924501448302,
|
|
"learning_rate": 6.216359983675091e-09,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11012262850999832,
|
|
"step": 4310,
|
|
"valid_targets_mean": 2598.6,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 6.959677419354839,
|
|
"grad_norm": 0.7748131819854617,
|
|
"learning_rate": 4.372865674024951e-09,
|
|
"loss": 0.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08292579650878906,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3479.9,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 6.967741935483871,
|
|
"grad_norm": 1.1372282143492654,
|
|
"learning_rate": 2.8527488058038844e-09,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13180695474147797,
|
|
"step": 4320,
|
|
"valid_targets_mean": 2567.8,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 6.975806451612903,
|
|
"grad_norm": 0.9688282135556883,
|
|
"learning_rate": 1.6560339629645427e-09,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10365772992372513,
|
|
"step": 4325,
|
|
"valid_targets_mean": 2300.3,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 6.983870967741936,
|
|
"grad_norm": 0.8993769394343382,
|
|
"learning_rate": 7.827404992655219e-10,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09686796367168427,
|
|
"step": 4330,
|
|
"valid_targets_mean": 2925.3,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 6.991935483870968,
|
|
"grad_norm": 0.9236214720671814,
|
|
"learning_rate": 2.328825379649402e-10,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126864954829216,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3038.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.9583649513591975,
|
|
"learning_rate": 6.4689715939536544e-12,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11804352700710297,
|
|
"step": 4340,
|
|
"valid_targets_mean": 2527.1,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11804352700710297,
|
|
"step": 4340,
|
|
"total_flos": 1062461680058368.0,
|
|
"train_loss": 0.22651692718816793,
|
|
"train_runtime": 23508.2832,
|
|
"train_samples_per_second": 2.951,
|
|
"train_steps_per_second": 0.185,
|
|
"valid_targets_mean": 2527.1,
|
|
"valid_targets_min": 473
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4340,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1062461680058368.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|