12973 lines
362 KiB
JSON
12973 lines
362 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1175,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0042643923240938165,
|
|
"grad_norm": 0.923063742588706,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.4073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34478819370269775,
|
|
"step": 1,
|
|
"valid_targets_mean": 16173.8,
|
|
"valid_targets_min": 15319
|
|
},
|
|
{
|
|
"epoch": 0.008528784648187633,
|
|
"grad_norm": 0.9305099084747117,
|
|
"learning_rate": 3.3898305084745766e-07,
|
|
"loss": 1.4239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40313273668289185,
|
|
"step": 2,
|
|
"valid_targets_mean": 16208.1,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.01279317697228145,
|
|
"grad_norm": 0.9136781561192545,
|
|
"learning_rate": 6.779661016949153e-07,
|
|
"loss": 1.4396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33241310715675354,
|
|
"step": 3,
|
|
"valid_targets_mean": 14086.3,
|
|
"valid_targets_min": 10055
|
|
},
|
|
{
|
|
"epoch": 0.017057569296375266,
|
|
"grad_norm": 0.9292217758184855,
|
|
"learning_rate": 1.016949152542373e-06,
|
|
"loss": 1.4868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3823639154434204,
|
|
"step": 4,
|
|
"valid_targets_mean": 16125.8,
|
|
"valid_targets_min": 14411
|
|
},
|
|
{
|
|
"epoch": 0.021321961620469083,
|
|
"grad_norm": 0.8949993626679958,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 1.4284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39662015438079834,
|
|
"step": 5,
|
|
"valid_targets_mean": 16183.2,
|
|
"valid_targets_min": 15530
|
|
},
|
|
{
|
|
"epoch": 0.0255863539445629,
|
|
"grad_norm": 0.9170130221134415,
|
|
"learning_rate": 1.6949152542372882e-06,
|
|
"loss": 1.4277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3468186855316162,
|
|
"step": 6,
|
|
"valid_targets_mean": 15146.0,
|
|
"valid_targets_min": 13188
|
|
},
|
|
{
|
|
"epoch": 0.029850746268656716,
|
|
"grad_norm": 0.9023047718783697,
|
|
"learning_rate": 2.033898305084746e-06,
|
|
"loss": 1.4014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4160924553871155,
|
|
"step": 7,
|
|
"valid_targets_mean": 15960.2,
|
|
"valid_targets_min": 14408
|
|
},
|
|
{
|
|
"epoch": 0.03411513859275053,
|
|
"grad_norm": 0.9050874165127887,
|
|
"learning_rate": 2.372881355932204e-06,
|
|
"loss": 1.4297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32029926776885986,
|
|
"step": 8,
|
|
"valid_targets_mean": 12870.2,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 0.03837953091684435,
|
|
"grad_norm": 0.9231391216221533,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 1.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.365803062915802,
|
|
"step": 9,
|
|
"valid_targets_mean": 16124.7,
|
|
"valid_targets_min": 15200
|
|
},
|
|
{
|
|
"epoch": 0.042643923240938165,
|
|
"grad_norm": 0.917774103471896,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 1.4568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4231685400009155,
|
|
"step": 10,
|
|
"valid_targets_mean": 16039.8,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 0.046908315565031986,
|
|
"grad_norm": 0.886681586215936,
|
|
"learning_rate": 3.3898305084745763e-06,
|
|
"loss": 1.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2462669461965561,
|
|
"step": 11,
|
|
"valid_targets_mean": 9540.6,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 0.0511727078891258,
|
|
"grad_norm": 0.8379511629282393,
|
|
"learning_rate": 3.7288135593220342e-06,
|
|
"loss": 1.421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3626353144645691,
|
|
"step": 12,
|
|
"valid_targets_mean": 16114.4,
|
|
"valid_targets_min": 15375
|
|
},
|
|
{
|
|
"epoch": 0.05543710021321962,
|
|
"grad_norm": 0.8486033566175825,
|
|
"learning_rate": 4.067796610169492e-06,
|
|
"loss": 1.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3718816637992859,
|
|
"step": 13,
|
|
"valid_targets_mean": 16104.5,
|
|
"valid_targets_min": 14667
|
|
},
|
|
{
|
|
"epoch": 0.05970149253731343,
|
|
"grad_norm": 0.8118381624965181,
|
|
"learning_rate": 4.40677966101695e-06,
|
|
"loss": 1.413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754650115966797,
|
|
"step": 14,
|
|
"valid_targets_mean": 10814.3,
|
|
"valid_targets_min": 6238
|
|
},
|
|
{
|
|
"epoch": 0.06396588486140725,
|
|
"grad_norm": 0.7769671005266339,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 1.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3721126616001129,
|
|
"step": 15,
|
|
"valid_targets_mean": 16076.4,
|
|
"valid_targets_min": 14527
|
|
},
|
|
{
|
|
"epoch": 0.06823027718550106,
|
|
"grad_norm": 0.685613328265006,
|
|
"learning_rate": 5.084745762711865e-06,
|
|
"loss": 1.4456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.423447847366333,
|
|
"step": 16,
|
|
"valid_targets_mean": 16059.7,
|
|
"valid_targets_min": 15092
|
|
},
|
|
{
|
|
"epoch": 0.07249466950959488,
|
|
"grad_norm": 0.6817121647317774,
|
|
"learning_rate": 5.423728813559323e-06,
|
|
"loss": 1.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34258681535720825,
|
|
"step": 17,
|
|
"valid_targets_mean": 15675.4,
|
|
"valid_targets_min": 12947
|
|
},
|
|
{
|
|
"epoch": 0.0767590618336887,
|
|
"grad_norm": 0.6622098856601724,
|
|
"learning_rate": 5.7627118644067805e-06,
|
|
"loss": 1.4361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3814476430416107,
|
|
"step": 18,
|
|
"valid_targets_mean": 16112.1,
|
|
"valid_targets_min": 14626
|
|
},
|
|
{
|
|
"epoch": 0.08102345415778252,
|
|
"grad_norm": 0.6318337945589603,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 1.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3839952349662781,
|
|
"step": 19,
|
|
"valid_targets_mean": 16181.4,
|
|
"valid_targets_min": 15743
|
|
},
|
|
{
|
|
"epoch": 0.08528784648187633,
|
|
"grad_norm": 0.6241393202344414,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 1.4159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36410316824913025,
|
|
"step": 20,
|
|
"valid_targets_mean": 15882.8,
|
|
"valid_targets_min": 14139
|
|
},
|
|
{
|
|
"epoch": 0.08955223880597014,
|
|
"grad_norm": 0.48866718482692234,
|
|
"learning_rate": 6.779661016949153e-06,
|
|
"loss": 1.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4058961272239685,
|
|
"step": 21,
|
|
"valid_targets_mean": 16059.0,
|
|
"valid_targets_min": 13761
|
|
},
|
|
{
|
|
"epoch": 0.09381663113006397,
|
|
"grad_norm": 0.5413976262979766,
|
|
"learning_rate": 7.1186440677966106e-06,
|
|
"loss": 1.4042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2942916750907898,
|
|
"step": 22,
|
|
"valid_targets_mean": 11803.9,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 0.09808102345415778,
|
|
"grad_norm": 0.5738947650467969,
|
|
"learning_rate": 7.4576271186440685e-06,
|
|
"loss": 1.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34572991728782654,
|
|
"step": 23,
|
|
"valid_targets_mean": 16071.7,
|
|
"valid_targets_min": 14586
|
|
},
|
|
{
|
|
"epoch": 0.1023454157782516,
|
|
"grad_norm": 0.5647202365106777,
|
|
"learning_rate": 7.796610169491526e-06,
|
|
"loss": 1.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38620564341545105,
|
|
"step": 24,
|
|
"valid_targets_mean": 15971.1,
|
|
"valid_targets_min": 13000
|
|
},
|
|
{
|
|
"epoch": 0.10660980810234541,
|
|
"grad_norm": 0.5342292634157738,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 1.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26566004753112793,
|
|
"step": 25,
|
|
"valid_targets_mean": 11071.2,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 0.11087420042643924,
|
|
"grad_norm": 0.49623625681060596,
|
|
"learning_rate": 8.47457627118644e-06,
|
|
"loss": 1.3642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.353068470954895,
|
|
"step": 26,
|
|
"valid_targets_mean": 16010.8,
|
|
"valid_targets_min": 14802
|
|
},
|
|
{
|
|
"epoch": 0.11513859275053305,
|
|
"grad_norm": 0.47796632331164113,
|
|
"learning_rate": 8.8135593220339e-06,
|
|
"loss": 1.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36602669954299927,
|
|
"step": 27,
|
|
"valid_targets_mean": 16068.4,
|
|
"valid_targets_min": 12675
|
|
},
|
|
{
|
|
"epoch": 0.11940298507462686,
|
|
"grad_norm": 0.3877868654192863,
|
|
"learning_rate": 9.152542372881356e-06,
|
|
"loss": 1.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28627559542655945,
|
|
"step": 28,
|
|
"valid_targets_mean": 12614.0,
|
|
"valid_targets_min": 9505
|
|
},
|
|
{
|
|
"epoch": 0.12366737739872068,
|
|
"grad_norm": 0.45139066595979044,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 1.3899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37706679105758667,
|
|
"step": 29,
|
|
"valid_targets_mean": 16010.6,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 0.1279317697228145,
|
|
"grad_norm": 0.48915273459759945,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 1.3385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37142670154571533,
|
|
"step": 30,
|
|
"valid_targets_mean": 16120.3,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 0.13219616204690832,
|
|
"grad_norm": 0.43636905338915144,
|
|
"learning_rate": 1.016949152542373e-05,
|
|
"loss": 1.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3224691152572632,
|
|
"step": 31,
|
|
"valid_targets_mean": 15506.6,
|
|
"valid_targets_min": 13200
|
|
},
|
|
{
|
|
"epoch": 0.13646055437100213,
|
|
"grad_norm": 0.3986941199207519,
|
|
"learning_rate": 1.0508474576271188e-05,
|
|
"loss": 1.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.365680992603302,
|
|
"step": 32,
|
|
"valid_targets_mean": 16121.6,
|
|
"valid_targets_min": 14469
|
|
},
|
|
{
|
|
"epoch": 0.14072494669509594,
|
|
"grad_norm": 0.395415560789257,
|
|
"learning_rate": 1.0847457627118645e-05,
|
|
"loss": 1.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040735125541687,
|
|
"step": 33,
|
|
"valid_targets_mean": 13350.7,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 0.14498933901918976,
|
|
"grad_norm": 0.41905932089054293,
|
|
"learning_rate": 1.1186440677966102e-05,
|
|
"loss": 1.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31133008003234863,
|
|
"step": 34,
|
|
"valid_targets_mean": 16066.9,
|
|
"valid_targets_min": 15234
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 0.41726730894094605,
|
|
"learning_rate": 1.1525423728813561e-05,
|
|
"loss": 1.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3790018558502197,
|
|
"step": 35,
|
|
"valid_targets_mean": 16216.8,
|
|
"valid_targets_min": 15778
|
|
},
|
|
{
|
|
"epoch": 0.1535181236673774,
|
|
"grad_norm": 0.4004453351024151,
|
|
"learning_rate": 1.1864406779661018e-05,
|
|
"loss": 1.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2407425343990326,
|
|
"step": 36,
|
|
"valid_targets_mean": 10344.7,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 0.15778251599147122,
|
|
"grad_norm": 0.3809811462207289,
|
|
"learning_rate": 1.2203389830508477e-05,
|
|
"loss": 1.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33607640862464905,
|
|
"step": 37,
|
|
"valid_targets_mean": 16148.4,
|
|
"valid_targets_min": 15385
|
|
},
|
|
{
|
|
"epoch": 0.16204690831556504,
|
|
"grad_norm": 0.3060866522711702,
|
|
"learning_rate": 1.2542372881355932e-05,
|
|
"loss": 1.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3560906648635864,
|
|
"step": 38,
|
|
"valid_targets_mean": 16172.9,
|
|
"valid_targets_min": 15067
|
|
},
|
|
{
|
|
"epoch": 0.16631130063965885,
|
|
"grad_norm": 0.26276197669558604,
|
|
"learning_rate": 1.288135593220339e-05,
|
|
"loss": 1.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517072558403015,
|
|
"step": 39,
|
|
"valid_targets_mean": 10800.8,
|
|
"valid_targets_min": 7521
|
|
},
|
|
{
|
|
"epoch": 0.17057569296375266,
|
|
"grad_norm": 0.2388590846090314,
|
|
"learning_rate": 1.3220338983050848e-05,
|
|
"loss": 1.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3392382264137268,
|
|
"step": 40,
|
|
"valid_targets_mean": 16184.2,
|
|
"valid_targets_min": 15125
|
|
},
|
|
{
|
|
"epoch": 0.17484008528784648,
|
|
"grad_norm": 0.29076014202593087,
|
|
"learning_rate": 1.3559322033898305e-05,
|
|
"loss": 1.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3522093892097473,
|
|
"step": 41,
|
|
"valid_targets_mean": 16164.3,
|
|
"valid_targets_min": 14909
|
|
},
|
|
{
|
|
"epoch": 0.1791044776119403,
|
|
"grad_norm": 0.36503606526714,
|
|
"learning_rate": 1.3898305084745764e-05,
|
|
"loss": 1.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30831772089004517,
|
|
"step": 42,
|
|
"valid_targets_mean": 14466.6,
|
|
"valid_targets_min": 12298
|
|
},
|
|
{
|
|
"epoch": 0.18336886993603413,
|
|
"grad_norm": 0.3603484527570734,
|
|
"learning_rate": 1.4237288135593221e-05,
|
|
"loss": 1.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3524203300476074,
|
|
"step": 43,
|
|
"valid_targets_mean": 16118.2,
|
|
"valid_targets_min": 15474
|
|
},
|
|
{
|
|
"epoch": 0.18763326226012794,
|
|
"grad_norm": 0.3532914596662034,
|
|
"learning_rate": 1.4576271186440678e-05,
|
|
"loss": 1.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31800127029418945,
|
|
"step": 44,
|
|
"valid_targets_mean": 16199.9,
|
|
"valid_targets_min": 14692
|
|
},
|
|
{
|
|
"epoch": 0.19189765458422176,
|
|
"grad_norm": 0.33327703358537636,
|
|
"learning_rate": 1.4915254237288137e-05,
|
|
"loss": 1.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3171482980251312,
|
|
"step": 45,
|
|
"valid_targets_mean": 15770.3,
|
|
"valid_targets_min": 12098
|
|
},
|
|
{
|
|
"epoch": 0.19616204690831557,
|
|
"grad_norm": 0.2740791393857246,
|
|
"learning_rate": 1.5254237288135594e-05,
|
|
"loss": 1.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33353474736213684,
|
|
"step": 46,
|
|
"valid_targets_mean": 16201.4,
|
|
"valid_targets_min": 14738
|
|
},
|
|
{
|
|
"epoch": 0.20042643923240938,
|
|
"grad_norm": 0.25662623817093577,
|
|
"learning_rate": 1.5593220338983053e-05,
|
|
"loss": 1.3103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27710434794425964,
|
|
"step": 47,
|
|
"valid_targets_mean": 11987.1,
|
|
"valid_targets_min": 3717
|
|
},
|
|
{
|
|
"epoch": 0.2046908315565032,
|
|
"grad_norm": 0.2258648101014383,
|
|
"learning_rate": 1.593220338983051e-05,
|
|
"loss": 1.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30634868144989014,
|
|
"step": 48,
|
|
"valid_targets_mean": 16182.0,
|
|
"valid_targets_min": 15499
|
|
},
|
|
{
|
|
"epoch": 0.208955223880597,
|
|
"grad_norm": 0.23047151696710227,
|
|
"learning_rate": 1.6271186440677967e-05,
|
|
"loss": 1.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38019201159477234,
|
|
"step": 49,
|
|
"valid_targets_mean": 16076.0,
|
|
"valid_targets_min": 15089
|
|
},
|
|
{
|
|
"epoch": 0.21321961620469082,
|
|
"grad_norm": 0.22379498287302,
|
|
"learning_rate": 1.6610169491525424e-05,
|
|
"loss": 1.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19615648686885834,
|
|
"step": 50,
|
|
"valid_targets_mean": 7794.0,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 0.21748400852878466,
|
|
"grad_norm": 0.22630578812039112,
|
|
"learning_rate": 1.694915254237288e-05,
|
|
"loss": 1.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30712956190109253,
|
|
"step": 51,
|
|
"valid_targets_mean": 16192.3,
|
|
"valid_targets_min": 15763
|
|
},
|
|
{
|
|
"epoch": 0.22174840085287847,
|
|
"grad_norm": 0.22493153616678388,
|
|
"learning_rate": 1.728813559322034e-05,
|
|
"loss": 1.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36887258291244507,
|
|
"step": 52,
|
|
"valid_targets_mean": 16103.9,
|
|
"valid_targets_min": 15295
|
|
},
|
|
{
|
|
"epoch": 0.2260127931769723,
|
|
"grad_norm": 0.2168643050612985,
|
|
"learning_rate": 1.76271186440678e-05,
|
|
"loss": 1.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857760787010193,
|
|
"step": 53,
|
|
"valid_targets_mean": 13549.6,
|
|
"valid_targets_min": 10609
|
|
},
|
|
{
|
|
"epoch": 0.2302771855010661,
|
|
"grad_norm": 0.19982814420688294,
|
|
"learning_rate": 1.7966101694915256e-05,
|
|
"loss": 1.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.349561870098114,
|
|
"step": 54,
|
|
"valid_targets_mean": 16096.2,
|
|
"valid_targets_min": 14888
|
|
},
|
|
{
|
|
"epoch": 0.2345415778251599,
|
|
"grad_norm": 0.2057131529036208,
|
|
"learning_rate": 1.8305084745762713e-05,
|
|
"loss": 1.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3670814335346222,
|
|
"step": 55,
|
|
"valid_targets_mean": 16092.0,
|
|
"valid_targets_min": 15315
|
|
},
|
|
{
|
|
"epoch": 0.23880597014925373,
|
|
"grad_norm": 0.1985762577232187,
|
|
"learning_rate": 1.864406779661017e-05,
|
|
"loss": 1.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2944626212120056,
|
|
"step": 56,
|
|
"valid_targets_mean": 15526.4,
|
|
"valid_targets_min": 13549
|
|
},
|
|
{
|
|
"epoch": 0.24307036247334754,
|
|
"grad_norm": 0.20660679490773234,
|
|
"learning_rate": 1.898305084745763e-05,
|
|
"loss": 1.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3371580243110657,
|
|
"step": 57,
|
|
"valid_targets_mean": 15877.7,
|
|
"valid_targets_min": 9100
|
|
},
|
|
{
|
|
"epoch": 0.24733475479744135,
|
|
"grad_norm": 0.16997953510561223,
|
|
"learning_rate": 1.9322033898305087e-05,
|
|
"loss": 1.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30616968870162964,
|
|
"step": 58,
|
|
"valid_targets_mean": 13225.8,
|
|
"valid_targets_min": 2794
|
|
},
|
|
{
|
|
"epoch": 0.2515991471215352,
|
|
"grad_norm": 0.1468893466977844,
|
|
"learning_rate": 1.9661016949152545e-05,
|
|
"loss": 1.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30669939517974854,
|
|
"step": 59,
|
|
"valid_targets_mean": 16199.9,
|
|
"valid_targets_min": 15570
|
|
},
|
|
{
|
|
"epoch": 0.255863539445629,
|
|
"grad_norm": 0.14846982365230144,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3395230174064636,
|
|
"step": 60,
|
|
"valid_targets_mean": 16238.8,
|
|
"valid_targets_min": 15712
|
|
},
|
|
{
|
|
"epoch": 0.2601279317697228,
|
|
"grad_norm": 0.15364456486766126,
|
|
"learning_rate": 2.033898305084746e-05,
|
|
"loss": 1.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21228767931461334,
|
|
"step": 61,
|
|
"valid_targets_mean": 8884.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 0.26439232409381663,
|
|
"grad_norm": 0.14705351343871695,
|
|
"learning_rate": 2.0677966101694916e-05,
|
|
"loss": 1.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32020318508148193,
|
|
"step": 62,
|
|
"valid_targets_mean": 15858.8,
|
|
"valid_targets_min": 9034
|
|
},
|
|
{
|
|
"epoch": 0.26865671641791045,
|
|
"grad_norm": 0.13895045949448423,
|
|
"learning_rate": 2.1016949152542376e-05,
|
|
"loss": 1.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3360688090324402,
|
|
"step": 63,
|
|
"valid_targets_mean": 16140.3,
|
|
"valid_targets_min": 15243
|
|
},
|
|
{
|
|
"epoch": 0.27292110874200426,
|
|
"grad_norm": 0.14665368863434225,
|
|
"learning_rate": 2.1355932203389833e-05,
|
|
"loss": 1.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518323063850403,
|
|
"step": 64,
|
|
"valid_targets_mean": 11215.2,
|
|
"valid_targets_min": 5932
|
|
},
|
|
{
|
|
"epoch": 0.2771855010660981,
|
|
"grad_norm": 0.12039043398987492,
|
|
"learning_rate": 2.169491525423729e-05,
|
|
"loss": 1.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103894591331482,
|
|
"step": 65,
|
|
"valid_targets_mean": 16148.5,
|
|
"valid_targets_min": 15158
|
|
},
|
|
{
|
|
"epoch": 0.2814498933901919,
|
|
"grad_norm": 0.12413464218056737,
|
|
"learning_rate": 2.2033898305084748e-05,
|
|
"loss": 1.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35742056369781494,
|
|
"step": 66,
|
|
"valid_targets_mean": 16164.3,
|
|
"valid_targets_min": 15280
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.12359441290379822,
|
|
"learning_rate": 2.2372881355932205e-05,
|
|
"loss": 1.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765950560569763,
|
|
"step": 67,
|
|
"valid_targets_mean": 13715.3,
|
|
"valid_targets_min": 11340
|
|
},
|
|
{
|
|
"epoch": 0.2899786780383795,
|
|
"grad_norm": 0.12356701650095285,
|
|
"learning_rate": 2.2711864406779665e-05,
|
|
"loss": 1.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3342365026473999,
|
|
"step": 68,
|
|
"valid_targets_mean": 16153.7,
|
|
"valid_targets_min": 15399
|
|
},
|
|
{
|
|
"epoch": 0.2942430703624733,
|
|
"grad_norm": 0.12876613648119586,
|
|
"learning_rate": 2.3050847457627122e-05,
|
|
"loss": 1.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35359737277030945,
|
|
"step": 69,
|
|
"valid_targets_mean": 16151.6,
|
|
"valid_targets_min": 15261
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.10904310596151173,
|
|
"learning_rate": 2.338983050847458e-05,
|
|
"loss": 1.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098958730697632,
|
|
"step": 70,
|
|
"valid_targets_mean": 15655.6,
|
|
"valid_targets_min": 14448
|
|
},
|
|
{
|
|
"epoch": 0.302771855010661,
|
|
"grad_norm": 0.11733639739181774,
|
|
"learning_rate": 2.3728813559322036e-05,
|
|
"loss": 1.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3490135073661804,
|
|
"step": 71,
|
|
"valid_targets_mean": 16135.2,
|
|
"valid_targets_min": 15190
|
|
},
|
|
{
|
|
"epoch": 0.3070362473347548,
|
|
"grad_norm": 0.10563764914049407,
|
|
"learning_rate": 2.406779661016949e-05,
|
|
"loss": 1.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543482780456543,
|
|
"step": 72,
|
|
"valid_targets_mean": 11782.0,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 0.31130063965884863,
|
|
"grad_norm": 0.10816339800153085,
|
|
"learning_rate": 2.4406779661016954e-05,
|
|
"loss": 1.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3115493655204773,
|
|
"step": 73,
|
|
"valid_targets_mean": 16111.2,
|
|
"valid_targets_min": 13968
|
|
},
|
|
{
|
|
"epoch": 0.31556503198294245,
|
|
"grad_norm": 0.11584020520561765,
|
|
"learning_rate": 2.474576271186441e-05,
|
|
"loss": 1.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32911521196365356,
|
|
"step": 74,
|
|
"valid_targets_mean": 16192.1,
|
|
"valid_targets_min": 15486
|
|
},
|
|
{
|
|
"epoch": 0.31982942430703626,
|
|
"grad_norm": 0.10912443360725571,
|
|
"learning_rate": 2.5084745762711865e-05,
|
|
"loss": 1.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19079627096652985,
|
|
"step": 75,
|
|
"valid_targets_mean": 8972.6,
|
|
"valid_targets_min": 2624
|
|
},
|
|
{
|
|
"epoch": 0.32409381663113007,
|
|
"grad_norm": 0.09732475676951689,
|
|
"learning_rate": 2.5423728813559322e-05,
|
|
"loss": 1.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2943054437637329,
|
|
"step": 76,
|
|
"valid_targets_mean": 16085.3,
|
|
"valid_targets_min": 14667
|
|
},
|
|
{
|
|
"epoch": 0.3283582089552239,
|
|
"grad_norm": 0.09624343626349152,
|
|
"learning_rate": 2.576271186440678e-05,
|
|
"loss": 1.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3504287004470825,
|
|
"step": 77,
|
|
"valid_targets_mean": 16163.7,
|
|
"valid_targets_min": 15284
|
|
},
|
|
{
|
|
"epoch": 0.3326226012793177,
|
|
"grad_norm": 0.09807934133915451,
|
|
"learning_rate": 2.610169491525424e-05,
|
|
"loss": 1.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261921763420105,
|
|
"step": 78,
|
|
"valid_targets_mean": 13263.0,
|
|
"valid_targets_min": 9054
|
|
},
|
|
{
|
|
"epoch": 0.3368869936034115,
|
|
"grad_norm": 0.10336596810778118,
|
|
"learning_rate": 2.6440677966101696e-05,
|
|
"loss": 1.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3308961093425751,
|
|
"step": 79,
|
|
"valid_targets_mean": 16083.3,
|
|
"valid_targets_min": 15014
|
|
},
|
|
{
|
|
"epoch": 0.3411513859275053,
|
|
"grad_norm": 0.0994106901204273,
|
|
"learning_rate": 2.6779661016949153e-05,
|
|
"loss": 1.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35107001662254333,
|
|
"step": 80,
|
|
"valid_targets_mean": 16136.8,
|
|
"valid_targets_min": 14215
|
|
},
|
|
{
|
|
"epoch": 0.34541577825159914,
|
|
"grad_norm": 0.10680288195228302,
|
|
"learning_rate": 2.711864406779661e-05,
|
|
"loss": 1.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28332552313804626,
|
|
"step": 81,
|
|
"valid_targets_mean": 15134.7,
|
|
"valid_targets_min": 12481
|
|
},
|
|
{
|
|
"epoch": 0.34968017057569295,
|
|
"grad_norm": 0.09406135802685557,
|
|
"learning_rate": 2.7457627118644068e-05,
|
|
"loss": 1.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3389149308204651,
|
|
"step": 82,
|
|
"valid_targets_mean": 16069.7,
|
|
"valid_targets_min": 15267
|
|
},
|
|
{
|
|
"epoch": 0.35394456289978676,
|
|
"grad_norm": 0.09685910917477361,
|
|
"learning_rate": 2.7796610169491528e-05,
|
|
"loss": 1.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30646660923957825,
|
|
"step": 83,
|
|
"valid_targets_mean": 13215.1,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 0.3582089552238806,
|
|
"grad_norm": 0.08514974050576918,
|
|
"learning_rate": 2.8135593220338985e-05,
|
|
"loss": 1.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780168354511261,
|
|
"step": 84,
|
|
"valid_targets_mean": 16194.8,
|
|
"valid_targets_min": 15298
|
|
},
|
|
{
|
|
"epoch": 0.3624733475479744,
|
|
"grad_norm": 0.09476782972664151,
|
|
"learning_rate": 2.8474576271186442e-05,
|
|
"loss": 1.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35387125611305237,
|
|
"step": 85,
|
|
"valid_targets_mean": 16054.6,
|
|
"valid_targets_min": 14788
|
|
},
|
|
{
|
|
"epoch": 0.36673773987206826,
|
|
"grad_norm": 0.0938548881752735,
|
|
"learning_rate": 2.88135593220339e-05,
|
|
"loss": 1.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21074512600898743,
|
|
"step": 86,
|
|
"valid_targets_mean": 9831.6,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 0.37100213219616207,
|
|
"grad_norm": 0.09040027775241334,
|
|
"learning_rate": 2.9152542372881356e-05,
|
|
"loss": 1.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30791014432907104,
|
|
"step": 87,
|
|
"valid_targets_mean": 16121.4,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 0.3752665245202559,
|
|
"grad_norm": 0.08902079664385842,
|
|
"learning_rate": 2.9491525423728817e-05,
|
|
"loss": 1.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35100027918815613,
|
|
"step": 88,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 0.3795309168443497,
|
|
"grad_norm": 0.08440767661261377,
|
|
"learning_rate": 2.9830508474576274e-05,
|
|
"loss": 1.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22463306784629822,
|
|
"step": 89,
|
|
"valid_targets_mean": 11103.9,
|
|
"valid_targets_min": 6214
|
|
},
|
|
{
|
|
"epoch": 0.3837953091684435,
|
|
"grad_norm": 0.09686222739771823,
|
|
"learning_rate": 3.016949152542373e-05,
|
|
"loss": 1.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30074310302734375,
|
|
"step": 90,
|
|
"valid_targets_mean": 16186.0,
|
|
"valid_targets_min": 15273
|
|
},
|
|
{
|
|
"epoch": 0.3880597014925373,
|
|
"grad_norm": 0.0909813322835024,
|
|
"learning_rate": 3.0508474576271188e-05,
|
|
"loss": 1.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3536166846752167,
|
|
"step": 91,
|
|
"valid_targets_mean": 16070.8,
|
|
"valid_targets_min": 14714
|
|
},
|
|
{
|
|
"epoch": 0.39232409381663114,
|
|
"grad_norm": 0.09552742825970711,
|
|
"learning_rate": 3.084745762711865e-05,
|
|
"loss": 1.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861877679824829,
|
|
"step": 92,
|
|
"valid_targets_mean": 14561.2,
|
|
"valid_targets_min": 11489
|
|
},
|
|
{
|
|
"epoch": 0.39658848614072495,
|
|
"grad_norm": 0.09408462217954633,
|
|
"learning_rate": 3.1186440677966106e-05,
|
|
"loss": 1.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33742398023605347,
|
|
"step": 93,
|
|
"valid_targets_mean": 16111.6,
|
|
"valid_targets_min": 15201
|
|
},
|
|
{
|
|
"epoch": 0.40085287846481876,
|
|
"grad_norm": 0.10286798269718264,
|
|
"learning_rate": 3.152542372881356e-05,
|
|
"loss": 1.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34325945377349854,
|
|
"step": 94,
|
|
"valid_targets_mean": 16112.3,
|
|
"valid_targets_min": 13815
|
|
},
|
|
{
|
|
"epoch": 0.4051172707889126,
|
|
"grad_norm": 0.0948789407394245,
|
|
"learning_rate": 3.186440677966102e-05,
|
|
"loss": 1.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3018621802330017,
|
|
"step": 95,
|
|
"valid_targets_mean": 16085.2,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 0.4093816631130064,
|
|
"grad_norm": 0.09473052282078821,
|
|
"learning_rate": 3.2203389830508473e-05,
|
|
"loss": 1.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3464816212654114,
|
|
"step": 96,
|
|
"valid_targets_mean": 16017.2,
|
|
"valid_targets_min": 13272
|
|
},
|
|
{
|
|
"epoch": 0.4136460554371002,
|
|
"grad_norm": 0.0923201127752878,
|
|
"learning_rate": 3.2542372881355934e-05,
|
|
"loss": 1.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2386438250541687,
|
|
"step": 97,
|
|
"valid_targets_mean": 10922.8,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 0.417910447761194,
|
|
"grad_norm": 0.09340272006062793,
|
|
"learning_rate": 3.2881355932203394e-05,
|
|
"loss": 1.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30437636375427246,
|
|
"step": 98,
|
|
"valid_targets_mean": 16054.7,
|
|
"valid_targets_min": 15537
|
|
},
|
|
{
|
|
"epoch": 0.42217484008528783,
|
|
"grad_norm": 0.09153379447873532,
|
|
"learning_rate": 3.322033898305085e-05,
|
|
"loss": 1.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3612423241138458,
|
|
"step": 99,
|
|
"valid_targets_mean": 16142.9,
|
|
"valid_targets_min": 15009
|
|
},
|
|
{
|
|
"epoch": 0.42643923240938164,
|
|
"grad_norm": 0.09903432350068125,
|
|
"learning_rate": 3.355932203389831e-05,
|
|
"loss": 1.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2107563018798828,
|
|
"step": 100,
|
|
"valid_targets_mean": 9525.3,
|
|
"valid_targets_min": 2584
|
|
},
|
|
{
|
|
"epoch": 0.43070362473347545,
|
|
"grad_norm": 0.0898221776791752,
|
|
"learning_rate": 3.389830508474576e-05,
|
|
"loss": 1.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28595858812332153,
|
|
"step": 101,
|
|
"valid_targets_mean": 16141.8,
|
|
"valid_targets_min": 15131
|
|
},
|
|
{
|
|
"epoch": 0.4349680170575693,
|
|
"grad_norm": 0.09530728453889971,
|
|
"learning_rate": 3.423728813559322e-05,
|
|
"loss": 1.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3275136649608612,
|
|
"step": 102,
|
|
"valid_targets_mean": 16166.8,
|
|
"valid_targets_min": 15502
|
|
},
|
|
{
|
|
"epoch": 0.43923240938166314,
|
|
"grad_norm": 0.0950962645380679,
|
|
"learning_rate": 3.457627118644068e-05,
|
|
"loss": 1.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579317092895508,
|
|
"step": 103,
|
|
"valid_targets_mean": 13171.3,
|
|
"valid_targets_min": 10139
|
|
},
|
|
{
|
|
"epoch": 0.44349680170575695,
|
|
"grad_norm": 0.09612228648929626,
|
|
"learning_rate": 3.491525423728814e-05,
|
|
"loss": 1.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3070680499076843,
|
|
"step": 104,
|
|
"valid_targets_mean": 16120.3,
|
|
"valid_targets_min": 15224
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 0.09818346925017292,
|
|
"learning_rate": 3.52542372881356e-05,
|
|
"loss": 1.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34970101714134216,
|
|
"step": 105,
|
|
"valid_targets_mean": 15789.8,
|
|
"valid_targets_min": 6637
|
|
},
|
|
{
|
|
"epoch": 0.4520255863539446,
|
|
"grad_norm": 0.09182254910300913,
|
|
"learning_rate": 3.559322033898305e-05,
|
|
"loss": 1.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976386547088623,
|
|
"step": 106,
|
|
"valid_targets_mean": 15590.5,
|
|
"valid_targets_min": 13490
|
|
},
|
|
{
|
|
"epoch": 0.4562899786780384,
|
|
"grad_norm": 0.0894585182145817,
|
|
"learning_rate": 3.593220338983051e-05,
|
|
"loss": 1.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3275868892669678,
|
|
"step": 107,
|
|
"valid_targets_mean": 16067.1,
|
|
"valid_targets_min": 14508
|
|
},
|
|
{
|
|
"epoch": 0.4605543710021322,
|
|
"grad_norm": 0.09653235982748481,
|
|
"learning_rate": 3.627118644067797e-05,
|
|
"loss": 1.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27671319246292114,
|
|
"step": 108,
|
|
"valid_targets_mean": 12721.7,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.464818763326226,
|
|
"grad_norm": 0.08832561864477775,
|
|
"learning_rate": 3.6610169491525426e-05,
|
|
"loss": 1.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949606776237488,
|
|
"step": 109,
|
|
"valid_targets_mean": 15928.9,
|
|
"valid_targets_min": 14467
|
|
},
|
|
{
|
|
"epoch": 0.4690831556503198,
|
|
"grad_norm": 0.09418828830281084,
|
|
"learning_rate": 3.6949152542372886e-05,
|
|
"loss": 1.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3312040865421295,
|
|
"step": 110,
|
|
"valid_targets_mean": 16182.0,
|
|
"valid_targets_min": 14986
|
|
},
|
|
{
|
|
"epoch": 0.47334754797441364,
|
|
"grad_norm": 0.08836789160003244,
|
|
"learning_rate": 3.728813559322034e-05,
|
|
"loss": 1.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.204851895570755,
|
|
"step": 111,
|
|
"valid_targets_mean": 9918.0,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 0.47761194029850745,
|
|
"grad_norm": 0.08853027497964926,
|
|
"learning_rate": 3.76271186440678e-05,
|
|
"loss": 1.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27706432342529297,
|
|
"step": 112,
|
|
"valid_targets_mean": 16236.6,
|
|
"valid_targets_min": 15744
|
|
},
|
|
{
|
|
"epoch": 0.48187633262260127,
|
|
"grad_norm": 0.09510140176681595,
|
|
"learning_rate": 3.796610169491526e-05,
|
|
"loss": 1.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32165735960006714,
|
|
"step": 113,
|
|
"valid_targets_mean": 16123.2,
|
|
"valid_targets_min": 14949
|
|
},
|
|
{
|
|
"epoch": 0.4861407249466951,
|
|
"grad_norm": 0.0936335191414857,
|
|
"learning_rate": 3.8305084745762714e-05,
|
|
"loss": 1.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22146213054656982,
|
|
"step": 114,
|
|
"valid_targets_mean": 11659.7,
|
|
"valid_targets_min": 4847
|
|
},
|
|
{
|
|
"epoch": 0.4904051172707889,
|
|
"grad_norm": 0.08888075449448156,
|
|
"learning_rate": 3.8644067796610175e-05,
|
|
"loss": 1.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28964701294898987,
|
|
"step": 115,
|
|
"valid_targets_mean": 15751.3,
|
|
"valid_targets_min": 4956
|
|
},
|
|
{
|
|
"epoch": 0.4946695095948827,
|
|
"grad_norm": 0.09222844211192185,
|
|
"learning_rate": 3.898305084745763e-05,
|
|
"loss": 1.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33935752511024475,
|
|
"step": 116,
|
|
"valid_targets_mean": 16008.3,
|
|
"valid_targets_min": 14766
|
|
},
|
|
{
|
|
"epoch": 0.4989339019189765,
|
|
"grad_norm": 0.0888423071162921,
|
|
"learning_rate": 3.932203389830509e-05,
|
|
"loss": 1.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24357378482818604,
|
|
"step": 117,
|
|
"valid_targets_mean": 13000.5,
|
|
"valid_targets_min": 9878
|
|
},
|
|
{
|
|
"epoch": 0.5031982942430704,
|
|
"grad_norm": 0.08423642380104496,
|
|
"learning_rate": 3.966101694915255e-05,
|
|
"loss": 1.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3194831609725952,
|
|
"step": 118,
|
|
"valid_targets_mean": 16056.0,
|
|
"valid_targets_min": 12675
|
|
},
|
|
{
|
|
"epoch": 0.5074626865671642,
|
|
"grad_norm": 0.08935981849872705,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3175763189792633,
|
|
"step": 119,
|
|
"valid_targets_mean": 15889.4,
|
|
"valid_targets_min": 4695
|
|
},
|
|
{
|
|
"epoch": 0.511727078891258,
|
|
"grad_norm": 0.09493830267581599,
|
|
"learning_rate": 3.999991166161585e-05,
|
|
"loss": 1.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28965049982070923,
|
|
"step": 120,
|
|
"valid_targets_mean": 15728.3,
|
|
"valid_targets_min": 14155
|
|
},
|
|
{
|
|
"epoch": 0.5159914712153518,
|
|
"grad_norm": 0.0947950104425309,
|
|
"learning_rate": 3.999964664724376e-05,
|
|
"loss": 1.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32523101568222046,
|
|
"step": 121,
|
|
"valid_targets_mean": 16155.2,
|
|
"valid_targets_min": 15468
|
|
},
|
|
{
|
|
"epoch": 0.5202558635394456,
|
|
"grad_norm": 0.09855585461503978,
|
|
"learning_rate": 3.999920495922483e-05,
|
|
"loss": 1.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21284231543540955,
|
|
"step": 122,
|
|
"valid_targets_mean": 11165.0,
|
|
"valid_targets_min": 3068
|
|
},
|
|
{
|
|
"epoch": 0.5245202558635395,
|
|
"grad_norm": 0.09621591683969682,
|
|
"learning_rate": 3.999858660146085e-05,
|
|
"loss": 1.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29792749881744385,
|
|
"step": 123,
|
|
"valid_targets_mean": 16095.6,
|
|
"valid_targets_min": 14651
|
|
},
|
|
{
|
|
"epoch": 0.5287846481876333,
|
|
"grad_norm": 0.11910097306887993,
|
|
"learning_rate": 3.999779157941431e-05,
|
|
"loss": 1.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33394917845726013,
|
|
"step": 124,
|
|
"valid_targets_mean": 16011.9,
|
|
"valid_targets_min": 14262
|
|
},
|
|
{
|
|
"epoch": 0.5330490405117271,
|
|
"grad_norm": 0.10602343170573787,
|
|
"learning_rate": 3.99968199001083e-05,
|
|
"loss": 1.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19197851419448853,
|
|
"step": 125,
|
|
"valid_targets_mean": 9363.4,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 0.5373134328358209,
|
|
"grad_norm": 0.0899769340055125,
|
|
"learning_rate": 3.999567157212646e-05,
|
|
"loss": 1.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002522587776184,
|
|
"step": 126,
|
|
"valid_targets_mean": 16117.3,
|
|
"valid_targets_min": 15027
|
|
},
|
|
{
|
|
"epoch": 0.5415778251599147,
|
|
"grad_norm": 0.11331696829531436,
|
|
"learning_rate": 3.9994346605612955e-05,
|
|
"loss": 1.1666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33196061849594116,
|
|
"step": 127,
|
|
"valid_targets_mean": 16147.7,
|
|
"valid_targets_min": 15401
|
|
},
|
|
{
|
|
"epoch": 0.5458422174840085,
|
|
"grad_norm": 0.10559884968569293,
|
|
"learning_rate": 3.999284501227232e-05,
|
|
"loss": 1.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2423875629901886,
|
|
"step": 128,
|
|
"valid_targets_mean": 12663.4,
|
|
"valid_targets_min": 8860
|
|
},
|
|
{
|
|
"epoch": 0.5501066098081023,
|
|
"grad_norm": 0.08546504494233909,
|
|
"learning_rate": 3.9991166805369393e-05,
|
|
"loss": 1.1814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081364631652832,
|
|
"step": 129,
|
|
"valid_targets_mean": 16190.8,
|
|
"valid_targets_min": 15398
|
|
},
|
|
{
|
|
"epoch": 0.5543710021321961,
|
|
"grad_norm": 0.0977189724318588,
|
|
"learning_rate": 3.9989311999729166e-05,
|
|
"loss": 1.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3216251730918884,
|
|
"step": 130,
|
|
"valid_targets_mean": 16169.8,
|
|
"valid_targets_min": 15494
|
|
},
|
|
{
|
|
"epoch": 0.55863539445629,
|
|
"grad_norm": 0.08874493280849229,
|
|
"learning_rate": 3.99872806117367e-05,
|
|
"loss": 1.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885793447494507,
|
|
"step": 131,
|
|
"valid_targets_mean": 15789.8,
|
|
"valid_targets_min": 14303
|
|
},
|
|
{
|
|
"epoch": 0.5628997867803838,
|
|
"grad_norm": 0.10094889295172872,
|
|
"learning_rate": 3.998507265933696e-05,
|
|
"loss": 1.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3062593340873718,
|
|
"step": 132,
|
|
"valid_targets_mean": 16151.2,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 0.5671641791044776,
|
|
"grad_norm": 0.09145503452160857,
|
|
"learning_rate": 3.9982688162034624e-05,
|
|
"loss": 1.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588319778442383,
|
|
"step": 133,
|
|
"valid_targets_mean": 13381.3,
|
|
"valid_targets_min": 2611
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.09146366551493089,
|
|
"learning_rate": 3.998012714089397e-05,
|
|
"loss": 1.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29369837045669556,
|
|
"step": 134,
|
|
"valid_targets_mean": 16084.1,
|
|
"valid_targets_min": 15199
|
|
},
|
|
{
|
|
"epoch": 0.5756929637526652,
|
|
"grad_norm": 0.09493788832945929,
|
|
"learning_rate": 3.997738961853863e-05,
|
|
"loss": 1.1516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3354269564151764,
|
|
"step": 135,
|
|
"valid_targets_mean": 16032.2,
|
|
"valid_targets_min": 14403
|
|
},
|
|
{
|
|
"epoch": 0.579957356076759,
|
|
"grad_norm": 0.09506696562373083,
|
|
"learning_rate": 3.9974475619151445e-05,
|
|
"loss": 1.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20292367041110992,
|
|
"step": 136,
|
|
"valid_targets_mean": 11636.4,
|
|
"valid_targets_min": 4191
|
|
},
|
|
{
|
|
"epoch": 0.5842217484008528,
|
|
"grad_norm": 0.08684274601728144,
|
|
"learning_rate": 3.997138516847422e-05,
|
|
"loss": 1.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29237163066864014,
|
|
"step": 137,
|
|
"valid_targets_mean": 16056.7,
|
|
"valid_targets_min": 14458
|
|
},
|
|
{
|
|
"epoch": 0.5884861407249466,
|
|
"grad_norm": 0.09779627111272148,
|
|
"learning_rate": 3.9968118293807476e-05,
|
|
"loss": 1.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3285352289676666,
|
|
"step": 138,
|
|
"valid_targets_mean": 16153.5,
|
|
"valid_targets_min": 15578
|
|
},
|
|
{
|
|
"epoch": 0.5927505330490405,
|
|
"grad_norm": 0.08935139093497328,
|
|
"learning_rate": 3.996467502401028e-05,
|
|
"loss": 1.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378307282924652,
|
|
"step": 139,
|
|
"valid_targets_mean": 11742.0,
|
|
"valid_targets_min": 7777
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.09771379624987,
|
|
"learning_rate": 3.9961055389499904e-05,
|
|
"loss": 1.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299579918384552,
|
|
"step": 140,
|
|
"valid_targets_mean": 16057.5,
|
|
"valid_targets_min": 14623
|
|
},
|
|
{
|
|
"epoch": 0.6012793176972282,
|
|
"grad_norm": 0.09467997919857611,
|
|
"learning_rate": 3.995725942225162e-05,
|
|
"loss": 1.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.345403790473938,
|
|
"step": 141,
|
|
"valid_targets_mean": 16109.1,
|
|
"valid_targets_min": 15417
|
|
},
|
|
{
|
|
"epoch": 0.605543710021322,
|
|
"grad_norm": 0.10720765053864924,
|
|
"learning_rate": 3.995328715579839e-05,
|
|
"loss": 1.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952474057674408,
|
|
"step": 142,
|
|
"valid_targets_mean": 14803.6,
|
|
"valid_targets_min": 12164
|
|
},
|
|
{
|
|
"epoch": 0.6098081023454158,
|
|
"grad_norm": 0.08985227020277041,
|
|
"learning_rate": 3.994913862523058e-05,
|
|
"loss": 1.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3270189166069031,
|
|
"step": 143,
|
|
"valid_targets_mean": 15989.1,
|
|
"valid_targets_min": 12847
|
|
},
|
|
{
|
|
"epoch": 0.6140724946695096,
|
|
"grad_norm": 0.101385211408821,
|
|
"learning_rate": 3.9944813867195624e-05,
|
|
"loss": 1.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30200088024139404,
|
|
"step": 144,
|
|
"valid_targets_mean": 16177.5,
|
|
"valid_targets_min": 15511
|
|
},
|
|
{
|
|
"epoch": 0.6183368869936035,
|
|
"grad_norm": 0.08635250718193511,
|
|
"learning_rate": 3.9940312919897744e-05,
|
|
"loss": 1.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27020391821861267,
|
|
"step": 145,
|
|
"valid_targets_mean": 16051.3,
|
|
"valid_targets_min": 14707
|
|
},
|
|
{
|
|
"epoch": 0.6226012793176973,
|
|
"grad_norm": 0.09348197620263732,
|
|
"learning_rate": 3.993563582309759e-05,
|
|
"loss": 1.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30488336086273193,
|
|
"step": 146,
|
|
"valid_targets_mean": 16157.5,
|
|
"valid_targets_min": 15066
|
|
},
|
|
{
|
|
"epoch": 0.6268656716417911,
|
|
"grad_norm": 0.0921600379794015,
|
|
"learning_rate": 3.993078261811186e-05,
|
|
"loss": 1.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22883184254169464,
|
|
"step": 147,
|
|
"valid_targets_mean": 10589.2,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 0.6311300639658849,
|
|
"grad_norm": 0.08993221232619614,
|
|
"learning_rate": 3.9925753347813e-05,
|
|
"loss": 1.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732120752334595,
|
|
"step": 148,
|
|
"valid_targets_mean": 16203.6,
|
|
"valid_targets_min": 15633
|
|
},
|
|
{
|
|
"epoch": 0.6353944562899787,
|
|
"grad_norm": 0.09016681017343858,
|
|
"learning_rate": 3.992054805662876e-05,
|
|
"loss": 1.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3402599096298218,
|
|
"step": 149,
|
|
"valid_targets_mean": 16129.5,
|
|
"valid_targets_min": 15266
|
|
},
|
|
{
|
|
"epoch": 0.6396588486140725,
|
|
"grad_norm": 0.09075617683543281,
|
|
"learning_rate": 3.991516679054185e-05,
|
|
"loss": 1.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16573843359947205,
|
|
"step": 150,
|
|
"valid_targets_mean": 7807.5,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 0.6439232409381663,
|
|
"grad_norm": 0.09033781534434782,
|
|
"learning_rate": 3.9909609597089496e-05,
|
|
"loss": 1.1057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27817484736442566,
|
|
"step": 151,
|
|
"valid_targets_mean": 16156.2,
|
|
"valid_targets_min": 15383
|
|
},
|
|
{
|
|
"epoch": 0.6481876332622601,
|
|
"grad_norm": 0.09393467069380017,
|
|
"learning_rate": 3.9903876525363055e-05,
|
|
"loss": 1.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35787343978881836,
|
|
"step": 152,
|
|
"valid_targets_mean": 15997.7,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 0.652452025586354,
|
|
"grad_norm": 0.09478863438694575,
|
|
"learning_rate": 3.989796762600755e-05,
|
|
"loss": 1.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24491862952709198,
|
|
"step": 153,
|
|
"valid_targets_mean": 13364.7,
|
|
"valid_targets_min": 8058
|
|
},
|
|
{
|
|
"epoch": 0.6567164179104478,
|
|
"grad_norm": 0.10693963113528412,
|
|
"learning_rate": 3.9891882951221246e-05,
|
|
"loss": 1.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28880056738853455,
|
|
"step": 154,
|
|
"valid_targets_mean": 16182.1,
|
|
"valid_targets_min": 15260
|
|
},
|
|
{
|
|
"epoch": 0.6609808102345416,
|
|
"grad_norm": 0.10228389076398879,
|
|
"learning_rate": 3.988562255475518e-05,
|
|
"loss": 1.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3309597969055176,
|
|
"step": 155,
|
|
"valid_targets_mean": 16138.2,
|
|
"valid_targets_min": 15546
|
|
},
|
|
{
|
|
"epoch": 0.6652452025586354,
|
|
"grad_norm": 0.1031616124552712,
|
|
"learning_rate": 3.987918649191268e-05,
|
|
"loss": 1.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789710760116577,
|
|
"step": 156,
|
|
"valid_targets_mean": 16149.3,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 0.6695095948827292,
|
|
"grad_norm": 0.09354933359809818,
|
|
"learning_rate": 3.987257481954888e-05,
|
|
"loss": 1.1022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3065093159675598,
|
|
"step": 157,
|
|
"valid_targets_mean": 16142.6,
|
|
"valid_targets_min": 15150
|
|
},
|
|
{
|
|
"epoch": 0.673773987206823,
|
|
"grad_norm": 0.10243326792233647,
|
|
"learning_rate": 3.9865787596070236e-05,
|
|
"loss": 1.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565004825592041,
|
|
"step": 158,
|
|
"valid_targets_mean": 13391.6,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 0.6780383795309168,
|
|
"grad_norm": 0.09017609445866975,
|
|
"learning_rate": 3.9858824881433975e-05,
|
|
"loss": 1.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29887858033180237,
|
|
"step": 159,
|
|
"valid_targets_mean": 15965.5,
|
|
"valid_targets_min": 14425
|
|
},
|
|
{
|
|
"epoch": 0.6823027718550106,
|
|
"grad_norm": 0.10579930717824831,
|
|
"learning_rate": 3.9851686737147585e-05,
|
|
"loss": 1.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3364817500114441,
|
|
"step": 160,
|
|
"valid_targets_mean": 16054.0,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 0.6865671641791045,
|
|
"grad_norm": 0.09088142894674614,
|
|
"learning_rate": 3.9844373226268305e-05,
|
|
"loss": 1.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20277650654315948,
|
|
"step": 161,
|
|
"valid_targets_mean": 8316.7,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 0.6908315565031983,
|
|
"grad_norm": 0.10850966654304378,
|
|
"learning_rate": 3.983688441340249e-05,
|
|
"loss": 1.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784193456172943,
|
|
"step": 162,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15498
|
|
},
|
|
{
|
|
"epoch": 0.6950959488272921,
|
|
"grad_norm": 0.09766988253959775,
|
|
"learning_rate": 3.98292203647051e-05,
|
|
"loss": 1.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3454211950302124,
|
|
"step": 163,
|
|
"valid_targets_mean": 16048.5,
|
|
"valid_targets_min": 15115
|
|
},
|
|
{
|
|
"epoch": 0.6993603411513859,
|
|
"grad_norm": 0.09993697064441993,
|
|
"learning_rate": 3.982138114787912e-05,
|
|
"loss": 1.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2013014853000641,
|
|
"step": 164,
|
|
"valid_targets_mean": 10785.7,
|
|
"valid_targets_min": 3449
|
|
},
|
|
{
|
|
"epoch": 0.7036247334754797,
|
|
"grad_norm": 0.09107691847057944,
|
|
"learning_rate": 3.98133668321749e-05,
|
|
"loss": 1.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2668771743774414,
|
|
"step": 165,
|
|
"valid_targets_mean": 16195.0,
|
|
"valid_targets_min": 15384
|
|
},
|
|
{
|
|
"epoch": 0.7078891257995735,
|
|
"grad_norm": 0.09773247058046776,
|
|
"learning_rate": 3.980517748838963e-05,
|
|
"loss": 1.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31354930996894836,
|
|
"step": 166,
|
|
"valid_targets_mean": 16103.7,
|
|
"valid_targets_min": 13667
|
|
},
|
|
{
|
|
"epoch": 0.7121535181236673,
|
|
"grad_norm": 0.09418876018598717,
|
|
"learning_rate": 3.979681318886664e-05,
|
|
"loss": 1.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26180019974708557,
|
|
"step": 167,
|
|
"valid_targets_mean": 14912.7,
|
|
"valid_targets_min": 12620
|
|
},
|
|
{
|
|
"epoch": 0.7164179104477612,
|
|
"grad_norm": 0.10022095359971053,
|
|
"learning_rate": 3.978827400749481e-05,
|
|
"loss": 1.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31708428263664246,
|
|
"step": 168,
|
|
"valid_targets_mean": 16150.3,
|
|
"valid_targets_min": 15597
|
|
},
|
|
{
|
|
"epoch": 0.720682302771855,
|
|
"grad_norm": 0.09757364719473079,
|
|
"learning_rate": 3.977956001970788e-05,
|
|
"loss": 1.0964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3082122802734375,
|
|
"step": 169,
|
|
"valid_targets_mean": 16166.2,
|
|
"valid_targets_min": 15079
|
|
},
|
|
{
|
|
"epoch": 0.7249466950959488,
|
|
"grad_norm": 0.10039554230255081,
|
|
"learning_rate": 3.977067130248381e-05,
|
|
"loss": 1.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2876758277416229,
|
|
"step": 170,
|
|
"valid_targets_mean": 16125.8,
|
|
"valid_targets_min": 14802
|
|
},
|
|
{
|
|
"epoch": 0.7292110874200426,
|
|
"grad_norm": 0.09002586551148536,
|
|
"learning_rate": 3.9761607934344095e-05,
|
|
"loss": 1.1262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32189834117889404,
|
|
"step": 171,
|
|
"valid_targets_mean": 16135.1,
|
|
"valid_targets_min": 15119
|
|
},
|
|
{
|
|
"epoch": 0.7334754797441365,
|
|
"grad_norm": 0.10506503613712179,
|
|
"learning_rate": 3.975236999535306e-05,
|
|
"loss": 1.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23188666999340057,
|
|
"step": 172,
|
|
"valid_targets_mean": 10806.8,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 0.7377398720682303,
|
|
"grad_norm": 0.0856508629665133,
|
|
"learning_rate": 3.974295756711717e-05,
|
|
"loss": 1.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757749855518341,
|
|
"step": 173,
|
|
"valid_targets_mean": 16207.7,
|
|
"valid_targets_min": 15425
|
|
},
|
|
{
|
|
"epoch": 0.7420042643923241,
|
|
"grad_norm": 0.10187687696421878,
|
|
"learning_rate": 3.9733370732784296e-05,
|
|
"loss": 1.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970867156982422,
|
|
"step": 174,
|
|
"valid_targets_mean": 16120.1,
|
|
"valid_targets_min": 14220
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.08871560074325952,
|
|
"learning_rate": 3.972360957704298e-05,
|
|
"loss": 1.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1963111013174057,
|
|
"step": 175,
|
|
"valid_targets_mean": 9107.8,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 0.7505330490405118,
|
|
"grad_norm": 0.10056598314760515,
|
|
"learning_rate": 3.97136741861217e-05,
|
|
"loss": 1.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947056293487549,
|
|
"step": 176,
|
|
"valid_targets_mean": 16076.4,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 0.7547974413646056,
|
|
"grad_norm": 0.10311644341970994,
|
|
"learning_rate": 3.970356464778808e-05,
|
|
"loss": 1.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3009605407714844,
|
|
"step": 177,
|
|
"valid_targets_mean": 16017.1,
|
|
"valid_targets_min": 12138
|
|
},
|
|
{
|
|
"epoch": 0.7590618336886994,
|
|
"grad_norm": 0.10579148903073073,
|
|
"learning_rate": 3.969328105134817e-05,
|
|
"loss": 1.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23442336916923523,
|
|
"step": 178,
|
|
"valid_targets_mean": 12726.8,
|
|
"valid_targets_min": 10360
|
|
},
|
|
{
|
|
"epoch": 0.7633262260127932,
|
|
"grad_norm": 0.09301339925327447,
|
|
"learning_rate": 3.9682823487645584e-05,
|
|
"loss": 1.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32673752307891846,
|
|
"step": 179,
|
|
"valid_targets_mean": 15850.9,
|
|
"valid_targets_min": 13815
|
|
},
|
|
{
|
|
"epoch": 0.767590618336887,
|
|
"grad_norm": 0.10125438375778621,
|
|
"learning_rate": 3.9672192049060745e-05,
|
|
"loss": 1.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29607778787612915,
|
|
"step": 180,
|
|
"valid_targets_mean": 16152.1,
|
|
"valid_targets_min": 14797
|
|
},
|
|
{
|
|
"epoch": 0.7718550106609808,
|
|
"grad_norm": 0.09482811361947403,
|
|
"learning_rate": 3.966138682951008e-05,
|
|
"loss": 1.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273228257894516,
|
|
"step": 181,
|
|
"valid_targets_mean": 15792.7,
|
|
"valid_targets_min": 13462
|
|
},
|
|
{
|
|
"epoch": 0.7761194029850746,
|
|
"grad_norm": 0.09674038707929605,
|
|
"learning_rate": 3.9650407924445147e-05,
|
|
"loss": 1.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29572930932044983,
|
|
"step": 182,
|
|
"valid_targets_mean": 15968.9,
|
|
"valid_targets_min": 9254
|
|
},
|
|
{
|
|
"epoch": 0.7803837953091685,
|
|
"grad_norm": 0.09372362719194748,
|
|
"learning_rate": 3.963925543085181e-05,
|
|
"loss": 1.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27015525102615356,
|
|
"step": 183,
|
|
"valid_targets_mean": 13291.2,
|
|
"valid_targets_min": 2250
|
|
},
|
|
{
|
|
"epoch": 0.7846481876332623,
|
|
"grad_norm": 0.09043923766986006,
|
|
"learning_rate": 3.96279294472494e-05,
|
|
"loss": 1.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805386185646057,
|
|
"step": 184,
|
|
"valid_targets_mean": 16146.7,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 0.7889125799573561,
|
|
"grad_norm": 0.0900606180645136,
|
|
"learning_rate": 3.961643007368984e-05,
|
|
"loss": 1.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31000715494155884,
|
|
"step": 185,
|
|
"valid_targets_mean": 16094.6,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 0.7931769722814499,
|
|
"grad_norm": 0.10866838467700593,
|
|
"learning_rate": 3.960475741175671e-05,
|
|
"loss": 1.16,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19901937246322632,
|
|
"step": 186,
|
|
"valid_targets_mean": 10486.2,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 0.7974413646055437,
|
|
"grad_norm": 0.09579766460716391,
|
|
"learning_rate": 3.959291156456444e-05,
|
|
"loss": 1.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2896297872066498,
|
|
"step": 187,
|
|
"valid_targets_mean": 16134.7,
|
|
"valid_targets_min": 14805
|
|
},
|
|
{
|
|
"epoch": 0.8017057569296375,
|
|
"grad_norm": 0.10193170370775437,
|
|
"learning_rate": 3.9580892636757334e-05,
|
|
"loss": 1.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32295069098472595,
|
|
"step": 188,
|
|
"valid_targets_mean": 16102.6,
|
|
"valid_targets_min": 15070
|
|
},
|
|
{
|
|
"epoch": 0.8059701492537313,
|
|
"grad_norm": 0.09901836357571368,
|
|
"learning_rate": 3.9568700734508645e-05,
|
|
"loss": 1.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21317507326602936,
|
|
"step": 189,
|
|
"valid_targets_mean": 10383.8,
|
|
"valid_targets_min": 4446
|
|
},
|
|
{
|
|
"epoch": 0.8102345415778252,
|
|
"grad_norm": 0.10023585510916225,
|
|
"learning_rate": 3.955633596551967e-05,
|
|
"loss": 1.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2971959114074707,
|
|
"step": 190,
|
|
"valid_targets_mean": 16068.4,
|
|
"valid_targets_min": 15133
|
|
},
|
|
{
|
|
"epoch": 0.814498933901919,
|
|
"grad_norm": 0.0979452895121752,
|
|
"learning_rate": 3.9543798439018776e-05,
|
|
"loss": 1.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32349517941474915,
|
|
"step": 191,
|
|
"valid_targets_mean": 16088.5,
|
|
"valid_targets_min": 15072
|
|
},
|
|
{
|
|
"epoch": 0.8187633262260128,
|
|
"grad_norm": 0.10218850179055465,
|
|
"learning_rate": 3.953108826576046e-05,
|
|
"loss": 1.1444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28032591938972473,
|
|
"step": 192,
|
|
"valid_targets_mean": 14237.3,
|
|
"valid_targets_min": 10814
|
|
},
|
|
{
|
|
"epoch": 0.8230277185501066,
|
|
"grad_norm": 0.10304401150670457,
|
|
"learning_rate": 3.9518205558024334e-05,
|
|
"loss": 1.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32667791843414307,
|
|
"step": 193,
|
|
"valid_targets_mean": 16107.4,
|
|
"valid_targets_min": 15196
|
|
},
|
|
{
|
|
"epoch": 0.8272921108742004,
|
|
"grad_norm": 0.1069460824683147,
|
|
"learning_rate": 3.9505150429614154e-05,
|
|
"loss": 1.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31320929527282715,
|
|
"step": 194,
|
|
"valid_targets_mean": 16132.2,
|
|
"valid_targets_min": 15070
|
|
},
|
|
{
|
|
"epoch": 0.8315565031982942,
|
|
"grad_norm": 0.08857200762364219,
|
|
"learning_rate": 3.949192299585681e-05,
|
|
"loss": 1.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27548372745513916,
|
|
"step": 195,
|
|
"valid_targets_mean": 16083.0,
|
|
"valid_targets_min": 15079
|
|
},
|
|
{
|
|
"epoch": 0.835820895522388,
|
|
"grad_norm": 0.10200845557778795,
|
|
"learning_rate": 3.9478523373601325e-05,
|
|
"loss": 1.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3393228054046631,
|
|
"step": 196,
|
|
"valid_targets_mean": 15900.9,
|
|
"valid_targets_min": 13787
|
|
},
|
|
{
|
|
"epoch": 0.8400852878464818,
|
|
"grad_norm": 0.10954087955048088,
|
|
"learning_rate": 3.946495168121778e-05,
|
|
"loss": 1.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22450295090675354,
|
|
"step": 197,
|
|
"valid_targets_mean": 11008.3,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 0.8443496801705757,
|
|
"grad_norm": 0.0989521625215589,
|
|
"learning_rate": 3.9451208038596325e-05,
|
|
"loss": 1.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27509748935699463,
|
|
"step": 198,
|
|
"valid_targets_mean": 16139.8,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 0.8486140724946695,
|
|
"grad_norm": 0.10742876295117282,
|
|
"learning_rate": 3.943729256714608e-05,
|
|
"loss": 1.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3281787633895874,
|
|
"step": 199,
|
|
"valid_targets_mean": 16218.7,
|
|
"valid_targets_min": 15687
|
|
},
|
|
{
|
|
"epoch": 0.8528784648187633,
|
|
"grad_norm": 0.09797807053513334,
|
|
"learning_rate": 3.942320538979408e-05,
|
|
"loss": 1.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17650890350341797,
|
|
"step": 200,
|
|
"valid_targets_mean": 8236.3,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.10011843133863756,
|
|
"learning_rate": 3.9408946630984144e-05,
|
|
"loss": 1.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172979950904846,
|
|
"step": 201,
|
|
"valid_targets_mean": 15942.6,
|
|
"valid_targets_min": 12138
|
|
},
|
|
{
|
|
"epoch": 0.8614072494669509,
|
|
"grad_norm": 0.09425469523468114,
|
|
"learning_rate": 3.939451641667587e-05,
|
|
"loss": 1.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32402580976486206,
|
|
"step": 202,
|
|
"valid_targets_mean": 16060.5,
|
|
"valid_targets_min": 14762
|
|
},
|
|
{
|
|
"epoch": 0.8656716417910447,
|
|
"grad_norm": 0.09986270325000021,
|
|
"learning_rate": 3.937991487434342e-05,
|
|
"loss": 1.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21700716018676758,
|
|
"step": 203,
|
|
"valid_targets_mean": 12896.2,
|
|
"valid_targets_min": 9617
|
|
},
|
|
{
|
|
"epoch": 0.8699360341151386,
|
|
"grad_norm": 0.10563828901687995,
|
|
"learning_rate": 3.9365142132974484e-05,
|
|
"loss": 1.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29873669147491455,
|
|
"step": 204,
|
|
"valid_targets_mean": 16013.2,
|
|
"valid_targets_min": 13521
|
|
},
|
|
{
|
|
"epoch": 0.8742004264392325,
|
|
"grad_norm": 0.10381701644945565,
|
|
"learning_rate": 3.935019832306905e-05,
|
|
"loss": 1.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3266843557357788,
|
|
"step": 205,
|
|
"valid_targets_mean": 16123.5,
|
|
"valid_targets_min": 14977
|
|
},
|
|
{
|
|
"epoch": 0.8784648187633263,
|
|
"grad_norm": 0.1047864828157829,
|
|
"learning_rate": 3.933508357663832e-05,
|
|
"loss": 1.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678084671497345,
|
|
"step": 206,
|
|
"valid_targets_mean": 15692.4,
|
|
"valid_targets_min": 13392
|
|
},
|
|
{
|
|
"epoch": 0.8827292110874201,
|
|
"grad_norm": 0.10131184402782463,
|
|
"learning_rate": 3.9319798027203544e-05,
|
|
"loss": 1.0852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29921239614486694,
|
|
"step": 207,
|
|
"valid_targets_mean": 16135.8,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 0.8869936034115139,
|
|
"grad_norm": 0.09709304079747372,
|
|
"learning_rate": 3.930434180979478e-05,
|
|
"loss": 1.0978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25960588455200195,
|
|
"step": 208,
|
|
"valid_targets_mean": 13328.7,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 0.8912579957356077,
|
|
"grad_norm": 0.10356977949936048,
|
|
"learning_rate": 3.928871506094975e-05,
|
|
"loss": 1.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803318500518799,
|
|
"step": 209,
|
|
"valid_targets_mean": 16123.1,
|
|
"valid_targets_min": 14458
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.11255049930125063,
|
|
"learning_rate": 3.927291791871264e-05,
|
|
"loss": 1.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33228054642677307,
|
|
"step": 210,
|
|
"valid_targets_mean": 16117.8,
|
|
"valid_targets_min": 15356
|
|
},
|
|
{
|
|
"epoch": 0.8997867803837953,
|
|
"grad_norm": 0.09305549394404865,
|
|
"learning_rate": 3.925695052263284e-05,
|
|
"loss": 1.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19876155257225037,
|
|
"step": 211,
|
|
"valid_targets_mean": 9692.5,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 0.9040511727078892,
|
|
"grad_norm": 0.11593462381643857,
|
|
"learning_rate": 3.924081301376375e-05,
|
|
"loss": 1.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2912921905517578,
|
|
"step": 212,
|
|
"valid_targets_mean": 16126.8,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 0.908315565031983,
|
|
"grad_norm": 0.10325789493316319,
|
|
"learning_rate": 3.9224505534661525e-05,
|
|
"loss": 1.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32825785875320435,
|
|
"step": 213,
|
|
"valid_targets_mean": 16191.4,
|
|
"valid_targets_min": 15355
|
|
},
|
|
{
|
|
"epoch": 0.9125799573560768,
|
|
"grad_norm": 0.09877262501698293,
|
|
"learning_rate": 3.92080282293838e-05,
|
|
"loss": 1.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22680217027664185,
|
|
"step": 214,
|
|
"valid_targets_mean": 12585.9,
|
|
"valid_targets_min": 7122
|
|
},
|
|
{
|
|
"epoch": 0.9168443496801706,
|
|
"grad_norm": 0.10772465939466384,
|
|
"learning_rate": 3.9191381243488417e-05,
|
|
"loss": 1.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26962435245513916,
|
|
"step": 215,
|
|
"valid_targets_mean": 16172.5,
|
|
"valid_targets_min": 14954
|
|
},
|
|
{
|
|
"epoch": 0.9211087420042644,
|
|
"grad_norm": 0.11755089935418887,
|
|
"learning_rate": 3.9174564724032167e-05,
|
|
"loss": 1.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31640106439590454,
|
|
"step": 216,
|
|
"valid_targets_mean": 16134.5,
|
|
"valid_targets_min": 15461
|
|
},
|
|
{
|
|
"epoch": 0.9253731343283582,
|
|
"grad_norm": 0.09826927112341902,
|
|
"learning_rate": 3.9157578819569455e-05,
|
|
"loss": 1.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24364183843135834,
|
|
"step": 217,
|
|
"valid_targets_mean": 14509.2,
|
|
"valid_targets_min": 12283
|
|
},
|
|
{
|
|
"epoch": 0.929637526652452,
|
|
"grad_norm": 0.10204985761524848,
|
|
"learning_rate": 3.9140423680151036e-05,
|
|
"loss": 1.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31558912992477417,
|
|
"step": 218,
|
|
"valid_targets_mean": 16086.2,
|
|
"valid_targets_min": 14668
|
|
},
|
|
{
|
|
"epoch": 0.9339019189765458,
|
|
"grad_norm": 0.10292319153285358,
|
|
"learning_rate": 3.9123099457322625e-05,
|
|
"loss": 1.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30415990948677063,
|
|
"step": 219,
|
|
"valid_targets_mean": 16148.6,
|
|
"valid_targets_min": 14352
|
|
},
|
|
{
|
|
"epoch": 0.9381663113006397,
|
|
"grad_norm": 0.09928985399113802,
|
|
"learning_rate": 3.9105606304123605e-05,
|
|
"loss": 1.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27917760610580444,
|
|
"step": 220,
|
|
"valid_targets_mean": 15443.3,
|
|
"valid_targets_min": 13794
|
|
},
|
|
{
|
|
"epoch": 0.9424307036247335,
|
|
"grad_norm": 0.09838612682599242,
|
|
"learning_rate": 3.908794437508567e-05,
|
|
"loss": 1.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31867414712905884,
|
|
"step": 221,
|
|
"valid_targets_mean": 16067.8,
|
|
"valid_targets_min": 14017
|
|
},
|
|
{
|
|
"epoch": 0.9466950959488273,
|
|
"grad_norm": 0.09897590405152108,
|
|
"learning_rate": 3.907011382623145e-05,
|
|
"loss": 1.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23478242754936218,
|
|
"step": 222,
|
|
"valid_targets_mean": 10960.4,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 0.9509594882729211,
|
|
"grad_norm": 0.10219217099851002,
|
|
"learning_rate": 3.905211481507313e-05,
|
|
"loss": 1.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270889014005661,
|
|
"step": 223,
|
|
"valid_targets_mean": 16137.7,
|
|
"valid_targets_min": 14217
|
|
},
|
|
{
|
|
"epoch": 0.9552238805970149,
|
|
"grad_norm": 0.10713202622912575,
|
|
"learning_rate": 3.903394750061106e-05,
|
|
"loss": 1.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29822540283203125,
|
|
"step": 224,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15401
|
|
},
|
|
{
|
|
"epoch": 0.9594882729211087,
|
|
"grad_norm": 0.09070751657306529,
|
|
"learning_rate": 3.9015612043332375e-05,
|
|
"loss": 1.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16825994849205017,
|
|
"step": 225,
|
|
"valid_targets_mean": 7948.1,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 0.9637526652452025,
|
|
"grad_norm": 0.09511296484945553,
|
|
"learning_rate": 3.8997108605209535e-05,
|
|
"loss": 1.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620267868041992,
|
|
"step": 226,
|
|
"valid_targets_mean": 16170.1,
|
|
"valid_targets_min": 15320
|
|
},
|
|
{
|
|
"epoch": 0.9680170575692963,
|
|
"grad_norm": 0.08668576463692115,
|
|
"learning_rate": 3.897843734969891e-05,
|
|
"loss": 1.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015277683734894,
|
|
"step": 227,
|
|
"valid_targets_mean": 16224.0,
|
|
"valid_targets_min": 15603
|
|
},
|
|
{
|
|
"epoch": 0.9722814498933902,
|
|
"grad_norm": 0.10507117567158937,
|
|
"learning_rate": 3.895959844173937e-05,
|
|
"loss": 1.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22079966962337494,
|
|
"step": 228,
|
|
"valid_targets_mean": 12857.5,
|
|
"valid_targets_min": 9949
|
|
},
|
|
{
|
|
"epoch": 0.976545842217484,
|
|
"grad_norm": 0.09004630256057394,
|
|
"learning_rate": 3.8940592047750774e-05,
|
|
"loss": 1.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932007908821106,
|
|
"step": 229,
|
|
"valid_targets_mean": 16166.8,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 0.9808102345415778,
|
|
"grad_norm": 0.10241316791166893,
|
|
"learning_rate": 3.892141833563255e-05,
|
|
"loss": 1.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34401440620422363,
|
|
"step": 230,
|
|
"valid_targets_mean": 16062.8,
|
|
"valid_targets_min": 15196
|
|
},
|
|
{
|
|
"epoch": 0.9850746268656716,
|
|
"grad_norm": 0.09201328398771727,
|
|
"learning_rate": 3.8902077474762155e-05,
|
|
"loss": 1.0743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24984166026115417,
|
|
"step": 231,
|
|
"valid_targets_mean": 14642.6,
|
|
"valid_targets_min": 12309
|
|
},
|
|
{
|
|
"epoch": 0.9893390191897654,
|
|
"grad_norm": 0.1066960679928066,
|
|
"learning_rate": 3.888256963599364e-05,
|
|
"loss": 1.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.343783438205719,
|
|
"step": 232,
|
|
"valid_targets_mean": 16037.1,
|
|
"valid_targets_min": 13918
|
|
},
|
|
{
|
|
"epoch": 0.9936034115138592,
|
|
"grad_norm": 0.10787577452105508,
|
|
"learning_rate": 3.886289499165609e-05,
|
|
"loss": 1.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721879482269287,
|
|
"step": 233,
|
|
"valid_targets_mean": 13028.1,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 0.997867803837953,
|
|
"grad_norm": 0.11373480300240212,
|
|
"learning_rate": 3.884305371555215e-05,
|
|
"loss": 1.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2873799204826355,
|
|
"step": 234,
|
|
"valid_targets_mean": 15974.6,
|
|
"valid_targets_min": 13604
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.12541292873645682,
|
|
"learning_rate": 3.882304598295643e-05,
|
|
"loss": 1.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4884461760520935,
|
|
"step": 235,
|
|
"valid_targets_mean": 10016.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.004264392324094,
|
|
"grad_norm": 0.10683042281153865,
|
|
"learning_rate": 3.880287197061402e-05,
|
|
"loss": 1.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27881956100463867,
|
|
"step": 236,
|
|
"valid_targets_mean": 16117.8,
|
|
"valid_targets_min": 14716
|
|
},
|
|
{
|
|
"epoch": 1.0085287846481876,
|
|
"grad_norm": 0.10170588527668795,
|
|
"learning_rate": 3.878253185673888e-05,
|
|
"loss": 1.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325032114982605,
|
|
"step": 237,
|
|
"valid_targets_mean": 16108.4,
|
|
"valid_targets_min": 14992
|
|
},
|
|
{
|
|
"epoch": 1.0127931769722816,
|
|
"grad_norm": 0.11439961271388291,
|
|
"learning_rate": 3.876202582101229e-05,
|
|
"loss": 1.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24244996905326843,
|
|
"step": 238,
|
|
"valid_targets_mean": 13048.1,
|
|
"valid_targets_min": 9235
|
|
},
|
|
{
|
|
"epoch": 1.0170575692963753,
|
|
"grad_norm": 0.10354471334180795,
|
|
"learning_rate": 3.874135404458125e-05,
|
|
"loss": 1.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29867154359817505,
|
|
"step": 239,
|
|
"valid_targets_mean": 16174.0,
|
|
"valid_targets_min": 15306
|
|
},
|
|
{
|
|
"epoch": 1.0213219616204692,
|
|
"grad_norm": 0.10710832671955754,
|
|
"learning_rate": 3.8720516710056905e-05,
|
|
"loss": 1.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328958123922348,
|
|
"step": 240,
|
|
"valid_targets_mean": 16024.2,
|
|
"valid_targets_min": 14352
|
|
},
|
|
{
|
|
"epoch": 1.0255863539445629,
|
|
"grad_norm": 0.09296450227881878,
|
|
"learning_rate": 3.8699514001512885e-05,
|
|
"loss": 1.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29329919815063477,
|
|
"step": 241,
|
|
"valid_targets_mean": 15146.3,
|
|
"valid_targets_min": 13238
|
|
},
|
|
{
|
|
"epoch": 1.0298507462686568,
|
|
"grad_norm": 0.09670378719576792,
|
|
"learning_rate": 3.867834610448374e-05,
|
|
"loss": 1.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31909120082855225,
|
|
"step": 242,
|
|
"valid_targets_mean": 16168.6,
|
|
"valid_targets_min": 15428
|
|
},
|
|
{
|
|
"epoch": 1.0341151385927505,
|
|
"grad_norm": 0.11027094326230809,
|
|
"learning_rate": 3.865701320596324e-05,
|
|
"loss": 1.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24024316668510437,
|
|
"step": 243,
|
|
"valid_targets_mean": 13071.8,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 1.0383795309168444,
|
|
"grad_norm": 0.09516664155682011,
|
|
"learning_rate": 3.863551549440277e-05,
|
|
"loss": 1.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273135244846344,
|
|
"step": 244,
|
|
"valid_targets_mean": 16043.3,
|
|
"valid_targets_min": 14802
|
|
},
|
|
{
|
|
"epoch": 1.0426439232409381,
|
|
"grad_norm": 0.09478333075343771,
|
|
"learning_rate": 3.861385315970964e-05,
|
|
"loss": 1.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2820066809654236,
|
|
"step": 245,
|
|
"valid_targets_mean": 16161.9,
|
|
"valid_targets_min": 14599
|
|
},
|
|
{
|
|
"epoch": 1.046908315565032,
|
|
"grad_norm": 0.09495890645197919,
|
|
"learning_rate": 3.859202639324542e-05,
|
|
"loss": 1.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2000979781150818,
|
|
"step": 246,
|
|
"valid_targets_mean": 10506.8,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.0511727078891258,
|
|
"grad_norm": 0.10546266168229781,
|
|
"learning_rate": 3.8570035387824214e-05,
|
|
"loss": 1.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905887961387634,
|
|
"step": 247,
|
|
"valid_targets_mean": 16070.8,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 1.0554371002132197,
|
|
"grad_norm": 0.09803266852048718,
|
|
"learning_rate": 3.8547880337711036e-05,
|
|
"loss": 1.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3248744010925293,
|
|
"step": 248,
|
|
"valid_targets_mean": 16120.7,
|
|
"valid_targets_min": 14980
|
|
},
|
|
{
|
|
"epoch": 1.0597014925373134,
|
|
"grad_norm": 0.08956323717781504,
|
|
"learning_rate": 3.8525561438620016e-05,
|
|
"loss": 1.0755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19720616936683655,
|
|
"step": 249,
|
|
"valid_targets_mean": 11242.8,
|
|
"valid_targets_min": 7521
|
|
},
|
|
{
|
|
"epoch": 1.0639658848614073,
|
|
"grad_norm": 0.11122611084444642,
|
|
"learning_rate": 3.850307888771269e-05,
|
|
"loss": 1.0845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899143695831299,
|
|
"step": 250,
|
|
"valid_targets_mean": 16056.8,
|
|
"valid_targets_min": 14469
|
|
},
|
|
{
|
|
"epoch": 1.068230277185501,
|
|
"grad_norm": 0.10090308114859445,
|
|
"learning_rate": 3.848043288359629e-05,
|
|
"loss": 1.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32051894068717957,
|
|
"step": 251,
|
|
"valid_targets_mean": 16144.1,
|
|
"valid_targets_min": 15335
|
|
},
|
|
{
|
|
"epoch": 1.072494669509595,
|
|
"grad_norm": 0.1146784072803254,
|
|
"learning_rate": 3.8457623626321944e-05,
|
|
"loss": 1.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671715319156647,
|
|
"step": 252,
|
|
"valid_targets_mean": 14765.2,
|
|
"valid_targets_min": 13003
|
|
},
|
|
{
|
|
"epoch": 1.0767590618336886,
|
|
"grad_norm": 0.10602825488059299,
|
|
"learning_rate": 3.843465131738296e-05,
|
|
"loss": 1.0731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29393211007118225,
|
|
"step": 253,
|
|
"valid_targets_mean": 16154.9,
|
|
"valid_targets_min": 15293
|
|
},
|
|
{
|
|
"epoch": 1.0810234541577826,
|
|
"grad_norm": 0.10894450426578323,
|
|
"learning_rate": 3.8411516159713e-05,
|
|
"loss": 1.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3288154602050781,
|
|
"step": 254,
|
|
"valid_targets_mean": 16006.0,
|
|
"valid_targets_min": 15125
|
|
},
|
|
{
|
|
"epoch": 1.0852878464818763,
|
|
"grad_norm": 0.09828630613437796,
|
|
"learning_rate": 3.838821835768431e-05,
|
|
"loss": 1.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681499123573303,
|
|
"step": 255,
|
|
"valid_targets_mean": 15540.6,
|
|
"valid_targets_min": 13162
|
|
},
|
|
{
|
|
"epoch": 1.0895522388059702,
|
|
"grad_norm": 0.10568967159707446,
|
|
"learning_rate": 3.83647581171059e-05,
|
|
"loss": 1.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978174388408661,
|
|
"step": 256,
|
|
"valid_targets_mean": 16179.0,
|
|
"valid_targets_min": 15486
|
|
},
|
|
{
|
|
"epoch": 1.0938166311300639,
|
|
"grad_norm": 0.10882593060685666,
|
|
"learning_rate": 3.8341135645221744e-05,
|
|
"loss": 1.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20971739292144775,
|
|
"step": 257,
|
|
"valid_targets_mean": 10339.1,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 1.0980810234541578,
|
|
"grad_norm": 0.0964322840611778,
|
|
"learning_rate": 3.831735115070895e-05,
|
|
"loss": 1.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26707398891448975,
|
|
"step": 258,
|
|
"valid_targets_mean": 16071.9,
|
|
"valid_targets_min": 14458
|
|
},
|
|
{
|
|
"epoch": 1.1023454157782515,
|
|
"grad_norm": 0.1178055955195376,
|
|
"learning_rate": 3.8293404843675904e-05,
|
|
"loss": 1.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30339211225509644,
|
|
"step": 259,
|
|
"valid_targets_mean": 16169.2,
|
|
"valid_targets_min": 15132
|
|
},
|
|
{
|
|
"epoch": 1.1066098081023454,
|
|
"grad_norm": 0.10199560914886747,
|
|
"learning_rate": 3.8269296935660395e-05,
|
|
"loss": 1.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17471778392791748,
|
|
"step": 260,
|
|
"valid_targets_mean": 7504.1,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 1.1108742004264391,
|
|
"grad_norm": 0.11822884708517124,
|
|
"learning_rate": 3.82450276396278e-05,
|
|
"loss": 1.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26952528953552246,
|
|
"step": 261,
|
|
"valid_targets_mean": 16171.7,
|
|
"valid_targets_min": 15591
|
|
},
|
|
{
|
|
"epoch": 1.115138592750533,
|
|
"grad_norm": 0.09921452428188579,
|
|
"learning_rate": 3.822059716996916e-05,
|
|
"loss": 1.0883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2882543206214905,
|
|
"step": 262,
|
|
"valid_targets_mean": 16043.6,
|
|
"valid_targets_min": 10358
|
|
},
|
|
{
|
|
"epoch": 1.1194029850746268,
|
|
"grad_norm": 0.11099556379499767,
|
|
"learning_rate": 3.819600574249929e-05,
|
|
"loss": 1.1008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23989221453666687,
|
|
"step": 263,
|
|
"valid_targets_mean": 13854.4,
|
|
"valid_targets_min": 11399
|
|
},
|
|
{
|
|
"epoch": 1.1236673773987207,
|
|
"grad_norm": 0.10003394838173331,
|
|
"learning_rate": 3.817125357445489e-05,
|
|
"loss": 1.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3058737516403198,
|
|
"step": 264,
|
|
"valid_targets_mean": 16105.5,
|
|
"valid_targets_min": 15062
|
|
},
|
|
{
|
|
"epoch": 1.1279317697228146,
|
|
"grad_norm": 0.10920050128520137,
|
|
"learning_rate": 3.814634088449261e-05,
|
|
"loss": 1.0889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2990608513355255,
|
|
"step": 265,
|
|
"valid_targets_mean": 16135.1,
|
|
"valid_targets_min": 14885
|
|
},
|
|
{
|
|
"epoch": 1.1321961620469083,
|
|
"grad_norm": 0.1005436287987819,
|
|
"learning_rate": 3.812126789268712e-05,
|
|
"loss": 1.0871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26521193981170654,
|
|
"step": 266,
|
|
"valid_targets_mean": 14780.2,
|
|
"valid_targets_min": 12243
|
|
},
|
|
{
|
|
"epoch": 1.136460554371002,
|
|
"grad_norm": 0.10741630346238575,
|
|
"learning_rate": 3.80960348205292e-05,
|
|
"loss": 1.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3251075744628906,
|
|
"step": 267,
|
|
"valid_targets_mean": 16115.9,
|
|
"valid_targets_min": 14623
|
|
},
|
|
{
|
|
"epoch": 1.140724946695096,
|
|
"grad_norm": 0.10881515443285794,
|
|
"learning_rate": 3.807064189092372e-05,
|
|
"loss": 1.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25577372312545776,
|
|
"step": 268,
|
|
"valid_targets_mean": 12977.3,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.1449893390191899,
|
|
"grad_norm": 0.1272358486484507,
|
|
"learning_rate": 3.804508932818771e-05,
|
|
"loss": 1.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533995509147644,
|
|
"step": 269,
|
|
"valid_targets_mean": 16100.2,
|
|
"valid_targets_min": 13594
|
|
},
|
|
{
|
|
"epoch": 1.1492537313432836,
|
|
"grad_norm": 0.10614223483702025,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 1.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32767167687416077,
|
|
"step": 270,
|
|
"valid_targets_mean": 16038.6,
|
|
"valid_targets_min": 14217
|
|
},
|
|
{
|
|
"epoch": 1.1535181236673775,
|
|
"grad_norm": 0.11089286904660725,
|
|
"learning_rate": 3.799350620764114e-05,
|
|
"loss": 1.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20955806970596313,
|
|
"step": 271,
|
|
"valid_targets_mean": 9911.7,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 1.1577825159914712,
|
|
"grad_norm": 0.1125271074768582,
|
|
"learning_rate": 3.7967476105507535e-05,
|
|
"loss": 1.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26700156927108765,
|
|
"step": 272,
|
|
"valid_targets_mean": 16177.7,
|
|
"valid_targets_min": 15194
|
|
},
|
|
{
|
|
"epoch": 1.1620469083155651,
|
|
"grad_norm": 0.12637639551945468,
|
|
"learning_rate": 3.7941287281593284e-05,
|
|
"loss": 1.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3548439145088196,
|
|
"step": 273,
|
|
"valid_targets_mean": 16042.5,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 1.1663113006396588,
|
|
"grad_norm": 0.11343823200453264,
|
|
"learning_rate": 3.7914939967246227e-05,
|
|
"loss": 1.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2081296145915985,
|
|
"step": 274,
|
|
"valid_targets_mean": 11080.3,
|
|
"valid_targets_min": 4701
|
|
},
|
|
{
|
|
"epoch": 1.1705756929637527,
|
|
"grad_norm": 0.10479167008681609,
|
|
"learning_rate": 3.7888434395214285e-05,
|
|
"loss": 1.0843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28286051750183105,
|
|
"step": 275,
|
|
"valid_targets_mean": 16055.4,
|
|
"valid_targets_min": 12727
|
|
},
|
|
{
|
|
"epoch": 1.1748400852878464,
|
|
"grad_norm": 0.117365943612522,
|
|
"learning_rate": 3.786177079964339e-05,
|
|
"loss": 1.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3356223404407501,
|
|
"step": 276,
|
|
"valid_targets_mean": 15977.4,
|
|
"valid_targets_min": 12126
|
|
},
|
|
{
|
|
"epoch": 1.1791044776119404,
|
|
"grad_norm": 0.10864644833276692,
|
|
"learning_rate": 3.783494941607544e-05,
|
|
"loss": 1.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578306198120117,
|
|
"step": 277,
|
|
"valid_targets_mean": 15631.6,
|
|
"valid_targets_min": 13219
|
|
},
|
|
{
|
|
"epoch": 1.183368869936034,
|
|
"grad_norm": 0.1264284270932619,
|
|
"learning_rate": 3.780797048144621e-05,
|
|
"loss": 1.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29789525270462036,
|
|
"step": 278,
|
|
"valid_targets_mean": 16159.2,
|
|
"valid_targets_min": 15439
|
|
},
|
|
{
|
|
"epoch": 1.187633262260128,
|
|
"grad_norm": 0.12052067974222681,
|
|
"learning_rate": 3.7780834234083236e-05,
|
|
"loss": 1.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2927483916282654,
|
|
"step": 279,
|
|
"valid_targets_mean": 16182.5,
|
|
"valid_targets_min": 15582
|
|
},
|
|
{
|
|
"epoch": 1.1918976545842217,
|
|
"grad_norm": 0.1147766292188054,
|
|
"learning_rate": 3.775354091370376e-05,
|
|
"loss": 1.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30200472474098206,
|
|
"step": 280,
|
|
"valid_targets_mean": 15908.9,
|
|
"valid_targets_min": 14750
|
|
},
|
|
{
|
|
"epoch": 1.1961620469083156,
|
|
"grad_norm": 0.1063619317702308,
|
|
"learning_rate": 3.772609076141255e-05,
|
|
"loss": 1.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3139856457710266,
|
|
"step": 281,
|
|
"valid_targets_mean": 16146.3,
|
|
"valid_targets_min": 15423
|
|
},
|
|
{
|
|
"epoch": 1.2004264392324093,
|
|
"grad_norm": 0.14401284132936118,
|
|
"learning_rate": 3.769848401969982e-05,
|
|
"loss": 1.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21710094809532166,
|
|
"step": 282,
|
|
"valid_targets_mean": 10285.3,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 1.2046908315565032,
|
|
"grad_norm": 0.09672172159038513,
|
|
"learning_rate": 3.767072093243907e-05,
|
|
"loss": 1.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852203845977783,
|
|
"step": 283,
|
|
"valid_targets_mean": 15925.5,
|
|
"valid_targets_min": 14523
|
|
},
|
|
{
|
|
"epoch": 1.208955223880597,
|
|
"grad_norm": 0.1262000321021357,
|
|
"learning_rate": 3.7642801744884915e-05,
|
|
"loss": 1.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958885729312897,
|
|
"step": 284,
|
|
"valid_targets_mean": 16094.1,
|
|
"valid_targets_min": 14136
|
|
},
|
|
{
|
|
"epoch": 1.2132196162046909,
|
|
"grad_norm": 0.10192041571028172,
|
|
"learning_rate": 3.761472670367096e-05,
|
|
"loss": 1.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19317100942134857,
|
|
"step": 285,
|
|
"valid_targets_mean": 10279.1,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 1.2174840085287846,
|
|
"grad_norm": 0.09362756333423236,
|
|
"learning_rate": 3.758649605680758e-05,
|
|
"loss": 1.1079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28284960985183716,
|
|
"step": 286,
|
|
"valid_targets_mean": 16083.7,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 1.2217484008528785,
|
|
"grad_norm": 0.12109602620076153,
|
|
"learning_rate": 3.755811005367974e-05,
|
|
"loss": 1.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3278191089630127,
|
|
"step": 287,
|
|
"valid_targets_mean": 16021.7,
|
|
"valid_targets_min": 13371
|
|
},
|
|
{
|
|
"epoch": 1.2260127931769722,
|
|
"grad_norm": 0.10877290484635202,
|
|
"learning_rate": 3.752956894504481e-05,
|
|
"loss": 1.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23945553600788116,
|
|
"step": 288,
|
|
"valid_targets_mean": 13572.9,
|
|
"valid_targets_min": 11320
|
|
},
|
|
{
|
|
"epoch": 1.2302771855010661,
|
|
"grad_norm": 0.10807323977184002,
|
|
"learning_rate": 3.750087298303033e-05,
|
|
"loss": 1.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2769162654876709,
|
|
"step": 289,
|
|
"valid_targets_mean": 16235.5,
|
|
"valid_targets_min": 15897
|
|
},
|
|
{
|
|
"epoch": 1.2345415778251598,
|
|
"grad_norm": 0.10553321287298738,
|
|
"learning_rate": 3.7472022421131795e-05,
|
|
"loss": 1.0896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30424678325653076,
|
|
"step": 290,
|
|
"valid_targets_mean": 16193.4,
|
|
"valid_targets_min": 15433
|
|
},
|
|
{
|
|
"epoch": 1.2388059701492538,
|
|
"grad_norm": 0.11482588421498335,
|
|
"learning_rate": 3.7443017514210406e-05,
|
|
"loss": 1.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2691482901573181,
|
|
"step": 291,
|
|
"valid_targets_mean": 15633.1,
|
|
"valid_targets_min": 13683
|
|
},
|
|
{
|
|
"epoch": 1.2430703624733475,
|
|
"grad_norm": 0.12991678715706628,
|
|
"learning_rate": 3.7413858518490825e-05,
|
|
"loss": 1.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3057411313056946,
|
|
"step": 292,
|
|
"valid_targets_mean": 16081.9,
|
|
"valid_targets_min": 14675
|
|
},
|
|
{
|
|
"epoch": 1.2473347547974414,
|
|
"grad_norm": 0.10762602003132601,
|
|
"learning_rate": 3.7384545691558895e-05,
|
|
"loss": 1.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845606207847595,
|
|
"step": 293,
|
|
"valid_targets_mean": 12802.2,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 1.251599147121535,
|
|
"grad_norm": 0.11379149826582216,
|
|
"learning_rate": 3.735507929235941e-05,
|
|
"loss": 1.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856350839138031,
|
|
"step": 294,
|
|
"valid_targets_mean": 16167.1,
|
|
"valid_targets_min": 15708
|
|
},
|
|
{
|
|
"epoch": 1.255863539445629,
|
|
"grad_norm": 0.10842739788279533,
|
|
"learning_rate": 3.732545958119378e-05,
|
|
"loss": 1.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29539749026298523,
|
|
"step": 295,
|
|
"valid_targets_mean": 16142.6,
|
|
"valid_targets_min": 15200
|
|
},
|
|
{
|
|
"epoch": 1.260127931769723,
|
|
"grad_norm": 0.1134847515161196,
|
|
"learning_rate": 3.729568681971774e-05,
|
|
"loss": 1.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2004006803035736,
|
|
"step": 296,
|
|
"valid_targets_mean": 9944.9,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 1.2643923240938166,
|
|
"grad_norm": 0.11017861169396913,
|
|
"learning_rate": 3.726576127093905e-05,
|
|
"loss": 1.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579469084739685,
|
|
"step": 297,
|
|
"valid_targets_mean": 16059.4,
|
|
"valid_targets_min": 13918
|
|
},
|
|
{
|
|
"epoch": 1.2686567164179103,
|
|
"grad_norm": 0.11764815282087383,
|
|
"learning_rate": 3.7235683199215177e-05,
|
|
"loss": 1.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34623605012893677,
|
|
"step": 298,
|
|
"valid_targets_mean": 16039.6,
|
|
"valid_targets_min": 13183
|
|
},
|
|
{
|
|
"epoch": 1.2729211087420043,
|
|
"grad_norm": 0.09977539111470445,
|
|
"learning_rate": 3.7205452870250944e-05,
|
|
"loss": 1.0964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22077472507953644,
|
|
"step": 299,
|
|
"valid_targets_mean": 11850.5,
|
|
"valid_targets_min": 6001
|
|
},
|
|
{
|
|
"epoch": 1.2771855010660982,
|
|
"grad_norm": 0.12187011813696585,
|
|
"learning_rate": 3.7175070551096204e-05,
|
|
"loss": 1.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29300639033317566,
|
|
"step": 300,
|
|
"valid_targets_mean": 16068.7,
|
|
"valid_targets_min": 15191
|
|
},
|
|
{
|
|
"epoch": 1.2814498933901919,
|
|
"grad_norm": 0.10609440463015371,
|
|
"learning_rate": 3.7144536510143436e-05,
|
|
"loss": 1.0845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3064601719379425,
|
|
"step": 301,
|
|
"valid_targets_mean": 16154.0,
|
|
"valid_targets_min": 15315
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.1038987939436186,
|
|
"learning_rate": 3.711385101712544e-05,
|
|
"loss": 1.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25039976835250854,
|
|
"step": 302,
|
|
"valid_targets_mean": 14178.9,
|
|
"valid_targets_min": 11567
|
|
},
|
|
{
|
|
"epoch": 1.2899786780383795,
|
|
"grad_norm": 0.12647891000848335,
|
|
"learning_rate": 3.708301434311289e-05,
|
|
"loss": 1.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31871938705444336,
|
|
"step": 303,
|
|
"valid_targets_mean": 16119.6,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 1.2942430703624734,
|
|
"grad_norm": 0.10831118153826579,
|
|
"learning_rate": 3.7052026760511996e-05,
|
|
"loss": 1.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32566046714782715,
|
|
"step": 304,
|
|
"valid_targets_mean": 16050.9,
|
|
"valid_targets_min": 15335
|
|
},
|
|
{
|
|
"epoch": 1.2985074626865671,
|
|
"grad_norm": 0.11518145127910266,
|
|
"learning_rate": 3.7020888543062046e-05,
|
|
"loss": 1.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26352035999298096,
|
|
"step": 305,
|
|
"valid_targets_mean": 15895.6,
|
|
"valid_targets_min": 7497
|
|
},
|
|
{
|
|
"epoch": 1.302771855010661,
|
|
"grad_norm": 0.10283973178996797,
|
|
"learning_rate": 3.6989599965833024e-05,
|
|
"loss": 1.0862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285113126039505,
|
|
"step": 306,
|
|
"valid_targets_mean": 16193.2,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 1.3070362473347548,
|
|
"grad_norm": 0.11037690155204975,
|
|
"learning_rate": 3.695816130522317e-05,
|
|
"loss": 1.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22734081745147705,
|
|
"step": 307,
|
|
"valid_targets_mean": 11393.6,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.3113006396588487,
|
|
"grad_norm": 0.11239901923999157,
|
|
"learning_rate": 3.692657283895651e-05,
|
|
"loss": 1.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27877020835876465,
|
|
"step": 308,
|
|
"valid_targets_mean": 16109.6,
|
|
"valid_targets_min": 14997
|
|
},
|
|
{
|
|
"epoch": 1.3155650319829424,
|
|
"grad_norm": 0.11864238198607306,
|
|
"learning_rate": 3.689483484608048e-05,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2993168234825134,
|
|
"step": 309,
|
|
"valid_targets_mean": 16044.1,
|
|
"valid_targets_min": 13741
|
|
},
|
|
{
|
|
"epoch": 1.3198294243070363,
|
|
"grad_norm": 0.10575278296478315,
|
|
"learning_rate": 3.6862947606963364e-05,
|
|
"loss": 1.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18802747130393982,
|
|
"step": 310,
|
|
"valid_targets_mean": 9455.3,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 1.32409381663113,
|
|
"grad_norm": 0.11500374424420988,
|
|
"learning_rate": 3.6830911403291885e-05,
|
|
"loss": 1.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266864538192749,
|
|
"step": 311,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15484
|
|
},
|
|
{
|
|
"epoch": 1.328358208955224,
|
|
"grad_norm": 0.1182758601949138,
|
|
"learning_rate": 3.679872651806869e-05,
|
|
"loss": 1.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3200005888938904,
|
|
"step": 312,
|
|
"valid_targets_mean": 15993.4,
|
|
"valid_targets_min": 13968
|
|
},
|
|
{
|
|
"epoch": 1.3326226012793176,
|
|
"grad_norm": 0.10643324623262179,
|
|
"learning_rate": 3.676639323560986e-05,
|
|
"loss": 1.0445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22822721302509308,
|
|
"step": 313,
|
|
"valid_targets_mean": 12746.8,
|
|
"valid_targets_min": 8559
|
|
},
|
|
{
|
|
"epoch": 1.3368869936034116,
|
|
"grad_norm": 0.12305969133047927,
|
|
"learning_rate": 3.6733911841542365e-05,
|
|
"loss": 1.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844512462615967,
|
|
"step": 314,
|
|
"valid_targets_mean": 16179.3,
|
|
"valid_targets_min": 15291
|
|
},
|
|
{
|
|
"epoch": 1.3411513859275053,
|
|
"grad_norm": 0.10427845321006882,
|
|
"learning_rate": 3.6701282622801626e-05,
|
|
"loss": 1.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305818736553192,
|
|
"step": 315,
|
|
"valid_targets_mean": 16071.6,
|
|
"valid_targets_min": 15194
|
|
},
|
|
{
|
|
"epoch": 1.3454157782515992,
|
|
"grad_norm": 0.10949831921374652,
|
|
"learning_rate": 3.666850586762886e-05,
|
|
"loss": 1.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2693677544593811,
|
|
"step": 316,
|
|
"valid_targets_mean": 15716.5,
|
|
"valid_targets_min": 13549
|
|
},
|
|
{
|
|
"epoch": 1.349680170575693,
|
|
"grad_norm": 0.12164844653003287,
|
|
"learning_rate": 3.663558186556863e-05,
|
|
"loss": 1.1011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.298969566822052,
|
|
"step": 317,
|
|
"valid_targets_mean": 16145.2,
|
|
"valid_targets_min": 15332
|
|
},
|
|
{
|
|
"epoch": 1.3539445628997868,
|
|
"grad_norm": 0.09738532863718533,
|
|
"learning_rate": 3.660251090746627e-05,
|
|
"loss": 1.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715837359428406,
|
|
"step": 318,
|
|
"valid_targets_mean": 13275.5,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 1.3582089552238805,
|
|
"grad_norm": 0.11416736876501335,
|
|
"learning_rate": 3.656929328546526e-05,
|
|
"loss": 1.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27884674072265625,
|
|
"step": 319,
|
|
"valid_targets_mean": 16027.6,
|
|
"valid_targets_min": 14450
|
|
},
|
|
{
|
|
"epoch": 1.3624733475479744,
|
|
"grad_norm": 0.11083938998592933,
|
|
"learning_rate": 3.653592929300471e-05,
|
|
"loss": 1.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976928949356079,
|
|
"step": 320,
|
|
"valid_targets_mean": 16165.7,
|
|
"valid_targets_min": 15097
|
|
},
|
|
{
|
|
"epoch": 1.3667377398720681,
|
|
"grad_norm": 0.10560687905361504,
|
|
"learning_rate": 3.650241922481675e-05,
|
|
"loss": 1.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19759415090084076,
|
|
"step": 321,
|
|
"valid_targets_mean": 10319.3,
|
|
"valid_targets_min": 2485
|
|
},
|
|
{
|
|
"epoch": 1.371002132196162,
|
|
"grad_norm": 0.12473180072590273,
|
|
"learning_rate": 3.6468763376923886e-05,
|
|
"loss": 1.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27477532625198364,
|
|
"step": 322,
|
|
"valid_targets_mean": 16049.0,
|
|
"valid_targets_min": 14009
|
|
},
|
|
{
|
|
"epoch": 1.375266524520256,
|
|
"grad_norm": 0.1058590423892418,
|
|
"learning_rate": 3.6434962046636464e-05,
|
|
"loss": 1.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30742985010147095,
|
|
"step": 323,
|
|
"valid_targets_mean": 16117.5,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 1.3795309168443497,
|
|
"grad_norm": 0.1174095447706954,
|
|
"learning_rate": 3.6401015532549957e-05,
|
|
"loss": 1.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2281045913696289,
|
|
"step": 324,
|
|
"valid_targets_mean": 11259.9,
|
|
"valid_targets_min": 7580
|
|
},
|
|
{
|
|
"epoch": 1.3837953091684434,
|
|
"grad_norm": 0.10738254664757793,
|
|
"learning_rate": 3.6366924134542386e-05,
|
|
"loss": 1.0844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918463945388794,
|
|
"step": 325,
|
|
"valid_targets_mean": 16034.1,
|
|
"valid_targets_min": 13438
|
|
},
|
|
{
|
|
"epoch": 1.3880597014925373,
|
|
"grad_norm": 0.10609115049987881,
|
|
"learning_rate": 3.633268815377166e-05,
|
|
"loss": 1.0934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3158262372016907,
|
|
"step": 326,
|
|
"valid_targets_mean": 16068.8,
|
|
"valid_targets_min": 14138
|
|
},
|
|
{
|
|
"epoch": 1.3923240938166312,
|
|
"grad_norm": 0.10996080942387625,
|
|
"learning_rate": 3.6298307892672895e-05,
|
|
"loss": 1.0868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533453702926636,
|
|
"step": 327,
|
|
"valid_targets_mean": 15131.8,
|
|
"valid_targets_min": 12946
|
|
},
|
|
{
|
|
"epoch": 1.396588486140725,
|
|
"grad_norm": 0.10500793178878119,
|
|
"learning_rate": 3.626378365495577e-05,
|
|
"loss": 1.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2994054853916168,
|
|
"step": 328,
|
|
"valid_targets_mean": 16195.5,
|
|
"valid_targets_min": 15684
|
|
},
|
|
{
|
|
"epoch": 1.4008528784648187,
|
|
"grad_norm": 0.11382782131750063,
|
|
"learning_rate": 3.622911574560181e-05,
|
|
"loss": 1.0828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164530396461487,
|
|
"step": 329,
|
|
"valid_targets_mean": 16103.7,
|
|
"valid_targets_min": 14692
|
|
},
|
|
{
|
|
"epoch": 1.4051172707889126,
|
|
"grad_norm": 0.10278724516883292,
|
|
"learning_rate": 3.6194304470861744e-05,
|
|
"loss": 1.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289953351020813,
|
|
"step": 330,
|
|
"valid_targets_mean": 15808.8,
|
|
"valid_targets_min": 13809
|
|
},
|
|
{
|
|
"epoch": 1.4093816631130065,
|
|
"grad_norm": 0.11442800682812106,
|
|
"learning_rate": 3.615935013825272e-05,
|
|
"loss": 1.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147600293159485,
|
|
"step": 331,
|
|
"valid_targets_mean": 16092.8,
|
|
"valid_targets_min": 14491
|
|
},
|
|
{
|
|
"epoch": 1.4136460554371002,
|
|
"grad_norm": 0.09620723031666387,
|
|
"learning_rate": 3.612425305655569e-05,
|
|
"loss": 1.0848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246423065662384,
|
|
"step": 332,
|
|
"valid_targets_mean": 10743.0,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 1.417910447761194,
|
|
"grad_norm": 0.11642293427670641,
|
|
"learning_rate": 3.6089013535812593e-05,
|
|
"loss": 1.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640111446380615,
|
|
"step": 333,
|
|
"valid_targets_mean": 16181.2,
|
|
"valid_targets_min": 15494
|
|
},
|
|
{
|
|
"epoch": 1.4221748400852878,
|
|
"grad_norm": 0.10500906362714273,
|
|
"learning_rate": 3.6053631887323656e-05,
|
|
"loss": 1.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3187768757343292,
|
|
"step": 334,
|
|
"valid_targets_mean": 16085.9,
|
|
"valid_targets_min": 14017
|
|
},
|
|
{
|
|
"epoch": 1.4264392324093818,
|
|
"grad_norm": 0.11390522249165878,
|
|
"learning_rate": 3.601810842364465e-05,
|
|
"loss": 1.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17636647820472717,
|
|
"step": 335,
|
|
"valid_targets_mean": 9224.1,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.4307036247334755,
|
|
"grad_norm": 0.09323181573435452,
|
|
"learning_rate": 3.598244345858412e-05,
|
|
"loss": 1.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25841224193573,
|
|
"step": 336,
|
|
"valid_targets_mean": 16145.4,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 1.4349680170575694,
|
|
"grad_norm": 0.10650889590322855,
|
|
"learning_rate": 3.594663730720059e-05,
|
|
"loss": 1.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3043976128101349,
|
|
"step": 337,
|
|
"valid_targets_mean": 16177.5,
|
|
"valid_targets_min": 14939
|
|
},
|
|
{
|
|
"epoch": 1.439232409381663,
|
|
"grad_norm": 0.10833359739443055,
|
|
"learning_rate": 3.591069028579982e-05,
|
|
"loss": 1.1052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.234279066324234,
|
|
"step": 338,
|
|
"valid_targets_mean": 13436.0,
|
|
"valid_targets_min": 9373
|
|
},
|
|
{
|
|
"epoch": 1.443496801705757,
|
|
"grad_norm": 0.09317777212553223,
|
|
"learning_rate": 3.5874602711931994e-05,
|
|
"loss": 1.1005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29291391372680664,
|
|
"step": 339,
|
|
"valid_targets_mean": 16082.5,
|
|
"valid_targets_min": 15085
|
|
},
|
|
{
|
|
"epoch": 1.4477611940298507,
|
|
"grad_norm": 0.10802080941280771,
|
|
"learning_rate": 3.5838374904388904e-05,
|
|
"loss": 1.0884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31357914209365845,
|
|
"step": 340,
|
|
"valid_targets_mean": 16151.0,
|
|
"valid_targets_min": 14158
|
|
},
|
|
{
|
|
"epoch": 1.4520255863539446,
|
|
"grad_norm": 0.10500373689347785,
|
|
"learning_rate": 3.580200718320115e-05,
|
|
"loss": 1.0936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25797849893569946,
|
|
"step": 341,
|
|
"valid_targets_mean": 16148.4,
|
|
"valid_targets_min": 15079
|
|
},
|
|
{
|
|
"epoch": 1.4562899786780383,
|
|
"grad_norm": 0.08893316611041793,
|
|
"learning_rate": 3.576549986963531e-05,
|
|
"loss": 1.0755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29892832040786743,
|
|
"step": 342,
|
|
"valid_targets_mean": 16090.0,
|
|
"valid_targets_min": 14649
|
|
},
|
|
{
|
|
"epoch": 1.4605543710021323,
|
|
"grad_norm": 0.1129400479461237,
|
|
"learning_rate": 3.5728853286191075e-05,
|
|
"loss": 1.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25045305490493774,
|
|
"step": 343,
|
|
"valid_targets_mean": 12897.7,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 1.464818763326226,
|
|
"grad_norm": 0.09923721746532146,
|
|
"learning_rate": 3.5692067756598465e-05,
|
|
"loss": 1.0641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657395303249359,
|
|
"step": 344,
|
|
"valid_targets_mean": 16182.4,
|
|
"valid_targets_min": 15622
|
|
},
|
|
{
|
|
"epoch": 1.4690831556503199,
|
|
"grad_norm": 0.10414322623359287,
|
|
"learning_rate": 3.5655143605814885e-05,
|
|
"loss": 1.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30559101700782776,
|
|
"step": 345,
|
|
"valid_targets_mean": 15859.1,
|
|
"valid_targets_min": 9219
|
|
},
|
|
{
|
|
"epoch": 1.4733475479744136,
|
|
"grad_norm": 0.10311905725271821,
|
|
"learning_rate": 3.561808116002232e-05,
|
|
"loss": 1.0733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20559480786323547,
|
|
"step": 346,
|
|
"valid_targets_mean": 10813.5,
|
|
"valid_targets_min": 2163
|
|
},
|
|
{
|
|
"epoch": 1.4776119402985075,
|
|
"grad_norm": 0.08745762412229278,
|
|
"learning_rate": 3.5580880746624444e-05,
|
|
"loss": 1.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763786315917969,
|
|
"step": 347,
|
|
"valid_targets_mean": 16058.7,
|
|
"valid_targets_min": 15027
|
|
},
|
|
{
|
|
"epoch": 1.4818763326226012,
|
|
"grad_norm": 0.10590896740145468,
|
|
"learning_rate": 3.5543542694243685e-05,
|
|
"loss": 1.1002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3163069188594818,
|
|
"step": 348,
|
|
"valid_targets_mean": 16115.7,
|
|
"valid_targets_min": 15416
|
|
},
|
|
{
|
|
"epoch": 1.4861407249466951,
|
|
"grad_norm": 0.1074938168153569,
|
|
"learning_rate": 3.5506067332718355e-05,
|
|
"loss": 1.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2302987277507782,
|
|
"step": 349,
|
|
"valid_targets_mean": 11411.9,
|
|
"valid_targets_min": 7691
|
|
},
|
|
{
|
|
"epoch": 1.4904051172707888,
|
|
"grad_norm": 0.1142086897237537,
|
|
"learning_rate": 3.546845499309976e-05,
|
|
"loss": 1.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707352638244629,
|
|
"step": 350,
|
|
"valid_targets_mean": 15877.5,
|
|
"valid_targets_min": 6625
|
|
},
|
|
{
|
|
"epoch": 1.4946695095948828,
|
|
"grad_norm": 0.11252372143041216,
|
|
"learning_rate": 3.5430706007649225e-05,
|
|
"loss": 1.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3323717713356018,
|
|
"step": 351,
|
|
"valid_targets_mean": 16176.0,
|
|
"valid_targets_min": 15311
|
|
},
|
|
{
|
|
"epoch": 1.4989339019189765,
|
|
"grad_norm": 0.09905787925959554,
|
|
"learning_rate": 3.539282070983518e-05,
|
|
"loss": 1.1052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24915950000286102,
|
|
"step": 352,
|
|
"valid_targets_mean": 14213.2,
|
|
"valid_targets_min": 11390
|
|
},
|
|
{
|
|
"epoch": 1.5031982942430704,
|
|
"grad_norm": 0.09155440862974068,
|
|
"learning_rate": 3.535479943433023e-05,
|
|
"loss": 1.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33892518281936646,
|
|
"step": 353,
|
|
"valid_targets_mean": 15866.4,
|
|
"valid_targets_min": 14218
|
|
},
|
|
{
|
|
"epoch": 1.5074626865671643,
|
|
"grad_norm": 0.09675889044150787,
|
|
"learning_rate": 3.5316642517008184e-05,
|
|
"loss": 1.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30710119009017944,
|
|
"step": 354,
|
|
"valid_targets_mean": 16220.0,
|
|
"valid_targets_min": 15671
|
|
},
|
|
{
|
|
"epoch": 1.511727078891258,
|
|
"grad_norm": 0.09012121128169051,
|
|
"learning_rate": 3.5278350294941074e-05,
|
|
"loss": 1.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905885875225067,
|
|
"step": 355,
|
|
"valid_targets_mean": 15915.4,
|
|
"valid_targets_min": 14392
|
|
},
|
|
{
|
|
"epoch": 1.5159914712153517,
|
|
"grad_norm": 0.09875180730881963,
|
|
"learning_rate": 3.523992310639622e-05,
|
|
"loss": 1.0666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29755884408950806,
|
|
"step": 356,
|
|
"valid_targets_mean": 16180.0,
|
|
"valid_targets_min": 15377
|
|
},
|
|
{
|
|
"epoch": 1.5202558635394456,
|
|
"grad_norm": 0.10278316161695568,
|
|
"learning_rate": 3.5201361290833165e-05,
|
|
"loss": 1.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2242705523967743,
|
|
"step": 357,
|
|
"valid_targets_mean": 11556.3,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 1.5245202558635396,
|
|
"grad_norm": 0.10574145835412747,
|
|
"learning_rate": 3.516266518890079e-05,
|
|
"loss": 1.0652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752949297428131,
|
|
"step": 358,
|
|
"valid_targets_mean": 16074.3,
|
|
"valid_targets_min": 14619
|
|
},
|
|
{
|
|
"epoch": 1.5287846481876333,
|
|
"grad_norm": 0.10093142984805428,
|
|
"learning_rate": 3.512383514243419e-05,
|
|
"loss": 1.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28041160106658936,
|
|
"step": 359,
|
|
"valid_targets_mean": 16153.8,
|
|
"valid_targets_min": 14469
|
|
},
|
|
{
|
|
"epoch": 1.533049040511727,
|
|
"grad_norm": 0.09744746770292749,
|
|
"learning_rate": 3.5084871494451716e-05,
|
|
"loss": 1.0309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20054638385772705,
|
|
"step": 360,
|
|
"valid_targets_mean": 10304.3,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 1.537313432835821,
|
|
"grad_norm": 0.10122397991227469,
|
|
"learning_rate": 3.5045774589151955e-05,
|
|
"loss": 1.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25514698028564453,
|
|
"step": 361,
|
|
"valid_targets_mean": 15944.1,
|
|
"valid_targets_min": 9862
|
|
},
|
|
{
|
|
"epoch": 1.5415778251599148,
|
|
"grad_norm": 0.10728996447574861,
|
|
"learning_rate": 3.500654477191064e-05,
|
|
"loss": 1.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32900580763816833,
|
|
"step": 362,
|
|
"valid_targets_mean": 16036.2,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 1.5458422174840085,
|
|
"grad_norm": 0.102447950723291,
|
|
"learning_rate": 3.496718238927764e-05,
|
|
"loss": 1.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20979315042495728,
|
|
"step": 363,
|
|
"valid_targets_mean": 11317.7,
|
|
"valid_targets_min": 7196
|
|
},
|
|
{
|
|
"epoch": 1.5501066098081022,
|
|
"grad_norm": 0.09806918778187139,
|
|
"learning_rate": 3.492768778897388e-05,
|
|
"loss": 1.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29185420274734497,
|
|
"step": 364,
|
|
"valid_targets_mean": 16123.4,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 1.5543710021321961,
|
|
"grad_norm": 0.09742602819945653,
|
|
"learning_rate": 3.4888061319888276e-05,
|
|
"loss": 1.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3228592872619629,
|
|
"step": 365,
|
|
"valid_targets_mean": 16105.7,
|
|
"valid_targets_min": 15224
|
|
},
|
|
{
|
|
"epoch": 1.55863539445629,
|
|
"grad_norm": 0.098483398481098,
|
|
"learning_rate": 3.484830333207466e-05,
|
|
"loss": 1.0902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537546157836914,
|
|
"step": 366,
|
|
"valid_targets_mean": 14593.3,
|
|
"valid_targets_min": 12496
|
|
},
|
|
{
|
|
"epoch": 1.5628997867803838,
|
|
"grad_norm": 0.1051913885625607,
|
|
"learning_rate": 3.4808414176748666e-05,
|
|
"loss": 1.0668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29644209146499634,
|
|
"step": 367,
|
|
"valid_targets_mean": 16160.7,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 1.5671641791044775,
|
|
"grad_norm": 0.09931568374140004,
|
|
"learning_rate": 3.476839420628466e-05,
|
|
"loss": 1.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654361128807068,
|
|
"step": 368,
|
|
"valid_targets_mean": 13384.8,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.09532707827004713,
|
|
"learning_rate": 3.472824377421257e-05,
|
|
"loss": 1.0694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25141605734825134,
|
|
"step": 369,
|
|
"valid_targets_mean": 15808.2,
|
|
"valid_targets_min": 14240
|
|
},
|
|
{
|
|
"epoch": 1.5756929637526653,
|
|
"grad_norm": 0.11357695389208539,
|
|
"learning_rate": 3.4687963235214845e-05,
|
|
"loss": 1.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3391112685203552,
|
|
"step": 370,
|
|
"valid_targets_mean": 16092.8,
|
|
"valid_targets_min": 15551
|
|
},
|
|
{
|
|
"epoch": 1.579957356076759,
|
|
"grad_norm": 0.09795030453789765,
|
|
"learning_rate": 3.464755294512325e-05,
|
|
"loss": 1.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18399649858474731,
|
|
"step": 371,
|
|
"valid_targets_mean": 8826.2,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 1.5842217484008527,
|
|
"grad_norm": 0.11388249602359929,
|
|
"learning_rate": 3.4607013260915765e-05,
|
|
"loss": 1.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27913787961006165,
|
|
"step": 372,
|
|
"valid_targets_mean": 16100.2,
|
|
"valid_targets_min": 14021
|
|
},
|
|
{
|
|
"epoch": 1.5884861407249466,
|
|
"grad_norm": 0.09720928523551613,
|
|
"learning_rate": 3.4566344540713404e-05,
|
|
"loss": 1.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852057218551636,
|
|
"step": 373,
|
|
"valid_targets_mean": 16198.0,
|
|
"valid_targets_min": 15089
|
|
},
|
|
{
|
|
"epoch": 1.5927505330490406,
|
|
"grad_norm": 0.09350935460946826,
|
|
"learning_rate": 3.452554714377706e-05,
|
|
"loss": 1.0639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22961363196372986,
|
|
"step": 374,
|
|
"valid_targets_mean": 12244.4,
|
|
"valid_targets_min": 7199
|
|
},
|
|
{
|
|
"epoch": 1.5970149253731343,
|
|
"grad_norm": 0.10620638975343262,
|
|
"learning_rate": 3.448462143050436e-05,
|
|
"loss": 1.0834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900192141532898,
|
|
"step": 375,
|
|
"valid_targets_mean": 16018.0,
|
|
"valid_targets_min": 13918
|
|
},
|
|
{
|
|
"epoch": 1.6012793176972282,
|
|
"grad_norm": 0.11895429282303907,
|
|
"learning_rate": 3.4443567762426444e-05,
|
|
"loss": 1.0865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31047362089157104,
|
|
"step": 376,
|
|
"valid_targets_mean": 16189.1,
|
|
"valid_targets_min": 15559
|
|
},
|
|
{
|
|
"epoch": 1.6055437100213221,
|
|
"grad_norm": 0.11770977465475815,
|
|
"learning_rate": 3.440238650220477e-05,
|
|
"loss": 1.053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2448553889989853,
|
|
"step": 377,
|
|
"valid_targets_mean": 14520.8,
|
|
"valid_targets_min": 12623
|
|
},
|
|
{
|
|
"epoch": 1.6098081023454158,
|
|
"grad_norm": 0.10811845345934151,
|
|
"learning_rate": 3.4361078013627945e-05,
|
|
"loss": 1.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28809183835983276,
|
|
"step": 378,
|
|
"valid_targets_mean": 16176.3,
|
|
"valid_targets_min": 15520
|
|
},
|
|
{
|
|
"epoch": 1.6140724946695095,
|
|
"grad_norm": 0.10527196242299709,
|
|
"learning_rate": 3.4319642661608474e-05,
|
|
"loss": 1.1079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2915457785129547,
|
|
"step": 379,
|
|
"valid_targets_mean": 16143.0,
|
|
"valid_targets_min": 15315
|
|
},
|
|
{
|
|
"epoch": 1.6183368869936035,
|
|
"grad_norm": 0.09062278280785045,
|
|
"learning_rate": 3.427808081217957e-05,
|
|
"loss": 1.1002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27763593196868896,
|
|
"step": 380,
|
|
"valid_targets_mean": 15684.0,
|
|
"valid_targets_min": 11382
|
|
},
|
|
{
|
|
"epoch": 1.6226012793176974,
|
|
"grad_norm": 0.10878250650706273,
|
|
"learning_rate": 3.423639283249189e-05,
|
|
"loss": 1.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29199931025505066,
|
|
"step": 381,
|
|
"valid_targets_mean": 16226.9,
|
|
"valid_targets_min": 15486
|
|
},
|
|
{
|
|
"epoch": 1.626865671641791,
|
|
"grad_norm": 0.0984304645252414,
|
|
"learning_rate": 3.419457909081032e-05,
|
|
"loss": 1.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24469570815563202,
|
|
"step": 382,
|
|
"valid_targets_mean": 11108.2,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 1.6311300639658848,
|
|
"grad_norm": 0.10239583293427146,
|
|
"learning_rate": 3.415263995651069e-05,
|
|
"loss": 1.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764725685119629,
|
|
"step": 383,
|
|
"valid_targets_mean": 16018.8,
|
|
"valid_targets_min": 14477
|
|
},
|
|
{
|
|
"epoch": 1.6353944562899787,
|
|
"grad_norm": 0.09683744024921145,
|
|
"learning_rate": 3.411057580007653e-05,
|
|
"loss": 1.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245445787906647,
|
|
"step": 384,
|
|
"valid_targets_mean": 16158.9,
|
|
"valid_targets_min": 15519
|
|
},
|
|
{
|
|
"epoch": 1.6396588486140726,
|
|
"grad_norm": 0.10033897172651648,
|
|
"learning_rate": 3.4068386993095806e-05,
|
|
"loss": 1.0394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17178496718406677,
|
|
"step": 385,
|
|
"valid_targets_mean": 9351.1,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 1.6439232409381663,
|
|
"grad_norm": 0.09317978660568199,
|
|
"learning_rate": 3.402607390825762e-05,
|
|
"loss": 1.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26262950897216797,
|
|
"step": 386,
|
|
"valid_targets_mean": 16111.7,
|
|
"valid_targets_min": 14960
|
|
},
|
|
{
|
|
"epoch": 1.64818763326226,
|
|
"grad_norm": 0.10420508406089538,
|
|
"learning_rate": 3.398363691934894e-05,
|
|
"loss": 1.0802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32096874713897705,
|
|
"step": 387,
|
|
"valid_targets_mean": 16045.2,
|
|
"valid_targets_min": 14965
|
|
},
|
|
{
|
|
"epoch": 1.652452025586354,
|
|
"grad_norm": 0.09700917897568581,
|
|
"learning_rate": 3.3941076401251244e-05,
|
|
"loss": 1.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21869328618049622,
|
|
"step": 388,
|
|
"valid_targets_mean": 12687.9,
|
|
"valid_targets_min": 10349
|
|
},
|
|
{
|
|
"epoch": 1.6567164179104479,
|
|
"grad_norm": 0.10774024481003694,
|
|
"learning_rate": 3.3898392729937295e-05,
|
|
"loss": 1.0647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28958582878112793,
|
|
"step": 389,
|
|
"valid_targets_mean": 16111.3,
|
|
"valid_targets_min": 14523
|
|
},
|
|
{
|
|
"epoch": 1.6609808102345416,
|
|
"grad_norm": 0.09957916226371884,
|
|
"learning_rate": 3.385558628246774e-05,
|
|
"loss": 1.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041366934776306,
|
|
"step": 390,
|
|
"valid_targets_mean": 16108.1,
|
|
"valid_targets_min": 15458
|
|
},
|
|
{
|
|
"epoch": 1.6652452025586353,
|
|
"grad_norm": 0.09117601831492861,
|
|
"learning_rate": 3.381265743698781e-05,
|
|
"loss": 1.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2512508034706116,
|
|
"step": 391,
|
|
"valid_targets_mean": 15635.0,
|
|
"valid_targets_min": 14271
|
|
},
|
|
{
|
|
"epoch": 1.6695095948827292,
|
|
"grad_norm": 0.1000985389951321,
|
|
"learning_rate": 3.3769606572724e-05,
|
|
"loss": 1.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28618407249450684,
|
|
"step": 392,
|
|
"valid_targets_mean": 16034.4,
|
|
"valid_targets_min": 14662
|
|
},
|
|
{
|
|
"epoch": 1.6737739872068231,
|
|
"grad_norm": 0.09713785731020419,
|
|
"learning_rate": 3.3726434069980686e-05,
|
|
"loss": 1.0603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23872718214988708,
|
|
"step": 393,
|
|
"valid_targets_mean": 12945.9,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 1.6780383795309168,
|
|
"grad_norm": 0.10442998628673013,
|
|
"learning_rate": 3.368314031013678e-05,
|
|
"loss": 1.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725452184677124,
|
|
"step": 394,
|
|
"valid_targets_mean": 16081.7,
|
|
"valid_targets_min": 13924
|
|
},
|
|
{
|
|
"epoch": 1.6823027718550105,
|
|
"grad_norm": 0.09104993661211773,
|
|
"learning_rate": 3.363972567564236e-05,
|
|
"loss": 1.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30256763100624084,
|
|
"step": 395,
|
|
"valid_targets_mean": 16174.0,
|
|
"valid_targets_min": 15264
|
|
},
|
|
{
|
|
"epoch": 1.6865671641791045,
|
|
"grad_norm": 0.11376441315983579,
|
|
"learning_rate": 3.35961905500153e-05,
|
|
"loss": 1.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1877935379743576,
|
|
"step": 396,
|
|
"valid_targets_mean": 9338.9,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 1.6908315565031984,
|
|
"grad_norm": 0.09657933997454504,
|
|
"learning_rate": 3.3552535317837855e-05,
|
|
"loss": 1.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732711434364319,
|
|
"step": 397,
|
|
"valid_targets_mean": 16084.7,
|
|
"valid_targets_min": 14605
|
|
},
|
|
{
|
|
"epoch": 1.695095948827292,
|
|
"grad_norm": 0.11037803544797944,
|
|
"learning_rate": 3.35087603647533e-05,
|
|
"loss": 1.0973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31366705894470215,
|
|
"step": 398,
|
|
"valid_targets_mean": 16162.2,
|
|
"valid_targets_min": 15565
|
|
},
|
|
{
|
|
"epoch": 1.6993603411513858,
|
|
"grad_norm": 0.09221140393885656,
|
|
"learning_rate": 3.346486607746249e-05,
|
|
"loss": 1.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18481725454330444,
|
|
"step": 399,
|
|
"valid_targets_mean": 10229.1,
|
|
"valid_targets_min": 4903
|
|
},
|
|
{
|
|
"epoch": 1.7036247334754797,
|
|
"grad_norm": 0.10402988598699307,
|
|
"learning_rate": 3.342085284372047e-05,
|
|
"loss": 1.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2809121012687683,
|
|
"step": 400,
|
|
"valid_targets_mean": 16118.6,
|
|
"valid_targets_min": 15447
|
|
},
|
|
{
|
|
"epoch": 1.7078891257995736,
|
|
"grad_norm": 0.10210082909581784,
|
|
"learning_rate": 3.337672105233303e-05,
|
|
"loss": 1.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28496190905570984,
|
|
"step": 401,
|
|
"valid_targets_mean": 16183.3,
|
|
"valid_targets_min": 14901
|
|
},
|
|
{
|
|
"epoch": 1.7121535181236673,
|
|
"grad_norm": 0.10430371713198956,
|
|
"learning_rate": 3.3332471093153296e-05,
|
|
"loss": 1.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.245585098862648,
|
|
"step": 402,
|
|
"valid_targets_mean": 14261.3,
|
|
"valid_targets_min": 10720
|
|
},
|
|
{
|
|
"epoch": 1.716417910447761,
|
|
"grad_norm": 0.09623944868007181,
|
|
"learning_rate": 3.3288103357078244e-05,
|
|
"loss": 1.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3118829131126404,
|
|
"step": 403,
|
|
"valid_targets_mean": 16147.8,
|
|
"valid_targets_min": 15478
|
|
},
|
|
{
|
|
"epoch": 1.720682302771855,
|
|
"grad_norm": 0.10461905076401001,
|
|
"learning_rate": 3.324361823604529e-05,
|
|
"loss": 1.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31964877247810364,
|
|
"step": 404,
|
|
"valid_targets_mean": 16153.6,
|
|
"valid_targets_min": 15203
|
|
},
|
|
{
|
|
"epoch": 1.724946695095949,
|
|
"grad_norm": 0.10483890465887298,
|
|
"learning_rate": 3.319901612302881e-05,
|
|
"loss": 1.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522711753845215,
|
|
"step": 405,
|
|
"valid_targets_mean": 16198.2,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 1.7292110874200426,
|
|
"grad_norm": 0.10211663027091705,
|
|
"learning_rate": 3.315429741203666e-05,
|
|
"loss": 1.0659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766035795211792,
|
|
"step": 406,
|
|
"valid_targets_mean": 16216.7,
|
|
"valid_targets_min": 15770
|
|
},
|
|
{
|
|
"epoch": 1.7334754797441365,
|
|
"grad_norm": 0.10313172183281555,
|
|
"learning_rate": 3.3109462498106705e-05,
|
|
"loss": 1.0433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2088719755411148,
|
|
"step": 407,
|
|
"valid_targets_mean": 11365.8,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 1.7377398720682304,
|
|
"grad_norm": 0.0941022308122513,
|
|
"learning_rate": 3.306451177730333e-05,
|
|
"loss": 1.0603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26309818029403687,
|
|
"step": 408,
|
|
"valid_targets_mean": 16114.1,
|
|
"valid_targets_min": 15283
|
|
},
|
|
{
|
|
"epoch": 1.7420042643923241,
|
|
"grad_norm": 0.09832557033424867,
|
|
"learning_rate": 3.301944564671394e-05,
|
|
"loss": 1.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31220537424087524,
|
|
"step": 409,
|
|
"valid_targets_mean": 16077.3,
|
|
"valid_targets_min": 13272
|
|
},
|
|
{
|
|
"epoch": 1.7462686567164178,
|
|
"grad_norm": 0.10380893729368296,
|
|
"learning_rate": 3.297426450444546e-05,
|
|
"loss": 1.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19773633778095245,
|
|
"step": 410,
|
|
"valid_targets_mean": 11509.2,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 1.7505330490405118,
|
|
"grad_norm": 0.09354209314994967,
|
|
"learning_rate": 3.292896874962078e-05,
|
|
"loss": 1.0592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590077817440033,
|
|
"step": 411,
|
|
"valid_targets_mean": 16136.8,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 1.7547974413646057,
|
|
"grad_norm": 0.10457483158682969,
|
|
"learning_rate": 3.2883558782375294e-05,
|
|
"loss": 1.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902870774269104,
|
|
"step": 412,
|
|
"valid_targets_mean": 16074.4,
|
|
"valid_targets_min": 13272
|
|
},
|
|
{
|
|
"epoch": 1.7590618336886994,
|
|
"grad_norm": 0.10164753127073986,
|
|
"learning_rate": 3.283803500385332e-05,
|
|
"loss": 1.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24309290945529938,
|
|
"step": 413,
|
|
"valid_targets_mean": 13536.3,
|
|
"valid_targets_min": 10258
|
|
},
|
|
{
|
|
"epoch": 1.763326226012793,
|
|
"grad_norm": 0.09647869256647515,
|
|
"learning_rate": 3.2792397816204546e-05,
|
|
"loss": 1.0904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27866899967193604,
|
|
"step": 414,
|
|
"valid_targets_mean": 16130.6,
|
|
"valid_targets_min": 14918
|
|
},
|
|
{
|
|
"epoch": 1.767590618336887,
|
|
"grad_norm": 0.10775061750755592,
|
|
"learning_rate": 3.2746647622580524e-05,
|
|
"loss": 1.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3050500750541687,
|
|
"step": 415,
|
|
"valid_targets_mean": 16080.5,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 1.771855010660981,
|
|
"grad_norm": 0.10677879014487571,
|
|
"learning_rate": 3.270078482713106e-05,
|
|
"loss": 1.0373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534022629261017,
|
|
"step": 416,
|
|
"valid_targets_mean": 15262.0,
|
|
"valid_targets_min": 13024
|
|
},
|
|
{
|
|
"epoch": 1.7761194029850746,
|
|
"grad_norm": 0.09942107059165145,
|
|
"learning_rate": 3.265480983500069e-05,
|
|
"loss": 1.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30068522691726685,
|
|
"step": 417,
|
|
"valid_targets_mean": 16156.2,
|
|
"valid_targets_min": 14527
|
|
},
|
|
{
|
|
"epoch": 1.7803837953091683,
|
|
"grad_norm": 0.11045061529323917,
|
|
"learning_rate": 3.260872305232507e-05,
|
|
"loss": 1.0587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257032573223114,
|
|
"step": 418,
|
|
"valid_targets_mean": 13181.9,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 1.7846481876332623,
|
|
"grad_norm": 0.0893749245536571,
|
|
"learning_rate": 3.256252488622738e-05,
|
|
"loss": 1.0487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24879369139671326,
|
|
"step": 419,
|
|
"valid_targets_mean": 16189.6,
|
|
"valid_targets_min": 14586
|
|
},
|
|
{
|
|
"epoch": 1.7889125799573562,
|
|
"grad_norm": 0.08876253659859583,
|
|
"learning_rate": 3.251621574481475e-05,
|
|
"loss": 1.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002496063709259,
|
|
"step": 420,
|
|
"valid_targets_mean": 15984.7,
|
|
"valid_targets_min": 13761
|
|
},
|
|
{
|
|
"epoch": 1.79317697228145,
|
|
"grad_norm": 0.11253982334844297,
|
|
"learning_rate": 3.246979603717467e-05,
|
|
"loss": 1.0833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2119869887828827,
|
|
"step": 421,
|
|
"valid_targets_mean": 10624.0,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 1.7974413646055436,
|
|
"grad_norm": 0.0900422253501272,
|
|
"learning_rate": 3.242326617337133e-05,
|
|
"loss": 1.0628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25483453273773193,
|
|
"step": 422,
|
|
"valid_targets_mean": 16094.7,
|
|
"valid_targets_min": 14668
|
|
},
|
|
{
|
|
"epoch": 1.8017057569296375,
|
|
"grad_norm": 0.10151270314343064,
|
|
"learning_rate": 3.2376626564442016e-05,
|
|
"loss": 1.0492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30496302247047424,
|
|
"step": 423,
|
|
"valid_targets_mean": 16165.9,
|
|
"valid_targets_min": 15072
|
|
},
|
|
{
|
|
"epoch": 1.8059701492537314,
|
|
"grad_norm": 0.09425213423036802,
|
|
"learning_rate": 3.2329877622393515e-05,
|
|
"loss": 1.0628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23983056843280792,
|
|
"step": 424,
|
|
"valid_targets_mean": 11976.9,
|
|
"valid_targets_min": 8200
|
|
},
|
|
{
|
|
"epoch": 1.8102345415778252,
|
|
"grad_norm": 0.08962569266892263,
|
|
"learning_rate": 3.228301976019841e-05,
|
|
"loss": 1.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29198259115219116,
|
|
"step": 425,
|
|
"valid_targets_mean": 15927.9,
|
|
"valid_targets_min": 14158
|
|
},
|
|
{
|
|
"epoch": 1.8144989339019189,
|
|
"grad_norm": 0.09580096774930781,
|
|
"learning_rate": 3.22360533917915e-05,
|
|
"loss": 1.0784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894361913204193,
|
|
"step": 426,
|
|
"valid_targets_mean": 16185.9,
|
|
"valid_targets_min": 15142
|
|
},
|
|
{
|
|
"epoch": 1.8187633262260128,
|
|
"grad_norm": 0.09249553036023676,
|
|
"learning_rate": 3.218897893206608e-05,
|
|
"loss": 1.0487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24148689210414886,
|
|
"step": 427,
|
|
"valid_targets_mean": 14200.0,
|
|
"valid_targets_min": 11102
|
|
},
|
|
{
|
|
"epoch": 1.8230277185501067,
|
|
"grad_norm": 0.08309132262925692,
|
|
"learning_rate": 3.2141796796870335e-05,
|
|
"loss": 1.0652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28260403871536255,
|
|
"step": 428,
|
|
"valid_targets_mean": 16079.9,
|
|
"valid_targets_min": 13554
|
|
},
|
|
{
|
|
"epoch": 1.8272921108742004,
|
|
"grad_norm": 0.10307797886618732,
|
|
"learning_rate": 3.2094507403003614e-05,
|
|
"loss": 1.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31476035714149475,
|
|
"step": 429,
|
|
"valid_targets_mean": 16110.5,
|
|
"valid_targets_min": 13775
|
|
},
|
|
{
|
|
"epoch": 1.831556503198294,
|
|
"grad_norm": 0.1012754124763445,
|
|
"learning_rate": 3.2047111168212785e-05,
|
|
"loss": 1.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281475305557251,
|
|
"step": 430,
|
|
"valid_targets_mean": 15939.2,
|
|
"valid_targets_min": 14217
|
|
},
|
|
{
|
|
"epoch": 1.835820895522388,
|
|
"grad_norm": 0.08777316069006215,
|
|
"learning_rate": 3.1999608511188524e-05,
|
|
"loss": 1.045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696187496185303,
|
|
"step": 431,
|
|
"valid_targets_mean": 16192.9,
|
|
"valid_targets_min": 15498
|
|
},
|
|
{
|
|
"epoch": 1.840085287846482,
|
|
"grad_norm": 0.09668306687701554,
|
|
"learning_rate": 3.1951999851561625e-05,
|
|
"loss": 1.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21835455298423767,
|
|
"step": 432,
|
|
"valid_targets_mean": 11432.9,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 1.8443496801705757,
|
|
"grad_norm": 0.09028815400417374,
|
|
"learning_rate": 3.190428560989931e-05,
|
|
"loss": 1.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26674598455429077,
|
|
"step": 433,
|
|
"valid_targets_mean": 16036.1,
|
|
"valid_targets_min": 12433
|
|
},
|
|
{
|
|
"epoch": 1.8486140724946694,
|
|
"grad_norm": 0.09487927012756697,
|
|
"learning_rate": 3.185646620770146e-05,
|
|
"loss": 1.0461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3215363621711731,
|
|
"step": 434,
|
|
"valid_targets_mean": 16150.6,
|
|
"valid_targets_min": 15481
|
|
},
|
|
{
|
|
"epoch": 1.8528784648187633,
|
|
"grad_norm": 0.0973376338888817,
|
|
"learning_rate": 3.180854206739696e-05,
|
|
"loss": 1.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15969006717205048,
|
|
"step": 435,
|
|
"valid_targets_mean": 7823.2,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.10487639617317966,
|
|
"learning_rate": 3.176051361233991e-05,
|
|
"loss": 1.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26723185181617737,
|
|
"step": 436,
|
|
"valid_targets_mean": 16042.1,
|
|
"valid_targets_min": 13667
|
|
},
|
|
{
|
|
"epoch": 1.861407249466951,
|
|
"grad_norm": 0.09165658968925329,
|
|
"learning_rate": 3.171238126680594e-05,
|
|
"loss": 1.0663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830873727798462,
|
|
"step": 437,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15136
|
|
},
|
|
{
|
|
"epoch": 1.8656716417910446,
|
|
"grad_norm": 0.10327666654736484,
|
|
"learning_rate": 3.166414545598839e-05,
|
|
"loss": 1.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24485477805137634,
|
|
"step": 438,
|
|
"valid_targets_mean": 13620.7,
|
|
"valid_targets_min": 10131
|
|
},
|
|
{
|
|
"epoch": 1.8699360341151388,
|
|
"grad_norm": 0.0945829139478311,
|
|
"learning_rate": 3.161580660599464e-05,
|
|
"loss": 1.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660416066646576,
|
|
"step": 439,
|
|
"valid_targets_mean": 16100.6,
|
|
"valid_targets_min": 14067
|
|
},
|
|
{
|
|
"epoch": 1.8742004264392325,
|
|
"grad_norm": 0.10970920033805336,
|
|
"learning_rate": 3.1567365143842264e-05,
|
|
"loss": 1.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31360703706741333,
|
|
"step": 440,
|
|
"valid_targets_mean": 16113.8,
|
|
"valid_targets_min": 15289
|
|
},
|
|
{
|
|
"epoch": 1.8784648187633262,
|
|
"grad_norm": 0.09582809458242247,
|
|
"learning_rate": 3.1518821497455326e-05,
|
|
"loss": 1.0577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23639875650405884,
|
|
"step": 441,
|
|
"valid_targets_mean": 14648.5,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 1.88272921108742,
|
|
"grad_norm": 0.09918680419048113,
|
|
"learning_rate": 3.147017609566054e-05,
|
|
"loss": 1.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33423301577568054,
|
|
"step": 442,
|
|
"valid_targets_mean": 15988.4,
|
|
"valid_targets_min": 14619
|
|
},
|
|
{
|
|
"epoch": 1.886993603411514,
|
|
"grad_norm": 0.08947481096269351,
|
|
"learning_rate": 3.142142936818353e-05,
|
|
"loss": 1.0567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2472197413444519,
|
|
"step": 443,
|
|
"valid_targets_mean": 13472.2,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 1.8912579957356077,
|
|
"grad_norm": 0.1007843490708954,
|
|
"learning_rate": 3.137258174564501e-05,
|
|
"loss": 1.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855716347694397,
|
|
"step": 444,
|
|
"valid_targets_mean": 16017.3,
|
|
"valid_targets_min": 14970
|
|
},
|
|
{
|
|
"epoch": 1.8955223880597014,
|
|
"grad_norm": 0.09139419739728928,
|
|
"learning_rate": 3.1323633659556986e-05,
|
|
"loss": 1.0457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2839217782020569,
|
|
"step": 445,
|
|
"valid_targets_mean": 16226.5,
|
|
"valid_targets_min": 15711
|
|
},
|
|
{
|
|
"epoch": 1.8997867803837953,
|
|
"grad_norm": 0.08717872019723825,
|
|
"learning_rate": 3.127458554231894e-05,
|
|
"loss": 1.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19892370700836182,
|
|
"step": 446,
|
|
"valid_targets_mean": 10315.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 1.9040511727078893,
|
|
"grad_norm": 0.09837406702012674,
|
|
"learning_rate": 3.122543782721402e-05,
|
|
"loss": 1.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27118682861328125,
|
|
"step": 447,
|
|
"valid_targets_mean": 16048.4,
|
|
"valid_targets_min": 15014
|
|
},
|
|
{
|
|
"epoch": 1.908315565031983,
|
|
"grad_norm": 0.08769004060184687,
|
|
"learning_rate": 3.1176190948405194e-05,
|
|
"loss": 1.0795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3325099050998688,
|
|
"step": 448,
|
|
"valid_targets_mean": 16126.1,
|
|
"valid_targets_min": 15650
|
|
},
|
|
{
|
|
"epoch": 1.9125799573560767,
|
|
"grad_norm": 0.08889510617502625,
|
|
"learning_rate": 3.112684534093142e-05,
|
|
"loss": 1.0912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21588940918445587,
|
|
"step": 449,
|
|
"valid_targets_mean": 11767.8,
|
|
"valid_targets_min": 7026
|
|
},
|
|
{
|
|
"epoch": 1.9168443496801706,
|
|
"grad_norm": 0.0858352720178758,
|
|
"learning_rate": 3.107740144070385e-05,
|
|
"loss": 1.0253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26744675636291504,
|
|
"step": 450,
|
|
"valid_targets_mean": 16068.8,
|
|
"valid_targets_min": 12847
|
|
},
|
|
{
|
|
"epoch": 1.9211087420042645,
|
|
"grad_norm": 0.10824856130335428,
|
|
"learning_rate": 3.102785968450188e-05,
|
|
"loss": 1.0641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2974546253681183,
|
|
"step": 451,
|
|
"valid_targets_mean": 16119.1,
|
|
"valid_targets_min": 15078
|
|
},
|
|
{
|
|
"epoch": 1.9253731343283582,
|
|
"grad_norm": 0.0980509022752858,
|
|
"learning_rate": 3.09782205099694e-05,
|
|
"loss": 1.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27306246757507324,
|
|
"step": 452,
|
|
"valid_targets_mean": 14373.8,
|
|
"valid_targets_min": 11790
|
|
},
|
|
{
|
|
"epoch": 1.929637526652452,
|
|
"grad_norm": 0.08733013457492923,
|
|
"learning_rate": 3.092848435561084e-05,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930656373500824,
|
|
"step": 453,
|
|
"valid_targets_mean": 16052.4,
|
|
"valid_targets_min": 13640
|
|
},
|
|
{
|
|
"epoch": 1.9339019189765458,
|
|
"grad_norm": 0.09029380171387119,
|
|
"learning_rate": 3.0878651660787376e-05,
|
|
"loss": 1.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2991414964199066,
|
|
"step": 454,
|
|
"valid_targets_mean": 16144.9,
|
|
"valid_targets_min": 14935
|
|
},
|
|
{
|
|
"epoch": 1.9381663113006398,
|
|
"grad_norm": 0.09483513945449228,
|
|
"learning_rate": 3.082872286571295e-05,
|
|
"loss": 1.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535240054130554,
|
|
"step": 455,
|
|
"valid_targets_mean": 15435.9,
|
|
"valid_targets_min": 13490
|
|
},
|
|
{
|
|
"epoch": 1.9424307036247335,
|
|
"grad_norm": 0.09659671818958866,
|
|
"learning_rate": 3.077869841145049e-05,
|
|
"loss": 1.0621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30032506585121155,
|
|
"step": 456,
|
|
"valid_targets_mean": 16162.6,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 1.9466950959488272,
|
|
"grad_norm": 0.09319204581270674,
|
|
"learning_rate": 3.0728578739907934e-05,
|
|
"loss": 1.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22293119132518768,
|
|
"step": 457,
|
|
"valid_targets_mean": 10529.0,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 1.950959488272921,
|
|
"grad_norm": 0.09826725877410575,
|
|
"learning_rate": 3.067836429383437e-05,
|
|
"loss": 1.049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24382583796977997,
|
|
"step": 458,
|
|
"valid_targets_mean": 16235.1,
|
|
"valid_targets_min": 15758
|
|
},
|
|
{
|
|
"epoch": 1.955223880597015,
|
|
"grad_norm": 0.10409932634679904,
|
|
"learning_rate": 3.062805551681609e-05,
|
|
"loss": 1.0551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283489465713501,
|
|
"step": 459,
|
|
"valid_targets_mean": 16094.2,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 1.9594882729211087,
|
|
"grad_norm": 0.10331048450758677,
|
|
"learning_rate": 3.057765285327271e-05,
|
|
"loss": 1.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138018399477005,
|
|
"step": 460,
|
|
"valid_targets_mean": 6821.8,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 1.9637526652452024,
|
|
"grad_norm": 0.09743273108731598,
|
|
"learning_rate": 3.0527156748453214e-05,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563630938529968,
|
|
"step": 461,
|
|
"valid_targets_mean": 16057.0,
|
|
"valid_targets_min": 13815
|
|
},
|
|
{
|
|
"epoch": 1.9680170575692963,
|
|
"grad_norm": 0.09483640386326188,
|
|
"learning_rate": 3.047656764843203e-05,
|
|
"loss": 1.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3199034333229065,
|
|
"step": 462,
|
|
"valid_targets_mean": 16112.1,
|
|
"valid_targets_min": 15191
|
|
},
|
|
{
|
|
"epoch": 1.9722814498933903,
|
|
"grad_norm": 0.0987190716986882,
|
|
"learning_rate": 3.0425886000105094e-05,
|
|
"loss": 1.0263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22087791562080383,
|
|
"step": 463,
|
|
"valid_targets_mean": 14163.8,
|
|
"valid_targets_min": 11998
|
|
},
|
|
{
|
|
"epoch": 1.976545842217484,
|
|
"grad_norm": 0.08677673603590218,
|
|
"learning_rate": 3.0375112251185892e-05,
|
|
"loss": 1.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29120445251464844,
|
|
"step": 464,
|
|
"valid_targets_mean": 16169.3,
|
|
"valid_targets_min": 15599
|
|
},
|
|
{
|
|
"epoch": 1.9808102345415777,
|
|
"grad_norm": 0.09166833964464276,
|
|
"learning_rate": 3.0324246850201527e-05,
|
|
"loss": 1.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31576794385910034,
|
|
"step": 465,
|
|
"valid_targets_mean": 16057.0,
|
|
"valid_targets_min": 15199
|
|
},
|
|
{
|
|
"epoch": 1.9850746268656716,
|
|
"grad_norm": 0.09200773190941762,
|
|
"learning_rate": 3.0273290246488732e-05,
|
|
"loss": 1.0915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24981939792633057,
|
|
"step": 466,
|
|
"valid_targets_mean": 15753.4,
|
|
"valid_targets_min": 13317
|
|
},
|
|
{
|
|
"epoch": 1.9893390191897655,
|
|
"grad_norm": 0.07612825414859399,
|
|
"learning_rate": 3.0222242890189904e-05,
|
|
"loss": 1.1086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111185133457184,
|
|
"step": 467,
|
|
"valid_targets_mean": 16100.4,
|
|
"valid_targets_min": 15070
|
|
},
|
|
{
|
|
"epoch": 1.9936034115138592,
|
|
"grad_norm": 0.09309422008789678,
|
|
"learning_rate": 3.017110523224914e-05,
|
|
"loss": 1.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24047337472438812,
|
|
"step": 468,
|
|
"valid_targets_mean": 13183.9,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 1.997867803837953,
|
|
"grad_norm": 0.08916146057727253,
|
|
"learning_rate": 3.011987772440825e-05,
|
|
"loss": 1.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952282130718231,
|
|
"step": 469,
|
|
"valid_targets_mean": 16010.9,
|
|
"valid_targets_min": 14902
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.12200005043821796,
|
|
"learning_rate": 3.006856081920277e-05,
|
|
"loss": 1.0973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.460677832365036,
|
|
"step": 470,
|
|
"valid_targets_mean": 10916.4,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 2.0042643923240937,
|
|
"grad_norm": 0.09176558333280496,
|
|
"learning_rate": 3.001715496995793e-05,
|
|
"loss": 1.0483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725759446620941,
|
|
"step": 471,
|
|
"valid_targets_mean": 16051.5,
|
|
"valid_targets_min": 14286
|
|
},
|
|
{
|
|
"epoch": 2.008528784648188,
|
|
"grad_norm": 0.09841155454100518,
|
|
"learning_rate": 2.9965660630784715e-05,
|
|
"loss": 1.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3057122230529785,
|
|
"step": 472,
|
|
"valid_targets_mean": 16074.0,
|
|
"valid_targets_min": 14619
|
|
},
|
|
{
|
|
"epoch": 2.0127931769722816,
|
|
"grad_norm": 0.09668073623151177,
|
|
"learning_rate": 2.9914078256575782e-05,
|
|
"loss": 1.0389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20916098356246948,
|
|
"step": 473,
|
|
"valid_targets_mean": 12791.3,
|
|
"valid_targets_min": 7725
|
|
},
|
|
{
|
|
"epoch": 2.0170575692963753,
|
|
"grad_norm": 0.09290127772051483,
|
|
"learning_rate": 2.9862408303001492e-05,
|
|
"loss": 1.0396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28497183322906494,
|
|
"step": 474,
|
|
"valid_targets_mean": 16144.3,
|
|
"valid_targets_min": 15559
|
|
},
|
|
{
|
|
"epoch": 2.021321961620469,
|
|
"grad_norm": 0.09804603027418765,
|
|
"learning_rate": 2.9810651226505875e-05,
|
|
"loss": 1.0302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852390706539154,
|
|
"step": 475,
|
|
"valid_targets_mean": 16115.0,
|
|
"valid_targets_min": 14772
|
|
},
|
|
{
|
|
"epoch": 2.025586353944563,
|
|
"grad_norm": 0.09334696069137993,
|
|
"learning_rate": 2.9758807484302566e-05,
|
|
"loss": 1.0329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24678093194961548,
|
|
"step": 476,
|
|
"valid_targets_mean": 15621.3,
|
|
"valid_targets_min": 13628
|
|
},
|
|
{
|
|
"epoch": 2.029850746268657,
|
|
"grad_norm": 0.08786286059100251,
|
|
"learning_rate": 2.9706877534370822e-05,
|
|
"loss": 1.058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28632238507270813,
|
|
"step": 477,
|
|
"valid_targets_mean": 16128.5,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 2.0341151385927505,
|
|
"grad_norm": 0.09555847589682456,
|
|
"learning_rate": 2.965486183545142e-05,
|
|
"loss": 1.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24608482420444489,
|
|
"step": 478,
|
|
"valid_targets_mean": 13119.6,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 2.038379530916844,
|
|
"grad_norm": 0.0902553844594556,
|
|
"learning_rate": 2.9602760847042645e-05,
|
|
"loss": 1.036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25147759914398193,
|
|
"step": 479,
|
|
"valid_targets_mean": 16147.4,
|
|
"valid_targets_min": 15653
|
|
},
|
|
{
|
|
"epoch": 2.0426439232409384,
|
|
"grad_norm": 0.09556026439579529,
|
|
"learning_rate": 2.955057502939621e-05,
|
|
"loss": 1.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29309529066085815,
|
|
"step": 480,
|
|
"valid_targets_mean": 16048.8,
|
|
"valid_targets_min": 12317
|
|
},
|
|
{
|
|
"epoch": 2.046908315565032,
|
|
"grad_norm": 0.08887180746956706,
|
|
"learning_rate": 2.9498304843513193e-05,
|
|
"loss": 1.0616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705121099948883,
|
|
"step": 481,
|
|
"valid_targets_mean": 9213.1,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 2.0511727078891258,
|
|
"grad_norm": 0.09339660077930691,
|
|
"learning_rate": 2.9445950751139957e-05,
|
|
"loss": 1.0159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26420682668685913,
|
|
"step": 482,
|
|
"valid_targets_mean": 16083.6,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.0554371002132195,
|
|
"grad_norm": 0.08739076334871473,
|
|
"learning_rate": 2.939351321476412e-05,
|
|
"loss": 1.0531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29836294054985046,
|
|
"step": 483,
|
|
"valid_targets_mean": 16190.3,
|
|
"valid_targets_min": 15570
|
|
},
|
|
{
|
|
"epoch": 2.0597014925373136,
|
|
"grad_norm": 0.09760700749605619,
|
|
"learning_rate": 2.9340992697610393e-05,
|
|
"loss": 1.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23754040896892548,
|
|
"step": 484,
|
|
"valid_targets_mean": 13633.5,
|
|
"valid_targets_min": 8011
|
|
},
|
|
{
|
|
"epoch": 2.0639658848614073,
|
|
"grad_norm": 0.08865244060864011,
|
|
"learning_rate": 2.9288389663636537e-05,
|
|
"loss": 1.063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649306058883667,
|
|
"step": 485,
|
|
"valid_targets_mean": 16079.4,
|
|
"valid_targets_min": 14295
|
|
},
|
|
{
|
|
"epoch": 2.068230277185501,
|
|
"grad_norm": 0.09485313630907335,
|
|
"learning_rate": 2.923570457752925e-05,
|
|
"loss": 1.0241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967156171798706,
|
|
"step": 486,
|
|
"valid_targets_mean": 16141.9,
|
|
"valid_targets_min": 15089
|
|
},
|
|
{
|
|
"epoch": 2.0724946695095947,
|
|
"grad_norm": 0.0989430448853122,
|
|
"learning_rate": 2.9182937904700078e-05,
|
|
"loss": 1.059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551717162132263,
|
|
"step": 487,
|
|
"valid_targets_mean": 14786.7,
|
|
"valid_targets_min": 12348
|
|
},
|
|
{
|
|
"epoch": 2.076759061833689,
|
|
"grad_norm": 0.09208825102497632,
|
|
"learning_rate": 2.9130090111281278e-05,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771487832069397,
|
|
"step": 488,
|
|
"valid_targets_mean": 16158.6,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 2.0810234541577826,
|
|
"grad_norm": 0.10625482170461517,
|
|
"learning_rate": 2.9077161664121722e-05,
|
|
"loss": 1.0627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007223606109619,
|
|
"step": 489,
|
|
"valid_targets_mean": 15986.3,
|
|
"valid_targets_min": 15089
|
|
},
|
|
{
|
|
"epoch": 2.0852878464818763,
|
|
"grad_norm": 0.09764157570244536,
|
|
"learning_rate": 2.902415303078275e-05,
|
|
"loss": 1.0466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2514490485191345,
|
|
"step": 490,
|
|
"valid_targets_mean": 15997.3,
|
|
"valid_targets_min": 14245
|
|
},
|
|
{
|
|
"epoch": 2.08955223880597,
|
|
"grad_norm": 0.09359018304034655,
|
|
"learning_rate": 2.8971064679534072e-05,
|
|
"loss": 1.0517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28226613998413086,
|
|
"step": 491,
|
|
"valid_targets_mean": 16121.7,
|
|
"valid_targets_min": 15009
|
|
},
|
|
{
|
|
"epoch": 2.093816631130064,
|
|
"grad_norm": 0.11796609216315329,
|
|
"learning_rate": 2.8917897079349604e-05,
|
|
"loss": 1.0489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21255500614643097,
|
|
"step": 492,
|
|
"valid_targets_mean": 10721.3,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 2.098081023454158,
|
|
"grad_norm": 0.09079804203093109,
|
|
"learning_rate": 2.8864650699903336e-05,
|
|
"loss": 1.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579681873321533,
|
|
"step": 493,
|
|
"valid_targets_mean": 16138.6,
|
|
"valid_targets_min": 15489
|
|
},
|
|
{
|
|
"epoch": 2.1023454157782515,
|
|
"grad_norm": 0.1103632556047112,
|
|
"learning_rate": 2.881132601156518e-05,
|
|
"loss": 1.0639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307340532541275,
|
|
"step": 494,
|
|
"valid_targets_mean": 16207.1,
|
|
"valid_targets_min": 15510
|
|
},
|
|
{
|
|
"epoch": 2.106609808102345,
|
|
"grad_norm": 0.10333153318842464,
|
|
"learning_rate": 2.8757923485396805e-05,
|
|
"loss": 1.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17362730205059052,
|
|
"step": 495,
|
|
"valid_targets_mean": 8745.5,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 2.1108742004264394,
|
|
"grad_norm": 0.09918496628325016,
|
|
"learning_rate": 2.8704443593147517e-05,
|
|
"loss": 1.0749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26736536622047424,
|
|
"step": 496,
|
|
"valid_targets_mean": 16156.1,
|
|
"valid_targets_min": 15475
|
|
},
|
|
{
|
|
"epoch": 2.115138592750533,
|
|
"grad_norm": 0.09620336438972982,
|
|
"learning_rate": 2.8650886807250024e-05,
|
|
"loss": 1.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073892593383789,
|
|
"step": 497,
|
|
"valid_targets_mean": 16113.5,
|
|
"valid_targets_min": 14217
|
|
},
|
|
{
|
|
"epoch": 2.1194029850746268,
|
|
"grad_norm": 0.10839919501928215,
|
|
"learning_rate": 2.8597253600816332e-05,
|
|
"loss": 1.0909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23168393969535828,
|
|
"step": 498,
|
|
"valid_targets_mean": 12934.0,
|
|
"valid_targets_min": 10983
|
|
},
|
|
{
|
|
"epoch": 2.1236673773987205,
|
|
"grad_norm": 0.10211923283431626,
|
|
"learning_rate": 2.8543544447633517e-05,
|
|
"loss": 1.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041832447052002,
|
|
"step": 499,
|
|
"valid_targets_mean": 16013.3,
|
|
"valid_targets_min": 14001
|
|
},
|
|
{
|
|
"epoch": 2.1279317697228146,
|
|
"grad_norm": 0.10158676433633469,
|
|
"learning_rate": 2.8489759822159558e-05,
|
|
"loss": 1.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32115983963012695,
|
|
"step": 500,
|
|
"valid_targets_mean": 16142.0,
|
|
"valid_targets_min": 15100
|
|
},
|
|
{
|
|
"epoch": 2.1321961620469083,
|
|
"grad_norm": 0.0971553252519814,
|
|
"learning_rate": 2.843590019951914e-05,
|
|
"loss": 1.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23883402347564697,
|
|
"step": 501,
|
|
"valid_targets_mean": 14461.2,
|
|
"valid_targets_min": 12582
|
|
},
|
|
{
|
|
"epoch": 2.136460554371002,
|
|
"grad_norm": 0.09406077380609203,
|
|
"learning_rate": 2.838196605549948e-05,
|
|
"loss": 1.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3106440305709839,
|
|
"step": 502,
|
|
"valid_targets_mean": 16153.6,
|
|
"valid_targets_min": 15516
|
|
},
|
|
{
|
|
"epoch": 2.140724946695096,
|
|
"grad_norm": 0.1026881876641519,
|
|
"learning_rate": 2.8327957866546082e-05,
|
|
"loss": 1.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25078085064888,
|
|
"step": 503,
|
|
"valid_targets_mean": 12903.4,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 2.14498933901919,
|
|
"grad_norm": 0.09739160756738767,
|
|
"learning_rate": 2.8273876109758568e-05,
|
|
"loss": 1.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2769235670566559,
|
|
"step": 504,
|
|
"valid_targets_mean": 15824.2,
|
|
"valid_targets_min": 9156
|
|
},
|
|
{
|
|
"epoch": 2.1492537313432836,
|
|
"grad_norm": 0.09686046383993906,
|
|
"learning_rate": 2.8219721262886427e-05,
|
|
"loss": 1.0626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30462872982025146,
|
|
"step": 505,
|
|
"valid_targets_mean": 16086.6,
|
|
"valid_targets_min": 13036
|
|
},
|
|
{
|
|
"epoch": 2.1535181236673773,
|
|
"grad_norm": 0.09703867672856833,
|
|
"learning_rate": 2.816549380432483e-05,
|
|
"loss": 1.051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18756161630153656,
|
|
"step": 506,
|
|
"valid_targets_mean": 9946.5,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 2.1577825159914714,
|
|
"grad_norm": 0.09376957262766217,
|
|
"learning_rate": 2.8111194213110386e-05,
|
|
"loss": 1.0476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586948275566101,
|
|
"step": 507,
|
|
"valid_targets_mean": 15812.5,
|
|
"valid_targets_min": 9254
|
|
},
|
|
{
|
|
"epoch": 2.162046908315565,
|
|
"grad_norm": 0.09539612414771617,
|
|
"learning_rate": 2.805682296891691e-05,
|
|
"loss": 1.1008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3226960301399231,
|
|
"step": 508,
|
|
"valid_targets_mean": 16113.9,
|
|
"valid_targets_min": 15498
|
|
},
|
|
{
|
|
"epoch": 2.166311300639659,
|
|
"grad_norm": 0.10512962446590504,
|
|
"learning_rate": 2.8002380552051186e-05,
|
|
"loss": 1.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18008413910865784,
|
|
"step": 509,
|
|
"valid_targets_mean": 10279.2,
|
|
"valid_targets_min": 5940
|
|
},
|
|
{
|
|
"epoch": 2.1705756929637525,
|
|
"grad_norm": 0.12039488623311602,
|
|
"learning_rate": 2.7947867443448728e-05,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26744338870048523,
|
|
"step": 510,
|
|
"valid_targets_mean": 16102.2,
|
|
"valid_targets_min": 14599
|
|
},
|
|
{
|
|
"epoch": 2.1748400852878467,
|
|
"grad_norm": 0.09046303232473128,
|
|
"learning_rate": 2.789328412466953e-05,
|
|
"loss": 1.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30518412590026855,
|
|
"step": 511,
|
|
"valid_targets_mean": 16153.5,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.1791044776119404,
|
|
"grad_norm": 0.09937799897465573,
|
|
"learning_rate": 2.7838631077893813e-05,
|
|
"loss": 1.0687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23768314719200134,
|
|
"step": 512,
|
|
"valid_targets_mean": 13469.4,
|
|
"valid_targets_min": 10307
|
|
},
|
|
{
|
|
"epoch": 2.183368869936034,
|
|
"grad_norm": 0.11252647377620914,
|
|
"learning_rate": 2.7783908785917753e-05,
|
|
"loss": 1.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31257063150405884,
|
|
"step": 513,
|
|
"valid_targets_mean": 16052.1,
|
|
"valid_targets_min": 15176
|
|
},
|
|
{
|
|
"epoch": 2.1876332622601278,
|
|
"grad_norm": 0.09658737900285636,
|
|
"learning_rate": 2.7729117732149244e-05,
|
|
"loss": 1.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30403977632522583,
|
|
"step": 514,
|
|
"valid_targets_mean": 15911.6,
|
|
"valid_targets_min": 12126
|
|
},
|
|
{
|
|
"epoch": 2.191897654584222,
|
|
"grad_norm": 0.10103962179409216,
|
|
"learning_rate": 2.7674258400603587e-05,
|
|
"loss": 1.0432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2428993135690689,
|
|
"step": 515,
|
|
"valid_targets_mean": 15783.9,
|
|
"valid_targets_min": 14351
|
|
},
|
|
{
|
|
"epoch": 2.1961620469083156,
|
|
"grad_norm": 0.09805791582567548,
|
|
"learning_rate": 2.761933127589927e-05,
|
|
"loss": 1.1029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29865264892578125,
|
|
"step": 516,
|
|
"valid_targets_mean": 16191.0,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.2004264392324093,
|
|
"grad_norm": 0.09194157454946834,
|
|
"learning_rate": 2.7564336843253633e-05,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2001020461320877,
|
|
"step": 517,
|
|
"valid_targets_mean": 11201.6,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 2.204690831556503,
|
|
"grad_norm": 0.1098155536995358,
|
|
"learning_rate": 2.7509275588478606e-05,
|
|
"loss": 1.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813820540904999,
|
|
"step": 518,
|
|
"valid_targets_mean": 16053.3,
|
|
"valid_targets_min": 15224
|
|
},
|
|
{
|
|
"epoch": 2.208955223880597,
|
|
"grad_norm": 0.10697269280452905,
|
|
"learning_rate": 2.7454147997976404e-05,
|
|
"loss": 1.0403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819169759750366,
|
|
"step": 519,
|
|
"valid_targets_mean": 16197.4,
|
|
"valid_targets_min": 15522
|
|
},
|
|
{
|
|
"epoch": 2.213219616204691,
|
|
"grad_norm": 0.09267598592679255,
|
|
"learning_rate": 2.7398954558735272e-05,
|
|
"loss": 1.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1687195599079132,
|
|
"step": 520,
|
|
"valid_targets_mean": 9558.3,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 2.2174840085287846,
|
|
"grad_norm": 0.10245403903822868,
|
|
"learning_rate": 2.7343695758325125e-05,
|
|
"loss": 1.0562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27591824531555176,
|
|
"step": 521,
|
|
"valid_targets_mean": 16046.5,
|
|
"valid_targets_min": 14827
|
|
},
|
|
{
|
|
"epoch": 2.2217484008528783,
|
|
"grad_norm": 0.10726023143274162,
|
|
"learning_rate": 2.7288372084893282e-05,
|
|
"loss": 1.0282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28371572494506836,
|
|
"step": 522,
|
|
"valid_targets_mean": 16178.2,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 2.2260127931769724,
|
|
"grad_norm": 0.10419976323200554,
|
|
"learning_rate": 2.7232984027160126e-05,
|
|
"loss": 1.0573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24477174878120422,
|
|
"step": 523,
|
|
"valid_targets_mean": 13470.2,
|
|
"valid_targets_min": 10634
|
|
},
|
|
{
|
|
"epoch": 2.230277185501066,
|
|
"grad_norm": 0.09531009264383342,
|
|
"learning_rate": 2.7177532074414822e-05,
|
|
"loss": 1.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28225386142730713,
|
|
"step": 524,
|
|
"valid_targets_mean": 16116.1,
|
|
"valid_targets_min": 15223
|
|
},
|
|
{
|
|
"epoch": 2.23454157782516,
|
|
"grad_norm": 0.09199261776650441,
|
|
"learning_rate": 2.712201671651094e-05,
|
|
"loss": 1.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31113922595977783,
|
|
"step": 525,
|
|
"valid_targets_mean": 16087.2,
|
|
"valid_targets_min": 13594
|
|
},
|
|
{
|
|
"epoch": 2.2388059701492535,
|
|
"grad_norm": 0.11331170842972083,
|
|
"learning_rate": 2.7066438443862205e-05,
|
|
"loss": 1.0939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25119367241859436,
|
|
"step": 526,
|
|
"valid_targets_mean": 14666.6,
|
|
"valid_targets_min": 12298
|
|
},
|
|
{
|
|
"epoch": 2.2430703624733477,
|
|
"grad_norm": 0.09742426554675039,
|
|
"learning_rate": 2.701079774743808e-05,
|
|
"loss": 1.0939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3013196587562561,
|
|
"step": 527,
|
|
"valid_targets_mean": 16185.2,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 2.2473347547974414,
|
|
"grad_norm": 0.09738088811529429,
|
|
"learning_rate": 2.6955095118759496e-05,
|
|
"loss": 1.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2396417260169983,
|
|
"step": 528,
|
|
"valid_targets_mean": 13162.1,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 2.251599147121535,
|
|
"grad_norm": 0.09007030181815612,
|
|
"learning_rate": 2.689933104989447e-05,
|
|
"loss": 1.0555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562272250652313,
|
|
"step": 529,
|
|
"valid_targets_mean": 15991.2,
|
|
"valid_targets_min": 14106
|
|
},
|
|
{
|
|
"epoch": 2.2558635394456292,
|
|
"grad_norm": 0.10684867949521697,
|
|
"learning_rate": 2.6843506033453777e-05,
|
|
"loss": 1.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164982497692108,
|
|
"step": 530,
|
|
"valid_targets_mean": 16024.4,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.260127931769723,
|
|
"grad_norm": 0.09358180156688657,
|
|
"learning_rate": 2.6787620562586587e-05,
|
|
"loss": 1.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1824861764907837,
|
|
"step": 531,
|
|
"valid_targets_mean": 9695.1,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 2.2643923240938166,
|
|
"grad_norm": 0.08613340661147308,
|
|
"learning_rate": 2.673167513097613e-05,
|
|
"loss": 1.0365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596473693847656,
|
|
"step": 532,
|
|
"valid_targets_mean": 16170.2,
|
|
"valid_targets_min": 15400
|
|
},
|
|
{
|
|
"epoch": 2.2686567164179103,
|
|
"grad_norm": 0.09309414406005553,
|
|
"learning_rate": 2.6675670232835297e-05,
|
|
"loss": 1.0195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916867733001709,
|
|
"step": 533,
|
|
"valid_targets_mean": 15980.8,
|
|
"valid_targets_min": 12675
|
|
},
|
|
{
|
|
"epoch": 2.272921108742004,
|
|
"grad_norm": 0.08350603036545372,
|
|
"learning_rate": 2.661960636290231e-05,
|
|
"loss": 1.0799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19942200183868408,
|
|
"step": 534,
|
|
"valid_targets_mean": 11341.7,
|
|
"valid_targets_min": 6100
|
|
},
|
|
{
|
|
"epoch": 2.277185501066098,
|
|
"grad_norm": 0.09547417400146033,
|
|
"learning_rate": 2.6563484016436346e-05,
|
|
"loss": 1.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27838388085365295,
|
|
"step": 535,
|
|
"valid_targets_mean": 16175.0,
|
|
"valid_targets_min": 15131
|
|
},
|
|
{
|
|
"epoch": 2.281449893390192,
|
|
"grad_norm": 0.09253905807148652,
|
|
"learning_rate": 2.6507303689213143e-05,
|
|
"loss": 1.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28802457451820374,
|
|
"step": 536,
|
|
"valid_targets_mean": 16204.8,
|
|
"valid_targets_min": 15481
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.08399921900159946,
|
|
"learning_rate": 2.6451065877520634e-05,
|
|
"loss": 1.0465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24142228066921234,
|
|
"step": 537,
|
|
"valid_targets_mean": 14239.5,
|
|
"valid_targets_min": 11768
|
|
},
|
|
{
|
|
"epoch": 2.2899786780383797,
|
|
"grad_norm": 0.09376208756853491,
|
|
"learning_rate": 2.639477107815455e-05,
|
|
"loss": 1.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3194657564163208,
|
|
"step": 538,
|
|
"valid_targets_mean": 16034.9,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 2.2942430703624734,
|
|
"grad_norm": 0.0842740402201318,
|
|
"learning_rate": 2.633841978841406e-05,
|
|
"loss": 1.0412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28417128324508667,
|
|
"step": 539,
|
|
"valid_targets_mean": 16142.6,
|
|
"valid_targets_min": 14262
|
|
},
|
|
{
|
|
"epoch": 2.298507462686567,
|
|
"grad_norm": 0.08908815136212513,
|
|
"learning_rate": 2.6282012506097347e-05,
|
|
"loss": 1.0111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25461068749427795,
|
|
"step": 540,
|
|
"valid_targets_mean": 15422.8,
|
|
"valid_targets_min": 13974
|
|
},
|
|
{
|
|
"epoch": 2.302771855010661,
|
|
"grad_norm": 0.087237064848523,
|
|
"learning_rate": 2.622554972949724e-05,
|
|
"loss": 1.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30043187737464905,
|
|
"step": 541,
|
|
"valid_targets_mean": 15996.0,
|
|
"valid_targets_min": 13856
|
|
},
|
|
{
|
|
"epoch": 2.307036247334755,
|
|
"grad_norm": 0.08917426243078419,
|
|
"learning_rate": 2.6169031957396778e-05,
|
|
"loss": 1.067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2115328162908554,
|
|
"step": 542,
|
|
"valid_targets_mean": 11270.4,
|
|
"valid_targets_min": 2648
|
|
},
|
|
{
|
|
"epoch": 2.3113006396588487,
|
|
"grad_norm": 0.08686669524747882,
|
|
"learning_rate": 2.611245968906482e-05,
|
|
"loss": 1.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844540476799011,
|
|
"step": 543,
|
|
"valid_targets_mean": 16128.5,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.3155650319829424,
|
|
"grad_norm": 0.08563826929425315,
|
|
"learning_rate": 2.605583342425165e-05,
|
|
"loss": 1.0117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681037187576294,
|
|
"step": 544,
|
|
"valid_targets_mean": 16175.0,
|
|
"valid_targets_min": 15115
|
|
},
|
|
{
|
|
"epoch": 2.319829424307036,
|
|
"grad_norm": 0.09278977352377574,
|
|
"learning_rate": 2.5999153663184546e-05,
|
|
"loss": 1.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1989366114139557,
|
|
"step": 545,
|
|
"valid_targets_mean": 9624.7,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 2.3240938166311302,
|
|
"grad_norm": 0.09766873302938348,
|
|
"learning_rate": 2.594242090656335e-05,
|
|
"loss": 1.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582166790962219,
|
|
"step": 546,
|
|
"valid_targets_mean": 16130.9,
|
|
"valid_targets_min": 14668
|
|
},
|
|
{
|
|
"epoch": 2.328358208955224,
|
|
"grad_norm": 0.09437498321916052,
|
|
"learning_rate": 2.5885635655556075e-05,
|
|
"loss": 1.0445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28935620188713074,
|
|
"step": 547,
|
|
"valid_targets_mean": 16090.8,
|
|
"valid_targets_min": 14120
|
|
},
|
|
{
|
|
"epoch": 2.3326226012793176,
|
|
"grad_norm": 0.08028629237178618,
|
|
"learning_rate": 2.5828798411794443e-05,
|
|
"loss": 1.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24462056159973145,
|
|
"step": 548,
|
|
"valid_targets_mean": 13306.3,
|
|
"valid_targets_min": 9893
|
|
},
|
|
{
|
|
"epoch": 2.3368869936034113,
|
|
"grad_norm": 0.08822043797724845,
|
|
"learning_rate": 2.5771909677369484e-05,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28605037927627563,
|
|
"step": 549,
|
|
"valid_targets_mean": 16144.5,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 2.3411513859275055,
|
|
"grad_norm": 0.1018885991413037,
|
|
"learning_rate": 2.571496995482709e-05,
|
|
"loss": 1.0446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790073752403259,
|
|
"step": 550,
|
|
"valid_targets_mean": 16086.9,
|
|
"valid_targets_min": 14667
|
|
},
|
|
{
|
|
"epoch": 2.345415778251599,
|
|
"grad_norm": 0.08141285680252859,
|
|
"learning_rate": 2.565797974716357e-05,
|
|
"loss": 1.0029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22980453073978424,
|
|
"step": 551,
|
|
"valid_targets_mean": 14974.6,
|
|
"valid_targets_min": 12834
|
|
},
|
|
{
|
|
"epoch": 2.349680170575693,
|
|
"grad_norm": 0.09826163522471919,
|
|
"learning_rate": 2.5600939557821205e-05,
|
|
"loss": 1.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3136899471282959,
|
|
"step": 552,
|
|
"valid_targets_mean": 16076.8,
|
|
"valid_targets_min": 14571
|
|
},
|
|
{
|
|
"epoch": 2.3539445628997866,
|
|
"grad_norm": 0.09055964067634564,
|
|
"learning_rate": 2.5543849890683813e-05,
|
|
"loss": 1.0612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605438828468323,
|
|
"step": 553,
|
|
"valid_targets_mean": 13229.5,
|
|
"valid_targets_min": 3789
|
|
},
|
|
{
|
|
"epoch": 2.3582089552238807,
|
|
"grad_norm": 0.08508439683494926,
|
|
"learning_rate": 2.548671125007229e-05,
|
|
"loss": 1.0375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24964557588100433,
|
|
"step": 554,
|
|
"valid_targets_mean": 15937.6,
|
|
"valid_targets_min": 14303
|
|
},
|
|
{
|
|
"epoch": 2.3624733475479744,
|
|
"grad_norm": 0.09949260375320527,
|
|
"learning_rate": 2.5429524140740155e-05,
|
|
"loss": 1.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29377031326293945,
|
|
"step": 555,
|
|
"valid_targets_mean": 16159.1,
|
|
"valid_targets_min": 15475
|
|
},
|
|
{
|
|
"epoch": 2.366737739872068,
|
|
"grad_norm": 0.09855180198276682,
|
|
"learning_rate": 2.537228906786908e-05,
|
|
"loss": 1.0855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17214925587177277,
|
|
"step": 556,
|
|
"valid_targets_mean": 8862.3,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 2.3710021321961623,
|
|
"grad_norm": 0.08701884103326052,
|
|
"learning_rate": 2.5315006537064473e-05,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2840196192264557,
|
|
"step": 557,
|
|
"valid_targets_mean": 16049.0,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.375266524520256,
|
|
"grad_norm": 0.09497442288060436,
|
|
"learning_rate": 2.5257677054350927e-05,
|
|
"loss": 1.0449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3033146858215332,
|
|
"step": 558,
|
|
"valid_targets_mean": 16085.5,
|
|
"valid_targets_min": 12675
|
|
},
|
|
{
|
|
"epoch": 2.3795309168443497,
|
|
"grad_norm": 0.08450393269826374,
|
|
"learning_rate": 2.5200301126167857e-05,
|
|
"loss": 1.0408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21495604515075684,
|
|
"step": 559,
|
|
"valid_targets_mean": 12266.5,
|
|
"valid_targets_min": 8388
|
|
},
|
|
{
|
|
"epoch": 2.3837953091684434,
|
|
"grad_norm": 0.08420678681450235,
|
|
"learning_rate": 2.514287925936492e-05,
|
|
"loss": 1.0434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2823660969734192,
|
|
"step": 560,
|
|
"valid_targets_mean": 16086.2,
|
|
"valid_targets_min": 14619
|
|
},
|
|
{
|
|
"epoch": 2.388059701492537,
|
|
"grad_norm": 0.0916465468381091,
|
|
"learning_rate": 2.5085411961197626e-05,
|
|
"loss": 1.0534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30327335000038147,
|
|
"step": 561,
|
|
"valid_targets_mean": 16095.9,
|
|
"valid_targets_min": 15459
|
|
},
|
|
{
|
|
"epoch": 2.3923240938166312,
|
|
"grad_norm": 0.09173711602950196,
|
|
"learning_rate": 2.502789973932278e-05,
|
|
"loss": 1.0636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25487005710601807,
|
|
"step": 562,
|
|
"valid_targets_mean": 14109.6,
|
|
"valid_targets_min": 10952
|
|
},
|
|
{
|
|
"epoch": 2.396588486140725,
|
|
"grad_norm": 0.09953093887738519,
|
|
"learning_rate": 2.4970343101794073e-05,
|
|
"loss": 1.0554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27731502056121826,
|
|
"step": 563,
|
|
"valid_targets_mean": 16178.0,
|
|
"valid_targets_min": 15512
|
|
},
|
|
{
|
|
"epoch": 2.4008528784648187,
|
|
"grad_norm": 0.09465491809738068,
|
|
"learning_rate": 2.4912742557057538e-05,
|
|
"loss": 1.0532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766163945198059,
|
|
"step": 564,
|
|
"valid_targets_mean": 16237.5,
|
|
"valid_targets_min": 15911
|
|
},
|
|
{
|
|
"epoch": 2.405117270788913,
|
|
"grad_norm": 0.0904721800033555,
|
|
"learning_rate": 2.485509861394708e-05,
|
|
"loss": 1.0582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669386565685272,
|
|
"step": 565,
|
|
"valid_targets_mean": 15926.5,
|
|
"valid_targets_min": 14522
|
|
},
|
|
{
|
|
"epoch": 2.4093816631130065,
|
|
"grad_norm": 0.09567266324706612,
|
|
"learning_rate": 2.4797411781679975e-05,
|
|
"loss": 1.0495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967975437641144,
|
|
"step": 566,
|
|
"valid_targets_mean": 16056.1,
|
|
"valid_targets_min": 13625
|
|
},
|
|
{
|
|
"epoch": 2.4136460554371,
|
|
"grad_norm": 0.09523909730522165,
|
|
"learning_rate": 2.473968256985238e-05,
|
|
"loss": 1.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2310684472322464,
|
|
"step": 567,
|
|
"valid_targets_mean": 11307.9,
|
|
"valid_targets_min": 2956
|
|
},
|
|
{
|
|
"epoch": 2.417910447761194,
|
|
"grad_norm": 0.09275084246781207,
|
|
"learning_rate": 2.4681911488434825e-05,
|
|
"loss": 0.9927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23717737197875977,
|
|
"step": 568,
|
|
"valid_targets_mean": 16158.3,
|
|
"valid_targets_min": 15181
|
|
},
|
|
{
|
|
"epoch": 2.4221748400852876,
|
|
"grad_norm": 0.09450997867610485,
|
|
"learning_rate": 2.4624099047767702e-05,
|
|
"loss": 1.0246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2907453775405884,
|
|
"step": 569,
|
|
"valid_targets_mean": 16083.8,
|
|
"valid_targets_min": 13667
|
|
},
|
|
{
|
|
"epoch": 2.4264392324093818,
|
|
"grad_norm": 0.09182682137904696,
|
|
"learning_rate": 2.4566245758556787e-05,
|
|
"loss": 1.0497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19886159896850586,
|
|
"step": 570,
|
|
"valid_targets_mean": 9855.8,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 2.4307036247334755,
|
|
"grad_norm": 0.09864371847792268,
|
|
"learning_rate": 2.4508352131868664e-05,
|
|
"loss": 1.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2572079300880432,
|
|
"step": 571,
|
|
"valid_targets_mean": 16167.2,
|
|
"valid_targets_min": 15430
|
|
},
|
|
{
|
|
"epoch": 2.434968017057569,
|
|
"grad_norm": 0.09878287912648348,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 1.0883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3043578863143921,
|
|
"step": 572,
|
|
"valid_targets_mean": 16130.8,
|
|
"valid_targets_min": 15027
|
|
},
|
|
{
|
|
"epoch": 2.4392324093816633,
|
|
"grad_norm": 0.10156957160070178,
|
|
"learning_rate": 2.439244591210443e-05,
|
|
"loss": 1.0565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24023163318634033,
|
|
"step": 573,
|
|
"valid_targets_mean": 13259.1,
|
|
"valid_targets_min": 9837
|
|
},
|
|
{
|
|
"epoch": 2.443496801705757,
|
|
"grad_norm": 0.09038562751782564,
|
|
"learning_rate": 2.4334434342925133e-05,
|
|
"loss": 1.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29611122608184814,
|
|
"step": 574,
|
|
"valid_targets_mean": 16147.0,
|
|
"valid_targets_min": 15555
|
|
},
|
|
{
|
|
"epoch": 2.4477611940298507,
|
|
"grad_norm": 0.10360668876436106,
|
|
"learning_rate": 2.4276384484053227e-05,
|
|
"loss": 1.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908511757850647,
|
|
"step": 575,
|
|
"valid_targets_mean": 16104.9,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.4520255863539444,
|
|
"grad_norm": 0.09585543148058576,
|
|
"learning_rate": 2.4218296848291795e-05,
|
|
"loss": 1.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26168251037597656,
|
|
"step": 576,
|
|
"valid_targets_mean": 15398.4,
|
|
"valid_targets_min": 13003
|
|
},
|
|
{
|
|
"epoch": 2.4562899786780386,
|
|
"grad_norm": 0.08935973605132008,
|
|
"learning_rate": 2.4160171948777603e-05,
|
|
"loss": 1.0476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958013415336609,
|
|
"step": 577,
|
|
"valid_targets_mean": 16137.8,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 2.4605543710021323,
|
|
"grad_norm": 0.10631691754984124,
|
|
"learning_rate": 2.410201029897665e-05,
|
|
"loss": 1.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24110427498817444,
|
|
"step": 578,
|
|
"valid_targets_mean": 12707.2,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 2.464818763326226,
|
|
"grad_norm": 0.09877537846808217,
|
|
"learning_rate": 2.4043812412679532e-05,
|
|
"loss": 1.0687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27633678913116455,
|
|
"step": 579,
|
|
"valid_targets_mean": 15769.1,
|
|
"valid_targets_min": 14559
|
|
},
|
|
{
|
|
"epoch": 2.4690831556503197,
|
|
"grad_norm": 0.0957668020744296,
|
|
"learning_rate": 2.3985578803996985e-05,
|
|
"loss": 1.0561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015096187591553,
|
|
"step": 580,
|
|
"valid_targets_mean": 16138.8,
|
|
"valid_targets_min": 14136
|
|
},
|
|
{
|
|
"epoch": 2.473347547974414,
|
|
"grad_norm": 0.09958942178092568,
|
|
"learning_rate": 2.392730998735529e-05,
|
|
"loss": 1.0593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18700793385505676,
|
|
"step": 581,
|
|
"valid_targets_mean": 10680.2,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 2.4776119402985075,
|
|
"grad_norm": 0.08724549621874901,
|
|
"learning_rate": 2.3869006477491755e-05,
|
|
"loss": 1.0865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27407506108283997,
|
|
"step": 582,
|
|
"valid_targets_mean": 16011.8,
|
|
"valid_targets_min": 13640
|
|
},
|
|
{
|
|
"epoch": 2.481876332622601,
|
|
"grad_norm": 0.09583277749636405,
|
|
"learning_rate": 2.381066878945017e-05,
|
|
"loss": 1.027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28913775086402893,
|
|
"step": 583,
|
|
"valid_targets_mean": 16145.8,
|
|
"valid_targets_min": 14954
|
|
},
|
|
{
|
|
"epoch": 2.486140724946695,
|
|
"grad_norm": 0.10122624715346676,
|
|
"learning_rate": 2.3752297438576257e-05,
|
|
"loss": 1.0658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20112615823745728,
|
|
"step": 584,
|
|
"valid_targets_mean": 11290.2,
|
|
"valid_targets_min": 7187
|
|
},
|
|
{
|
|
"epoch": 2.490405117270789,
|
|
"grad_norm": 0.08812850417824268,
|
|
"learning_rate": 2.3693892940513074e-05,
|
|
"loss": 1.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821218967437744,
|
|
"step": 585,
|
|
"valid_targets_mean": 15746.1,
|
|
"valid_targets_min": 4695
|
|
},
|
|
{
|
|
"epoch": 2.4946695095948828,
|
|
"grad_norm": 0.09377293957770948,
|
|
"learning_rate": 2.3635455811196536e-05,
|
|
"loss": 1.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3265020549297333,
|
|
"step": 586,
|
|
"valid_targets_mean": 16130.2,
|
|
"valid_targets_min": 15608
|
|
},
|
|
{
|
|
"epoch": 2.4989339019189765,
|
|
"grad_norm": 0.09026724257374404,
|
|
"learning_rate": 2.3576986566850796e-05,
|
|
"loss": 1.0589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2453971803188324,
|
|
"step": 587,
|
|
"valid_targets_mean": 15186.3,
|
|
"valid_targets_min": 12803
|
|
},
|
|
{
|
|
"epoch": 2.50319829424307,
|
|
"grad_norm": 0.09380666142746256,
|
|
"learning_rate": 2.351848572398371e-05,
|
|
"loss": 0.9969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607404589653015,
|
|
"step": 588,
|
|
"valid_targets_mean": 16224.4,
|
|
"valid_targets_min": 15605
|
|
},
|
|
{
|
|
"epoch": 2.5074626865671643,
|
|
"grad_norm": 0.0905598717095679,
|
|
"learning_rate": 2.3459953799382276e-05,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279967337846756,
|
|
"step": 589,
|
|
"valid_targets_mean": 16180.8,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.511727078891258,
|
|
"grad_norm": 0.08115896245533588,
|
|
"learning_rate": 2.3401391310108054e-05,
|
|
"loss": 1.0276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608569860458374,
|
|
"step": 590,
|
|
"valid_targets_mean": 15676.0,
|
|
"valid_targets_min": 11382
|
|
},
|
|
{
|
|
"epoch": 2.5159914712153517,
|
|
"grad_norm": 0.09597649249911172,
|
|
"learning_rate": 2.3342798773492602e-05,
|
|
"loss": 1.0251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837607264518738,
|
|
"step": 591,
|
|
"valid_targets_mean": 16127.7,
|
|
"valid_targets_min": 14935
|
|
},
|
|
{
|
|
"epoch": 2.520255863539446,
|
|
"grad_norm": 0.08979580619359138,
|
|
"learning_rate": 2.328417670713294e-05,
|
|
"loss": 1.0436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20027203857898712,
|
|
"step": 592,
|
|
"valid_targets_mean": 10758.2,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 2.5245202558635396,
|
|
"grad_norm": 0.0917895825557527,
|
|
"learning_rate": 2.3225525628886918e-05,
|
|
"loss": 1.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650205194950104,
|
|
"step": 593,
|
|
"valid_targets_mean": 16056.8,
|
|
"valid_targets_min": 15067
|
|
},
|
|
{
|
|
"epoch": 2.5287846481876333,
|
|
"grad_norm": 0.08925888274628122,
|
|
"learning_rate": 2.3166846056868687e-05,
|
|
"loss": 1.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316602885723114,
|
|
"step": 594,
|
|
"valid_targets_mean": 15594.3,
|
|
"valid_targets_min": 2768
|
|
},
|
|
{
|
|
"epoch": 2.533049040511727,
|
|
"grad_norm": 0.09646081841854295,
|
|
"learning_rate": 2.31081385094441e-05,
|
|
"loss": 1.0617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16233250498771667,
|
|
"step": 595,
|
|
"valid_targets_mean": 9016.2,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 2.5373134328358207,
|
|
"grad_norm": 0.0898959609666296,
|
|
"learning_rate": 2.304940350522615e-05,
|
|
"loss": 1.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635151147842407,
|
|
"step": 596,
|
|
"valid_targets_mean": 15832.9,
|
|
"valid_targets_min": 9100
|
|
},
|
|
{
|
|
"epoch": 2.541577825159915,
|
|
"grad_norm": 0.09837235068395446,
|
|
"learning_rate": 2.299064156307037e-05,
|
|
"loss": 1.0462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29903918504714966,
|
|
"step": 597,
|
|
"valid_targets_mean": 16104.9,
|
|
"valid_targets_min": 15479
|
|
},
|
|
{
|
|
"epoch": 2.5458422174840085,
|
|
"grad_norm": 0.08154502959039621,
|
|
"learning_rate": 2.2931853202070275e-05,
|
|
"loss": 1.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23731958866119385,
|
|
"step": 598,
|
|
"valid_targets_mean": 14149.8,
|
|
"valid_targets_min": 9947
|
|
},
|
|
{
|
|
"epoch": 2.550106609808102,
|
|
"grad_norm": 0.09967109197451503,
|
|
"learning_rate": 2.2873038941552724e-05,
|
|
"loss": 1.0117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779974043369293,
|
|
"step": 599,
|
|
"valid_targets_mean": 16118.4,
|
|
"valid_targets_min": 13253
|
|
},
|
|
{
|
|
"epoch": 2.5543710021321964,
|
|
"grad_norm": 0.09379251346435336,
|
|
"learning_rate": 2.2814199301073412e-05,
|
|
"loss": 1.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3131563663482666,
|
|
"step": 600,
|
|
"valid_targets_mean": 16064.8,
|
|
"valid_targets_min": 12847
|
|
},
|
|
{
|
|
"epoch": 2.55863539445629,
|
|
"grad_norm": 0.08308901712240907,
|
|
"learning_rate": 2.27553348004122e-05,
|
|
"loss": 1.0674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528584599494934,
|
|
"step": 601,
|
|
"valid_targets_mean": 15148.1,
|
|
"valid_targets_min": 12332
|
|
},
|
|
{
|
|
"epoch": 2.5628997867803838,
|
|
"grad_norm": 0.09935763311364727,
|
|
"learning_rate": 2.2696445959568577e-05,
|
|
"loss": 1.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29011639952659607,
|
|
"step": 602,
|
|
"valid_targets_mean": 16166.5,
|
|
"valid_targets_min": 15255
|
|
},
|
|
{
|
|
"epoch": 2.5671641791044775,
|
|
"grad_norm": 0.08895218508180142,
|
|
"learning_rate": 2.2637533298757064e-05,
|
|
"loss": 1.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2428792417049408,
|
|
"step": 603,
|
|
"valid_targets_mean": 13273.6,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.07932799338398493,
|
|
"learning_rate": 2.2578597338402567e-05,
|
|
"loss": 1.0521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591966390609741,
|
|
"step": 604,
|
|
"valid_targets_mean": 16118.6,
|
|
"valid_targets_min": 15358
|
|
},
|
|
{
|
|
"epoch": 2.5756929637526653,
|
|
"grad_norm": 0.09418399274984757,
|
|
"learning_rate": 2.2519638599135844e-05,
|
|
"loss": 1.0862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27598637342453003,
|
|
"step": 605,
|
|
"valid_targets_mean": 16109.9,
|
|
"valid_targets_min": 14179
|
|
},
|
|
{
|
|
"epoch": 2.579957356076759,
|
|
"grad_norm": 0.10208448684911363,
|
|
"learning_rate": 2.2460657601788875e-05,
|
|
"loss": 1.0635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19333398342132568,
|
|
"step": 606,
|
|
"valid_targets_mean": 9404.8,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 2.5842217484008527,
|
|
"grad_norm": 0.08386655346303695,
|
|
"learning_rate": 2.2401654867390256e-05,
|
|
"loss": 1.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23825089633464813,
|
|
"step": 607,
|
|
"valid_targets_mean": 16151.7,
|
|
"valid_targets_min": 13787
|
|
},
|
|
{
|
|
"epoch": 2.588486140724947,
|
|
"grad_norm": 0.09444673657840978,
|
|
"learning_rate": 2.2342630917160605e-05,
|
|
"loss": 1.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3162749111652374,
|
|
"step": 608,
|
|
"valid_targets_mean": 16068.6,
|
|
"valid_targets_min": 12138
|
|
},
|
|
{
|
|
"epoch": 2.5927505330490406,
|
|
"grad_norm": 0.08722157210582174,
|
|
"learning_rate": 2.2283586272507975e-05,
|
|
"loss": 1.0382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21935707330703735,
|
|
"step": 609,
|
|
"valid_targets_mean": 13113.5,
|
|
"valid_targets_min": 8600
|
|
},
|
|
{
|
|
"epoch": 2.5970149253731343,
|
|
"grad_norm": 0.0856039920184606,
|
|
"learning_rate": 2.2224521455023193e-05,
|
|
"loss": 1.0502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765524387359619,
|
|
"step": 610,
|
|
"valid_targets_mean": 16097.2,
|
|
"valid_targets_min": 14220
|
|
},
|
|
{
|
|
"epoch": 2.6012793176972284,
|
|
"grad_norm": 0.09013322301342568,
|
|
"learning_rate": 2.216543698647534e-05,
|
|
"loss": 1.0442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31274178624153137,
|
|
"step": 611,
|
|
"valid_targets_mean": 16129.2,
|
|
"valid_targets_min": 14888
|
|
},
|
|
{
|
|
"epoch": 2.605543710021322,
|
|
"grad_norm": 0.08761390657503183,
|
|
"learning_rate": 2.210633338880704e-05,
|
|
"loss": 1.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2490336000919342,
|
|
"step": 612,
|
|
"valid_targets_mean": 14565.0,
|
|
"valid_targets_min": 11583
|
|
},
|
|
{
|
|
"epoch": 2.609808102345416,
|
|
"grad_norm": 0.08663644751560258,
|
|
"learning_rate": 2.204721118412994e-05,
|
|
"loss": 1.0569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30070436000823975,
|
|
"step": 613,
|
|
"valid_targets_mean": 16050.4,
|
|
"valid_targets_min": 14922
|
|
},
|
|
{
|
|
"epoch": 2.6140724946695095,
|
|
"grad_norm": 0.09768040814255766,
|
|
"learning_rate": 2.1988070894720037e-05,
|
|
"loss": 1.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2989608645439148,
|
|
"step": 614,
|
|
"valid_targets_mean": 16149.8,
|
|
"valid_targets_min": 15342
|
|
},
|
|
{
|
|
"epoch": 2.6183368869936032,
|
|
"grad_norm": 0.08448213894861246,
|
|
"learning_rate": 2.192891304301309e-05,
|
|
"loss": 1.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25483831763267517,
|
|
"step": 615,
|
|
"valid_targets_mean": 15295.9,
|
|
"valid_targets_min": 13592
|
|
},
|
|
{
|
|
"epoch": 2.6226012793176974,
|
|
"grad_norm": 0.08916700063143083,
|
|
"learning_rate": 2.18697381516e-05,
|
|
"loss": 1.085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3167344033718109,
|
|
"step": 616,
|
|
"valid_targets_mean": 16101.7,
|
|
"valid_targets_min": 15570
|
|
},
|
|
{
|
|
"epoch": 2.626865671641791,
|
|
"grad_norm": 0.0979638014502062,
|
|
"learning_rate": 2.181054674322221e-05,
|
|
"loss": 1.0392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19789108633995056,
|
|
"step": 617,
|
|
"valid_targets_mean": 10173.2,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 2.631130063965885,
|
|
"grad_norm": 0.09307158702048642,
|
|
"learning_rate": 2.1751339340767043e-05,
|
|
"loss": 1.0028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26527971029281616,
|
|
"step": 618,
|
|
"valid_targets_mean": 16132.7,
|
|
"valid_targets_min": 14642
|
|
},
|
|
{
|
|
"epoch": 2.635394456289979,
|
|
"grad_norm": 0.09566557225722538,
|
|
"learning_rate": 2.169211646726313e-05,
|
|
"loss": 1.1052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3173242211341858,
|
|
"step": 619,
|
|
"valid_targets_mean": 16027.3,
|
|
"valid_targets_min": 13517
|
|
},
|
|
{
|
|
"epoch": 2.6396588486140726,
|
|
"grad_norm": 0.08801217873965989,
|
|
"learning_rate": 2.163287864587576e-05,
|
|
"loss": 1.0478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655081808567047,
|
|
"step": 620,
|
|
"valid_targets_mean": 8770.6,
|
|
"valid_targets_min": 2406
|
|
},
|
|
{
|
|
"epoch": 2.6439232409381663,
|
|
"grad_norm": 0.09135625602776881,
|
|
"learning_rate": 2.157362639990229e-05,
|
|
"loss": 1.0131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509617805480957,
|
|
"step": 621,
|
|
"valid_targets_mean": 16121.5,
|
|
"valid_targets_min": 14411
|
|
},
|
|
{
|
|
"epoch": 2.64818763326226,
|
|
"grad_norm": 0.10645906538472719,
|
|
"learning_rate": 2.151436025276747e-05,
|
|
"loss": 1.0543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080757260322571,
|
|
"step": 622,
|
|
"valid_targets_mean": 16169.2,
|
|
"valid_targets_min": 15376
|
|
},
|
|
{
|
|
"epoch": 2.6524520255863537,
|
|
"grad_norm": 0.08701513893045446,
|
|
"learning_rate": 2.145508072801888e-05,
|
|
"loss": 1.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25946226716041565,
|
|
"step": 623,
|
|
"valid_targets_mean": 14384.0,
|
|
"valid_targets_min": 10809
|
|
},
|
|
{
|
|
"epoch": 2.656716417910448,
|
|
"grad_norm": 0.10487079125662509,
|
|
"learning_rate": 2.1395788349322256e-05,
|
|
"loss": 1.0826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984422743320465,
|
|
"step": 624,
|
|
"valid_targets_mean": 16027.5,
|
|
"valid_targets_min": 14217
|
|
},
|
|
{
|
|
"epoch": 2.6609808102345416,
|
|
"grad_norm": 0.09018356912659332,
|
|
"learning_rate": 2.133648364045689e-05,
|
|
"loss": 1.0451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041178584098816,
|
|
"step": 625,
|
|
"valid_targets_mean": 16088.3,
|
|
"valid_targets_min": 15039
|
|
},
|
|
{
|
|
"epoch": 2.6652452025586353,
|
|
"grad_norm": 0.09421151024968558,
|
|
"learning_rate": 2.1277167125310996e-05,
|
|
"loss": 1.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619740068912506,
|
|
"step": 626,
|
|
"valid_targets_mean": 15990.1,
|
|
"valid_targets_min": 14799
|
|
},
|
|
{
|
|
"epoch": 2.6695095948827294,
|
|
"grad_norm": 0.09228819601401904,
|
|
"learning_rate": 2.1217839327877098e-05,
|
|
"loss": 1.0482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2840353846549988,
|
|
"step": 627,
|
|
"valid_targets_mean": 16196.8,
|
|
"valid_targets_min": 15092
|
|
},
|
|
{
|
|
"epoch": 2.673773987206823,
|
|
"grad_norm": 0.08481831252380187,
|
|
"learning_rate": 2.1158500772247352e-05,
|
|
"loss": 1.0724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569167912006378,
|
|
"step": 628,
|
|
"valid_targets_mean": 13053.5,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 2.678038379530917,
|
|
"grad_norm": 0.09418033939174189,
|
|
"learning_rate": 2.1099151982608985e-05,
|
|
"loss": 1.0643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25559014081954956,
|
|
"step": 629,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 14642
|
|
},
|
|
{
|
|
"epoch": 2.6823027718550105,
|
|
"grad_norm": 0.08645301561904324,
|
|
"learning_rate": 2.1039793483239607e-05,
|
|
"loss": 1.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004031479358673,
|
|
"step": 630,
|
|
"valid_targets_mean": 16166.0,
|
|
"valid_targets_min": 15212
|
|
},
|
|
{
|
|
"epoch": 2.6865671641791042,
|
|
"grad_norm": 0.08945224239369368,
|
|
"learning_rate": 2.0980425798502616e-05,
|
|
"loss": 1.0302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16778279840946198,
|
|
"step": 631,
|
|
"valid_targets_mean": 8263.4,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.6908315565031984,
|
|
"grad_norm": 0.08087704822582027,
|
|
"learning_rate": 2.092104945284255e-05,
|
|
"loss": 1.0178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24778887629508972,
|
|
"step": 632,
|
|
"valid_targets_mean": 16053.2,
|
|
"valid_targets_min": 13269
|
|
},
|
|
{
|
|
"epoch": 2.695095948827292,
|
|
"grad_norm": 0.09961364948863717,
|
|
"learning_rate": 2.0861664970780434e-05,
|
|
"loss": 1.0486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055853247642517,
|
|
"step": 633,
|
|
"valid_targets_mean": 16009.6,
|
|
"valid_targets_min": 12683
|
|
},
|
|
{
|
|
"epoch": 2.699360341151386,
|
|
"grad_norm": 0.09475459444661861,
|
|
"learning_rate": 2.08022728769092e-05,
|
|
"loss": 1.0684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21806904673576355,
|
|
"step": 634,
|
|
"valid_targets_mean": 11421.9,
|
|
"valid_targets_min": 6740
|
|
},
|
|
{
|
|
"epoch": 2.70362473347548,
|
|
"grad_norm": 0.08913863794894203,
|
|
"learning_rate": 2.0742873695889005e-05,
|
|
"loss": 1.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29462793469429016,
|
|
"step": 635,
|
|
"valid_targets_mean": 16118.1,
|
|
"valid_targets_min": 15103
|
|
},
|
|
{
|
|
"epoch": 2.7078891257995736,
|
|
"grad_norm": 0.09177278858727192,
|
|
"learning_rate": 2.0683467952442626e-05,
|
|
"loss": 1.0583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29734280705451965,
|
|
"step": 636,
|
|
"valid_targets_mean": 16190.1,
|
|
"valid_targets_min": 15254
|
|
},
|
|
{
|
|
"epoch": 2.7121535181236673,
|
|
"grad_norm": 0.09369958136709157,
|
|
"learning_rate": 2.0624056171350785e-05,
|
|
"loss": 1.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2411797046661377,
|
|
"step": 637,
|
|
"valid_targets_mean": 14217.3,
|
|
"valid_targets_min": 11521
|
|
},
|
|
{
|
|
"epoch": 2.716417910447761,
|
|
"grad_norm": 0.08691845233070916,
|
|
"learning_rate": 2.0564638877447566e-05,
|
|
"loss": 1.0246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900591492652893,
|
|
"step": 638,
|
|
"valid_targets_mean": 16037.3,
|
|
"valid_targets_min": 15179
|
|
},
|
|
{
|
|
"epoch": 2.7206823027718547,
|
|
"grad_norm": 0.09754082534076555,
|
|
"learning_rate": 2.0505216595615742e-05,
|
|
"loss": 1.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31647834181785583,
|
|
"step": 639,
|
|
"valid_targets_mean": 16078.1,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 2.724946695095949,
|
|
"grad_norm": 0.10045697191920915,
|
|
"learning_rate": 2.044578985078215e-05,
|
|
"loss": 1.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25796759128570557,
|
|
"step": 640,
|
|
"valid_targets_mean": 15966.5,
|
|
"valid_targets_min": 14565
|
|
},
|
|
{
|
|
"epoch": 2.7292110874200426,
|
|
"grad_norm": 0.0905527744835088,
|
|
"learning_rate": 2.0386359167913046e-05,
|
|
"loss": 1.0673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30935513973236084,
|
|
"step": 641,
|
|
"valid_targets_mean": 16132.6,
|
|
"valid_targets_min": 15251
|
|
},
|
|
{
|
|
"epoch": 2.7334754797441363,
|
|
"grad_norm": 0.09582440735029638,
|
|
"learning_rate": 2.0326925072009485e-05,
|
|
"loss": 1.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1900860220193863,
|
|
"step": 642,
|
|
"valid_targets_mean": 10590.5,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 2.7377398720682304,
|
|
"grad_norm": 0.10528177709130765,
|
|
"learning_rate": 2.0267488088102657e-05,
|
|
"loss": 1.0914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26242324709892273,
|
|
"step": 643,
|
|
"valid_targets_mean": 16125.2,
|
|
"valid_targets_min": 15316
|
|
},
|
|
{
|
|
"epoch": 2.742004264392324,
|
|
"grad_norm": 0.09041004611906639,
|
|
"learning_rate": 2.0208048741249288e-05,
|
|
"loss": 1.0357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30057209730148315,
|
|
"step": 644,
|
|
"valid_targets_mean": 16204.5,
|
|
"valid_targets_min": 15289
|
|
},
|
|
{
|
|
"epoch": 2.746268656716418,
|
|
"grad_norm": 0.0942373739177824,
|
|
"learning_rate": 2.014860755652695e-05,
|
|
"loss": 1.022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1844562292098999,
|
|
"step": 645,
|
|
"valid_targets_mean": 10178.1,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 2.750533049040512,
|
|
"grad_norm": 0.10265182695504874,
|
|
"learning_rate": 2.0089165059029477e-05,
|
|
"loss": 1.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515968084335327,
|
|
"step": 646,
|
|
"valid_targets_mean": 16183.7,
|
|
"valid_targets_min": 14523
|
|
},
|
|
{
|
|
"epoch": 2.7547974413646057,
|
|
"grad_norm": 0.0812573258054983,
|
|
"learning_rate": 2.0029721773862277e-05,
|
|
"loss": 1.0541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29949331283569336,
|
|
"step": 647,
|
|
"valid_targets_mean": 15876.1,
|
|
"valid_targets_min": 9034
|
|
},
|
|
{
|
|
"epoch": 2.7590618336886994,
|
|
"grad_norm": 0.09116136930060786,
|
|
"learning_rate": 1.997027822613773e-05,
|
|
"loss": 1.0244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21316248178482056,
|
|
"step": 648,
|
|
"valid_targets_mean": 13739.7,
|
|
"valid_targets_min": 10483
|
|
},
|
|
{
|
|
"epoch": 2.763326226012793,
|
|
"grad_norm": 0.09937349255103066,
|
|
"learning_rate": 1.9910834940970533e-05,
|
|
"loss": 1.0904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828340530395508,
|
|
"step": 649,
|
|
"valid_targets_mean": 16141.0,
|
|
"valid_targets_min": 15510
|
|
},
|
|
{
|
|
"epoch": 2.767590618336887,
|
|
"grad_norm": 0.09363621572113295,
|
|
"learning_rate": 1.985139244347305e-05,
|
|
"loss": 1.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30304622650146484,
|
|
"step": 650,
|
|
"valid_targets_mean": 16120.6,
|
|
"valid_targets_min": 14716
|
|
},
|
|
{
|
|
"epoch": 2.771855010660981,
|
|
"grad_norm": 0.08901623783710519,
|
|
"learning_rate": 1.979195125875072e-05,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547648549079895,
|
|
"step": 651,
|
|
"valid_targets_mean": 15226.9,
|
|
"valid_targets_min": 10474
|
|
},
|
|
{
|
|
"epoch": 2.7761194029850746,
|
|
"grad_norm": 0.09933019079562769,
|
|
"learning_rate": 1.9732511911897353e-05,
|
|
"loss": 1.0708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006623685359955,
|
|
"step": 652,
|
|
"valid_targets_mean": 16086.7,
|
|
"valid_targets_min": 14762
|
|
},
|
|
{
|
|
"epoch": 2.7803837953091683,
|
|
"grad_norm": 0.1157715202566833,
|
|
"learning_rate": 1.9673074927990525e-05,
|
|
"loss": 1.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24784070253372192,
|
|
"step": 653,
|
|
"valid_targets_mean": 12677.6,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 2.7846481876332625,
|
|
"grad_norm": 0.08775943624308227,
|
|
"learning_rate": 1.9613640832086957e-05,
|
|
"loss": 1.0269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655026614665985,
|
|
"step": 654,
|
|
"valid_targets_mean": 16157.6,
|
|
"valid_targets_min": 15679
|
|
},
|
|
{
|
|
"epoch": 2.788912579957356,
|
|
"grad_norm": 0.09716943567402454,
|
|
"learning_rate": 1.9554210149217855e-05,
|
|
"loss": 1.0261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965433597564697,
|
|
"step": 655,
|
|
"valid_targets_mean": 16022.3,
|
|
"valid_targets_min": 13339
|
|
},
|
|
{
|
|
"epoch": 2.79317697228145,
|
|
"grad_norm": 0.10353007873763757,
|
|
"learning_rate": 1.9494783404384265e-05,
|
|
"loss": 1.0625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20122894644737244,
|
|
"step": 656,
|
|
"valid_targets_mean": 10107.4,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 2.7974413646055436,
|
|
"grad_norm": 0.08658540613783472,
|
|
"learning_rate": 1.9435361122552437e-05,
|
|
"loss": 1.0319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642931342124939,
|
|
"step": 657,
|
|
"valid_targets_mean": 16139.8,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 2.8017057569296373,
|
|
"grad_norm": 0.09189866644265227,
|
|
"learning_rate": 1.9375943828649215e-05,
|
|
"loss": 1.0376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28480756282806396,
|
|
"step": 658,
|
|
"valid_targets_mean": 16094.2,
|
|
"valid_targets_min": 14801
|
|
},
|
|
{
|
|
"epoch": 2.8059701492537314,
|
|
"grad_norm": 0.09954696943113545,
|
|
"learning_rate": 1.9316532047557378e-05,
|
|
"loss": 1.0435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21189963817596436,
|
|
"step": 659,
|
|
"valid_targets_mean": 12354.2,
|
|
"valid_targets_min": 9117
|
|
},
|
|
{
|
|
"epoch": 2.810234541577825,
|
|
"grad_norm": 0.09026140756179053,
|
|
"learning_rate": 1.9257126304110998e-05,
|
|
"loss": 1.0305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856801152229309,
|
|
"step": 660,
|
|
"valid_targets_mean": 16119.8,
|
|
"valid_targets_min": 15401
|
|
},
|
|
{
|
|
"epoch": 2.814498933901919,
|
|
"grad_norm": 0.10047095130781947,
|
|
"learning_rate": 1.919772712309081e-05,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28844642639160156,
|
|
"step": 661,
|
|
"valid_targets_mean": 16164.4,
|
|
"valid_targets_min": 15138
|
|
},
|
|
{
|
|
"epoch": 2.818763326226013,
|
|
"grad_norm": 0.09211308648106672,
|
|
"learning_rate": 1.9138335029219572e-05,
|
|
"loss": 1.0639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25729966163635254,
|
|
"step": 662,
|
|
"valid_targets_mean": 14424.8,
|
|
"valid_targets_min": 12864
|
|
},
|
|
{
|
|
"epoch": 2.8230277185501067,
|
|
"grad_norm": 0.09093797654391877,
|
|
"learning_rate": 1.9078950547157458e-05,
|
|
"loss": 1.0488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27843421697616577,
|
|
"step": 663,
|
|
"valid_targets_mean": 16069.9,
|
|
"valid_targets_min": 13521
|
|
},
|
|
{
|
|
"epoch": 2.8272921108742004,
|
|
"grad_norm": 0.09839874564174367,
|
|
"learning_rate": 1.9019574201497387e-05,
|
|
"loss": 1.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3204609155654907,
|
|
"step": 664,
|
|
"valid_targets_mean": 16060.7,
|
|
"valid_targets_min": 14411
|
|
},
|
|
{
|
|
"epoch": 2.831556503198294,
|
|
"grad_norm": 0.08788270686273389,
|
|
"learning_rate": 1.8960206516760396e-05,
|
|
"loss": 1.0505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26410871744155884,
|
|
"step": 665,
|
|
"valid_targets_mean": 15881.4,
|
|
"valid_targets_min": 10501
|
|
},
|
|
{
|
|
"epoch": 2.835820895522388,
|
|
"grad_norm": 0.08190235340682576,
|
|
"learning_rate": 1.890084801739102e-05,
|
|
"loss": 1.0373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28645169734954834,
|
|
"step": 666,
|
|
"valid_targets_mean": 16129.9,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 2.840085287846482,
|
|
"grad_norm": 0.08713533112771744,
|
|
"learning_rate": 1.884149922775265e-05,
|
|
"loss": 1.0407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22442081570625305,
|
|
"step": 667,
|
|
"valid_targets_mean": 11333.3,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 2.8443496801705757,
|
|
"grad_norm": 0.09217551519326586,
|
|
"learning_rate": 1.878216067212291e-05,
|
|
"loss": 1.0328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25745007395744324,
|
|
"step": 668,
|
|
"valid_targets_mean": 16075.4,
|
|
"valid_targets_min": 14499
|
|
},
|
|
{
|
|
"epoch": 2.8486140724946694,
|
|
"grad_norm": 0.08244449560128018,
|
|
"learning_rate": 1.8722832874689007e-05,
|
|
"loss": 1.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307827889919281,
|
|
"step": 669,
|
|
"valid_targets_mean": 16107.0,
|
|
"valid_targets_min": 14970
|
|
},
|
|
{
|
|
"epoch": 2.8528784648187635,
|
|
"grad_norm": 0.08134349568484765,
|
|
"learning_rate": 1.8663516359543123e-05,
|
|
"loss": 1.017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15928973257541656,
|
|
"step": 670,
|
|
"valid_targets_mean": 8727.8,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.08876031081453678,
|
|
"learning_rate": 1.860421165067775e-05,
|
|
"loss": 1.0358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546696960926056,
|
|
"step": 671,
|
|
"valid_targets_mean": 16162.0,
|
|
"valid_targets_min": 15321
|
|
},
|
|
{
|
|
"epoch": 2.861407249466951,
|
|
"grad_norm": 0.08552632215546412,
|
|
"learning_rate": 1.8544919271981125e-05,
|
|
"loss": 1.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3161398768424988,
|
|
"step": 672,
|
|
"valid_targets_mean": 16003.1,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 2.8656716417910446,
|
|
"grad_norm": 0.08886288357967212,
|
|
"learning_rate": 1.8485639747232535e-05,
|
|
"loss": 1.0377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23762404918670654,
|
|
"step": 673,
|
|
"valid_targets_mean": 13637.6,
|
|
"valid_targets_min": 10567
|
|
},
|
|
{
|
|
"epoch": 2.8699360341151388,
|
|
"grad_norm": 0.08367097882277101,
|
|
"learning_rate": 1.8426373600097723e-05,
|
|
"loss": 1.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29552119970321655,
|
|
"step": 674,
|
|
"valid_targets_mean": 16018.7,
|
|
"valid_targets_min": 14102
|
|
},
|
|
{
|
|
"epoch": 2.8742004264392325,
|
|
"grad_norm": 0.09249938611035236,
|
|
"learning_rate": 1.836712135412424e-05,
|
|
"loss": 1.0457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2954329252243042,
|
|
"step": 675,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 14450
|
|
},
|
|
{
|
|
"epoch": 2.878464818763326,
|
|
"grad_norm": 0.08801005780337384,
|
|
"learning_rate": 1.8307883532736878e-05,
|
|
"loss": 1.0294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25021350383758545,
|
|
"step": 676,
|
|
"valid_targets_mean": 15554.6,
|
|
"valid_targets_min": 13735
|
|
},
|
|
{
|
|
"epoch": 2.88272921108742,
|
|
"grad_norm": 0.08655854084868371,
|
|
"learning_rate": 1.8248660659232964e-05,
|
|
"loss": 1.0223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28979241847991943,
|
|
"step": 677,
|
|
"valid_targets_mean": 16139.5,
|
|
"valid_targets_min": 15353
|
|
},
|
|
{
|
|
"epoch": 2.886993603411514,
|
|
"grad_norm": 0.09255037665222515,
|
|
"learning_rate": 1.8189453256777798e-05,
|
|
"loss": 1.0298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25256070494651794,
|
|
"step": 678,
|
|
"valid_targets_mean": 13409.3,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 2.8912579957356077,
|
|
"grad_norm": 0.08148522587940459,
|
|
"learning_rate": 1.8130261848399996e-05,
|
|
"loss": 1.0585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25883835554122925,
|
|
"step": 679,
|
|
"valid_targets_mean": 16064.2,
|
|
"valid_targets_min": 13548
|
|
},
|
|
{
|
|
"epoch": 2.8955223880597014,
|
|
"grad_norm": 0.09226840319305137,
|
|
"learning_rate": 1.8071086956986916e-05,
|
|
"loss": 1.0249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30064886808395386,
|
|
"step": 680,
|
|
"valid_targets_mean": 16108.7,
|
|
"valid_targets_min": 15214
|
|
},
|
|
{
|
|
"epoch": 2.8997867803837956,
|
|
"grad_norm": 0.08403735740030342,
|
|
"learning_rate": 1.8011929105279967e-05,
|
|
"loss": 1.0478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15606530010700226,
|
|
"step": 681,
|
|
"valid_targets_mean": 8671.9,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 2.9040511727078893,
|
|
"grad_norm": 0.08519245202948904,
|
|
"learning_rate": 1.795278881587007e-05,
|
|
"loss": 1.0669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27199992537498474,
|
|
"step": 682,
|
|
"valid_targets_mean": 16047.3,
|
|
"valid_targets_min": 15067
|
|
},
|
|
{
|
|
"epoch": 2.908315565031983,
|
|
"grad_norm": 0.08429690440125712,
|
|
"learning_rate": 1.7893666611192962e-05,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856326103210449,
|
|
"step": 683,
|
|
"valid_targets_mean": 16150.4,
|
|
"valid_targets_min": 15440
|
|
},
|
|
{
|
|
"epoch": 2.9125799573560767,
|
|
"grad_norm": 0.08943226140070931,
|
|
"learning_rate": 1.783456301352467e-05,
|
|
"loss": 1.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19469720125198364,
|
|
"step": 684,
|
|
"valid_targets_mean": 11011.3,
|
|
"valid_targets_min": 4659
|
|
},
|
|
{
|
|
"epoch": 2.9168443496801704,
|
|
"grad_norm": 0.08887278986961576,
|
|
"learning_rate": 1.7775478544976813e-05,
|
|
"loss": 1.0713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27650293707847595,
|
|
"step": 685,
|
|
"valid_targets_mean": 16146.7,
|
|
"valid_targets_min": 15248
|
|
},
|
|
{
|
|
"epoch": 2.9211087420042645,
|
|
"grad_norm": 0.09143607431807814,
|
|
"learning_rate": 1.7716413727492035e-05,
|
|
"loss": 1.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3126012980937958,
|
|
"step": 686,
|
|
"valid_targets_mean": 16078.1,
|
|
"valid_targets_min": 14859
|
|
},
|
|
{
|
|
"epoch": 2.925373134328358,
|
|
"grad_norm": 0.0927055979756244,
|
|
"learning_rate": 1.7657369082839392e-05,
|
|
"loss": 1.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25906461477279663,
|
|
"step": 687,
|
|
"valid_targets_mean": 14083.9,
|
|
"valid_targets_min": 11371
|
|
},
|
|
{
|
|
"epoch": 2.929637526652452,
|
|
"grad_norm": 0.08988369556573016,
|
|
"learning_rate": 1.7598345132609747e-05,
|
|
"loss": 1.0583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021179735660553,
|
|
"step": 688,
|
|
"valid_targets_mean": 16175.2,
|
|
"valid_targets_min": 15506
|
|
},
|
|
{
|
|
"epoch": 2.933901918976546,
|
|
"grad_norm": 0.0954727540456521,
|
|
"learning_rate": 1.7539342398211132e-05,
|
|
"loss": 1.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3158177137374878,
|
|
"step": 689,
|
|
"valid_targets_mean": 16110.4,
|
|
"valid_targets_min": 15536
|
|
},
|
|
{
|
|
"epoch": 2.9381663113006398,
|
|
"grad_norm": 0.08390472737307002,
|
|
"learning_rate": 1.748036140086416e-05,
|
|
"loss": 1.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.287629634141922,
|
|
"step": 690,
|
|
"valid_targets_mean": 15974.7,
|
|
"valid_targets_min": 14385
|
|
},
|
|
{
|
|
"epoch": 2.9424307036247335,
|
|
"grad_norm": 0.0878337107121928,
|
|
"learning_rate": 1.742140266159744e-05,
|
|
"loss": 1.0372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29759475588798523,
|
|
"step": 691,
|
|
"valid_targets_mean": 15999.4,
|
|
"valid_targets_min": 14158
|
|
},
|
|
{
|
|
"epoch": 2.946695095948827,
|
|
"grad_norm": 0.08199568617020675,
|
|
"learning_rate": 1.7362466701242943e-05,
|
|
"loss": 1.0557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23069460690021515,
|
|
"step": 692,
|
|
"valid_targets_mean": 11582.3,
|
|
"valid_targets_min": 2742
|
|
},
|
|
{
|
|
"epoch": 2.950959488272921,
|
|
"grad_norm": 0.08262759256210828,
|
|
"learning_rate": 1.7303554040431426e-05,
|
|
"loss": 1.0602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24836330115795135,
|
|
"step": 693,
|
|
"valid_targets_mean": 16079.3,
|
|
"valid_targets_min": 13705
|
|
},
|
|
{
|
|
"epoch": 2.955223880597015,
|
|
"grad_norm": 0.07942781505324352,
|
|
"learning_rate": 1.7244665199587812e-05,
|
|
"loss": 1.0629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713623642921448,
|
|
"step": 694,
|
|
"valid_targets_mean": 16200.9,
|
|
"valid_targets_min": 15605
|
|
},
|
|
{
|
|
"epoch": 2.9594882729211087,
|
|
"grad_norm": 0.08783357798491587,
|
|
"learning_rate": 1.7185800698926594e-05,
|
|
"loss": 1.0861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18624457716941833,
|
|
"step": 695,
|
|
"valid_targets_mean": 9865.8,
|
|
"valid_targets_min": 2459
|
|
},
|
|
{
|
|
"epoch": 2.9637526652452024,
|
|
"grad_norm": 0.09435941222683722,
|
|
"learning_rate": 1.7126961058447276e-05,
|
|
"loss": 1.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782531976699829,
|
|
"step": 696,
|
|
"valid_targets_mean": 15984.0,
|
|
"valid_targets_min": 14389
|
|
},
|
|
{
|
|
"epoch": 2.9680170575692966,
|
|
"grad_norm": 0.08080591893363676,
|
|
"learning_rate": 1.706814679792973e-05,
|
|
"loss": 1.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28476324677467346,
|
|
"step": 697,
|
|
"valid_targets_mean": 16059.2,
|
|
"valid_targets_min": 13000
|
|
},
|
|
{
|
|
"epoch": 2.9722814498933903,
|
|
"grad_norm": 0.09047073514158338,
|
|
"learning_rate": 1.7009358436929632e-05,
|
|
"loss": 1.0683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24980148673057556,
|
|
"step": 698,
|
|
"valid_targets_mean": 13358.3,
|
|
"valid_targets_min": 10646
|
|
},
|
|
{
|
|
"epoch": 2.976545842217484,
|
|
"grad_norm": 0.08554962747391083,
|
|
"learning_rate": 1.6950596494773855e-05,
|
|
"loss": 1.0943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30067670345306396,
|
|
"step": 699,
|
|
"valid_targets_mean": 16102.5,
|
|
"valid_targets_min": 15302
|
|
},
|
|
{
|
|
"epoch": 2.9808102345415777,
|
|
"grad_norm": 0.08026104886245046,
|
|
"learning_rate": 1.6891861490555906e-05,
|
|
"loss": 1.0306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29716920852661133,
|
|
"step": 700,
|
|
"valid_targets_mean": 16050.8,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 2.9850746268656714,
|
|
"grad_norm": 0.09135139383937013,
|
|
"learning_rate": 1.683315394313132e-05,
|
|
"loss": 1.0629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2662363052368164,
|
|
"step": 701,
|
|
"valid_targets_mean": 15596.2,
|
|
"valid_targets_min": 14121
|
|
},
|
|
{
|
|
"epoch": 2.9893390191897655,
|
|
"grad_norm": 0.08480288597489376,
|
|
"learning_rate": 1.677447437111309e-05,
|
|
"loss": 1.0154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818028926849365,
|
|
"step": 702,
|
|
"valid_targets_mean": 16165.1,
|
|
"valid_targets_min": 15383
|
|
},
|
|
{
|
|
"epoch": 2.9936034115138592,
|
|
"grad_norm": 0.09582846054758275,
|
|
"learning_rate": 1.671582329286707e-05,
|
|
"loss": 1.065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22809189558029175,
|
|
"step": 703,
|
|
"valid_targets_mean": 12674.2,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 2.997867803837953,
|
|
"grad_norm": 0.08687952220334387,
|
|
"learning_rate": 1.66572012265074e-05,
|
|
"loss": 1.0448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24771744012832642,
|
|
"step": 704,
|
|
"valid_targets_mean": 16156.9,
|
|
"valid_targets_min": 14523
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.10680242719207868,
|
|
"learning_rate": 1.6598608689891953e-05,
|
|
"loss": 1.0598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4925702214241028,
|
|
"step": 705,
|
|
"valid_targets_mean": 11504.4,
|
|
"valid_targets_min": 3101
|
|
},
|
|
{
|
|
"epoch": 3.0042643923240937,
|
|
"grad_norm": 0.10515542447994522,
|
|
"learning_rate": 1.654004620061773e-05,
|
|
"loss": 1.054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25454628467559814,
|
|
"step": 706,
|
|
"valid_targets_mean": 16103.3,
|
|
"valid_targets_min": 14403
|
|
},
|
|
{
|
|
"epoch": 3.008528784648188,
|
|
"grad_norm": 0.08856534871366123,
|
|
"learning_rate": 1.6481514276016297e-05,
|
|
"loss": 1.047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274177223443985,
|
|
"step": 707,
|
|
"valid_targets_mean": 15985.2,
|
|
"valid_targets_min": 10358
|
|
},
|
|
{
|
|
"epoch": 3.0127931769722816,
|
|
"grad_norm": 0.08041928221288804,
|
|
"learning_rate": 1.6423013433149207e-05,
|
|
"loss": 1.0236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23566097021102905,
|
|
"step": 708,
|
|
"valid_targets_mean": 14330.6,
|
|
"valid_targets_min": 11007
|
|
},
|
|
{
|
|
"epoch": 3.0170575692963753,
|
|
"grad_norm": 0.09756672559708704,
|
|
"learning_rate": 1.636454418880347e-05,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27917659282684326,
|
|
"step": 709,
|
|
"valid_targets_mean": 16150.8,
|
|
"valid_targets_min": 15008
|
|
},
|
|
{
|
|
"epoch": 3.021321961620469,
|
|
"grad_norm": 0.07471363063527632,
|
|
"learning_rate": 1.630610705948693e-05,
|
|
"loss": 1.0327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290149986743927,
|
|
"step": 710,
|
|
"valid_targets_mean": 16151.3,
|
|
"valid_targets_min": 15544
|
|
},
|
|
{
|
|
"epoch": 3.025586353944563,
|
|
"grad_norm": 0.08516084455848486,
|
|
"learning_rate": 1.6247702561423753e-05,
|
|
"loss": 1.0474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24989154934883118,
|
|
"step": 711,
|
|
"valid_targets_mean": 14289.8,
|
|
"valid_targets_min": 11741
|
|
},
|
|
{
|
|
"epoch": 3.029850746268657,
|
|
"grad_norm": 0.10281701520297704,
|
|
"learning_rate": 1.6189331210549828e-05,
|
|
"loss": 1.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172495663166046,
|
|
"step": 712,
|
|
"valid_targets_mean": 16093.9,
|
|
"valid_targets_min": 15223
|
|
},
|
|
{
|
|
"epoch": 3.0341151385927505,
|
|
"grad_norm": 0.08407444251066593,
|
|
"learning_rate": 1.613099352250825e-05,
|
|
"loss": 1.0611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24888095259666443,
|
|
"step": 713,
|
|
"valid_targets_mean": 13047.3,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 3.038379530916844,
|
|
"grad_norm": 0.07691767611234396,
|
|
"learning_rate": 1.6072690012644717e-05,
|
|
"loss": 1.0412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26046547293663025,
|
|
"step": 714,
|
|
"valid_targets_mean": 15963.5,
|
|
"valid_targets_min": 14140
|
|
},
|
|
{
|
|
"epoch": 3.0426439232409384,
|
|
"grad_norm": 0.0858149302874892,
|
|
"learning_rate": 1.6014421196003022e-05,
|
|
"loss": 1.0368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2685975730419159,
|
|
"step": 715,
|
|
"valid_targets_mean": 16215.7,
|
|
"valid_targets_min": 15468
|
|
},
|
|
{
|
|
"epoch": 3.046908315565032,
|
|
"grad_norm": 0.08048585385716217,
|
|
"learning_rate": 1.5956187587320468e-05,
|
|
"loss": 1.0301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19261279702186584,
|
|
"step": 716,
|
|
"valid_targets_mean": 10785.5,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 3.0511727078891258,
|
|
"grad_norm": 0.08020147743722424,
|
|
"learning_rate": 1.5897989701023355e-05,
|
|
"loss": 1.0446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643775939941406,
|
|
"step": 717,
|
|
"valid_targets_mean": 16099.7,
|
|
"valid_targets_min": 14827
|
|
},
|
|
{
|
|
"epoch": 3.0554371002132195,
|
|
"grad_norm": 0.08176292076944705,
|
|
"learning_rate": 1.58398280512224e-05,
|
|
"loss": 1.0669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29793334007263184,
|
|
"step": 718,
|
|
"valid_targets_mean": 16105.3,
|
|
"valid_targets_min": 15132
|
|
},
|
|
{
|
|
"epoch": 3.0597014925373136,
|
|
"grad_norm": 0.08471691627002831,
|
|
"learning_rate": 1.5781703151708215e-05,
|
|
"loss": 1.0444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2049698829650879,
|
|
"step": 719,
|
|
"valid_targets_mean": 11517.7,
|
|
"valid_targets_min": 4957
|
|
},
|
|
{
|
|
"epoch": 3.0639658848614073,
|
|
"grad_norm": 0.08166937114079702,
|
|
"learning_rate": 1.5723615515946773e-05,
|
|
"loss": 1.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27954721450805664,
|
|
"step": 720,
|
|
"valid_targets_mean": 16074.2,
|
|
"valid_targets_min": 14788
|
|
},
|
|
{
|
|
"epoch": 3.068230277185501,
|
|
"grad_norm": 0.0778508443300386,
|
|
"learning_rate": 1.5665565657074874e-05,
|
|
"loss": 1.0531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021158277988434,
|
|
"step": 721,
|
|
"valid_targets_mean": 16093.6,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 3.0724946695095947,
|
|
"grad_norm": 0.08446386580454958,
|
|
"learning_rate": 1.560755408789558e-05,
|
|
"loss": 1.0349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2524661421775818,
|
|
"step": 722,
|
|
"valid_targets_mean": 15457.3,
|
|
"valid_targets_min": 13274
|
|
},
|
|
{
|
|
"epoch": 3.076759061833689,
|
|
"grad_norm": 0.07888570193850611,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 1.0731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2961902320384979,
|
|
"step": 723,
|
|
"valid_targets_mean": 16061.1,
|
|
"valid_targets_min": 14523
|
|
},
|
|
{
|
|
"epoch": 3.0810234541577826,
|
|
"grad_norm": 0.08623371549451193,
|
|
"learning_rate": 1.5491647868131343e-05,
|
|
"loss": 1.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871866822242737,
|
|
"step": 724,
|
|
"valid_targets_mean": 16201.7,
|
|
"valid_targets_min": 15332
|
|
},
|
|
{
|
|
"epoch": 3.0852878464818763,
|
|
"grad_norm": 0.08984633865392529,
|
|
"learning_rate": 1.5433754241443223e-05,
|
|
"loss": 1.03,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25260069966316223,
|
|
"step": 725,
|
|
"valid_targets_mean": 15874.2,
|
|
"valid_targets_min": 7513
|
|
},
|
|
{
|
|
"epoch": 3.08955223880597,
|
|
"grad_norm": 0.08235198582707394,
|
|
"learning_rate": 1.53759009522323e-05,
|
|
"loss": 1.0536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26522937417030334,
|
|
"step": 726,
|
|
"valid_targets_mean": 16216.5,
|
|
"valid_targets_min": 15554
|
|
},
|
|
{
|
|
"epoch": 3.093816631130064,
|
|
"grad_norm": 0.08398891074641789,
|
|
"learning_rate": 1.5318088511565185e-05,
|
|
"loss": 1.0099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21349315345287323,
|
|
"step": 727,
|
|
"valid_targets_mean": 12136.4,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 3.098081023454158,
|
|
"grad_norm": 0.08350463082538075,
|
|
"learning_rate": 1.5260317430147627e-05,
|
|
"loss": 1.0253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605102062225342,
|
|
"step": 728,
|
|
"valid_targets_mean": 16000.9,
|
|
"valid_targets_min": 13392
|
|
},
|
|
{
|
|
"epoch": 3.1023454157782515,
|
|
"grad_norm": 0.08506927196089271,
|
|
"learning_rate": 1.5202588218320024e-05,
|
|
"loss": 1.0366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28882962465286255,
|
|
"step": 729,
|
|
"valid_targets_mean": 16186.7,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 3.106609808102345,
|
|
"grad_norm": 0.08251668999098907,
|
|
"learning_rate": 1.5144901386052924e-05,
|
|
"loss": 1.0137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15949462354183197,
|
|
"step": 730,
|
|
"valid_targets_mean": 9050.7,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 3.1108742004264394,
|
|
"grad_norm": 0.087607059124092,
|
|
"learning_rate": 1.5087257442942467e-05,
|
|
"loss": 1.0415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26088231801986694,
|
|
"step": 731,
|
|
"valid_targets_mean": 16143.6,
|
|
"valid_targets_min": 15092
|
|
},
|
|
{
|
|
"epoch": 3.115138592750533,
|
|
"grad_norm": 0.08374786309927856,
|
|
"learning_rate": 1.502965689820593e-05,
|
|
"loss": 1.054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28557437658309937,
|
|
"step": 732,
|
|
"valid_targets_mean": 16082.7,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 3.1194029850746268,
|
|
"grad_norm": 0.09109924731350247,
|
|
"learning_rate": 1.4972100260677222e-05,
|
|
"loss": 1.0868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22474735975265503,
|
|
"step": 733,
|
|
"valid_targets_mean": 13019.3,
|
|
"valid_targets_min": 7209
|
|
},
|
|
{
|
|
"epoch": 3.1236673773987205,
|
|
"grad_norm": 0.0839451784389262,
|
|
"learning_rate": 1.4914588038802383e-05,
|
|
"loss": 1.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28970491886138916,
|
|
"step": 734,
|
|
"valid_targets_mean": 16166.0,
|
|
"valid_targets_min": 15530
|
|
},
|
|
{
|
|
"epoch": 3.1279317697228146,
|
|
"grad_norm": 0.08175690644690248,
|
|
"learning_rate": 1.4857120740635084e-05,
|
|
"loss": 1.059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3130916953086853,
|
|
"step": 735,
|
|
"valid_targets_mean": 16063.3,
|
|
"valid_targets_min": 13918
|
|
},
|
|
{
|
|
"epoch": 3.1321961620469083,
|
|
"grad_norm": 0.08313460006153144,
|
|
"learning_rate": 1.4799698873832153e-05,
|
|
"loss": 1.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25852394104003906,
|
|
"step": 736,
|
|
"valid_targets_mean": 15362.5,
|
|
"valid_targets_min": 12879
|
|
},
|
|
{
|
|
"epoch": 3.136460554371002,
|
|
"grad_norm": 0.08495339256528775,
|
|
"learning_rate": 1.4742322945649073e-05,
|
|
"loss": 1.0164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822078466415405,
|
|
"step": 737,
|
|
"valid_targets_mean": 16203.3,
|
|
"valid_targets_min": 15740
|
|
},
|
|
{
|
|
"epoch": 3.140724946695096,
|
|
"grad_norm": 0.08744798283001406,
|
|
"learning_rate": 1.4684993462935532e-05,
|
|
"loss": 1.025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23318520188331604,
|
|
"step": 738,
|
|
"valid_targets_mean": 13404.4,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 3.14498933901919,
|
|
"grad_norm": 0.07625156694544431,
|
|
"learning_rate": 1.462771093213092e-05,
|
|
"loss": 1.0303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26774466037750244,
|
|
"step": 739,
|
|
"valid_targets_mean": 16095.8,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 3.1492537313432836,
|
|
"grad_norm": 0.08584101585962192,
|
|
"learning_rate": 1.4570475859259856e-05,
|
|
"loss": 1.0321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26959651708602905,
|
|
"step": 740,
|
|
"valid_targets_mean": 16216.1,
|
|
"valid_targets_min": 15809
|
|
},
|
|
{
|
|
"epoch": 3.1535181236673773,
|
|
"grad_norm": 0.08503655489604069,
|
|
"learning_rate": 1.4513288749927714e-05,
|
|
"loss": 1.0278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18220467865467072,
|
|
"step": 741,
|
|
"valid_targets_mean": 9832.8,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 3.1577825159914714,
|
|
"grad_norm": 0.07922328089380902,
|
|
"learning_rate": 1.4456150109316192e-05,
|
|
"loss": 1.0187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747794985771179,
|
|
"step": 742,
|
|
"valid_targets_mean": 15996.3,
|
|
"valid_targets_min": 14017
|
|
},
|
|
{
|
|
"epoch": 3.162046908315565,
|
|
"grad_norm": 0.07739864982182597,
|
|
"learning_rate": 1.4399060442178798e-05,
|
|
"loss": 1.0003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28899258375167847,
|
|
"step": 743,
|
|
"valid_targets_mean": 16165.8,
|
|
"valid_targets_min": 15422
|
|
},
|
|
{
|
|
"epoch": 3.166311300639659,
|
|
"grad_norm": 0.0810693167795834,
|
|
"learning_rate": 1.4342020252836437e-05,
|
|
"loss": 1.0627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21033403277397156,
|
|
"step": 744,
|
|
"valid_targets_mean": 11313.6,
|
|
"valid_targets_min": 6225
|
|
},
|
|
{
|
|
"epoch": 3.1705756929637525,
|
|
"grad_norm": 0.08453045627126494,
|
|
"learning_rate": 1.4285030045172913e-05,
|
|
"loss": 1.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26867228746414185,
|
|
"step": 745,
|
|
"valid_targets_mean": 16112.7,
|
|
"valid_targets_min": 13817
|
|
},
|
|
{
|
|
"epoch": 3.1748400852878467,
|
|
"grad_norm": 0.08300721244445816,
|
|
"learning_rate": 1.422809032263052e-05,
|
|
"loss": 1.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2795749306678772,
|
|
"step": 746,
|
|
"valid_targets_mean": 16137.6,
|
|
"valid_targets_min": 14586
|
|
},
|
|
{
|
|
"epoch": 3.1791044776119404,
|
|
"grad_norm": 0.07809408521071869,
|
|
"learning_rate": 1.4171201588205566e-05,
|
|
"loss": 1.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23751351237297058,
|
|
"step": 747,
|
|
"valid_targets_mean": 14687.8,
|
|
"valid_targets_min": 12060
|
|
},
|
|
{
|
|
"epoch": 3.183368869936034,
|
|
"grad_norm": 0.08074906582033739,
|
|
"learning_rate": 1.4114364344443935e-05,
|
|
"loss": 1.0234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285407692193985,
|
|
"step": 748,
|
|
"valid_targets_mean": 16079.2,
|
|
"valid_targets_min": 13594
|
|
},
|
|
{
|
|
"epoch": 3.1876332622601278,
|
|
"grad_norm": 0.08035953914618814,
|
|
"learning_rate": 1.4057579093436653e-05,
|
|
"loss": 1.0413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3009850084781647,
|
|
"step": 749,
|
|
"valid_targets_mean": 16168.8,
|
|
"valid_targets_min": 15580
|
|
},
|
|
{
|
|
"epoch": 3.191897654584222,
|
|
"grad_norm": 0.08433245262298346,
|
|
"learning_rate": 1.400084633681546e-05,
|
|
"loss": 1.049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25368544459342957,
|
|
"step": 750,
|
|
"valid_targets_mean": 16015.7,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 3.1961620469083156,
|
|
"grad_norm": 0.07482100656365735,
|
|
"learning_rate": 1.3944166575748355e-05,
|
|
"loss": 1.0088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836364507675171,
|
|
"step": 751,
|
|
"valid_targets_mean": 16104.0,
|
|
"valid_targets_min": 15381
|
|
},
|
|
{
|
|
"epoch": 3.2004264392324093,
|
|
"grad_norm": 0.08105388608403885,
|
|
"learning_rate": 1.3887540310935187e-05,
|
|
"loss": 1.0518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19851750135421753,
|
|
"step": 752,
|
|
"valid_targets_mean": 10778.7,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 3.204690831556503,
|
|
"grad_norm": 0.07522829388064263,
|
|
"learning_rate": 1.3830968042603226e-05,
|
|
"loss": 1.025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26374685764312744,
|
|
"step": 753,
|
|
"valid_targets_mean": 16032.7,
|
|
"valid_targets_min": 14458
|
|
},
|
|
{
|
|
"epoch": 3.208955223880597,
|
|
"grad_norm": 0.0804807281330305,
|
|
"learning_rate": 1.3774450270502762e-05,
|
|
"loss": 1.0274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022557497024536,
|
|
"step": 754,
|
|
"valid_targets_mean": 16131.6,
|
|
"valid_targets_min": 14794
|
|
},
|
|
{
|
|
"epoch": 3.213219616204691,
|
|
"grad_norm": 0.08493468253697091,
|
|
"learning_rate": 1.3717987493902656e-05,
|
|
"loss": 1.0435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16654235124588013,
|
|
"step": 755,
|
|
"valid_targets_mean": 8879.4,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 3.2174840085287846,
|
|
"grad_norm": 0.07310833473180844,
|
|
"learning_rate": 1.3661580211585947e-05,
|
|
"loss": 1.0359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24599093198776245,
|
|
"step": 756,
|
|
"valid_targets_mean": 16172.4,
|
|
"valid_targets_min": 14652
|
|
},
|
|
{
|
|
"epoch": 3.2217484008528783,
|
|
"grad_norm": 0.07620405767387398,
|
|
"learning_rate": 1.3605228921845457e-05,
|
|
"loss": 1.0446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29698827862739563,
|
|
"step": 757,
|
|
"valid_targets_mean": 16124.7,
|
|
"valid_targets_min": 15163
|
|
},
|
|
{
|
|
"epoch": 3.2260127931769724,
|
|
"grad_norm": 0.08859458116481594,
|
|
"learning_rate": 1.3548934122479373e-05,
|
|
"loss": 1.0795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2227519303560257,
|
|
"step": 758,
|
|
"valid_targets_mean": 12696.9,
|
|
"valid_targets_min": 9336
|
|
},
|
|
{
|
|
"epoch": 3.230277185501066,
|
|
"grad_norm": 0.08182950216466174,
|
|
"learning_rate": 1.349269631078686e-05,
|
|
"loss": 1.0572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28481632471084595,
|
|
"step": 759,
|
|
"valid_targets_mean": 16107.1,
|
|
"valid_targets_min": 14450
|
|
},
|
|
{
|
|
"epoch": 3.23454157782516,
|
|
"grad_norm": 0.08014075445543474,
|
|
"learning_rate": 1.3436515983563659e-05,
|
|
"loss": 1.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096708655357361,
|
|
"step": 760,
|
|
"valid_targets_mean": 16025.5,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 3.2388059701492535,
|
|
"grad_norm": 0.07332645065841083,
|
|
"learning_rate": 1.3380393637097692e-05,
|
|
"loss": 1.0033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24755217134952545,
|
|
"step": 761,
|
|
"valid_targets_mean": 15099.8,
|
|
"valid_targets_min": 12592
|
|
},
|
|
{
|
|
"epoch": 3.2430703624733477,
|
|
"grad_norm": 0.08332997121773575,
|
|
"learning_rate": 1.3324329767164708e-05,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27712202072143555,
|
|
"step": 762,
|
|
"valid_targets_mean": 16068.8,
|
|
"valid_targets_min": 13761
|
|
},
|
|
{
|
|
"epoch": 3.2473347547974414,
|
|
"grad_norm": 0.08390944400443621,
|
|
"learning_rate": 1.3268324869023878e-05,
|
|
"loss": 1.0451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501915693283081,
|
|
"step": 763,
|
|
"valid_targets_mean": 13014.6,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 3.251599147121535,
|
|
"grad_norm": 0.07968751870661324,
|
|
"learning_rate": 1.3212379437413421e-05,
|
|
"loss": 1.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2662803530693054,
|
|
"step": 764,
|
|
"valid_targets_mean": 16116.9,
|
|
"valid_targets_min": 14766
|
|
},
|
|
{
|
|
"epoch": 3.2558635394456292,
|
|
"grad_norm": 0.08392247234673011,
|
|
"learning_rate": 1.3156493966546236e-05,
|
|
"loss": 1.0535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28962135314941406,
|
|
"step": 765,
|
|
"valid_targets_mean": 16140.2,
|
|
"valid_targets_min": 14763
|
|
},
|
|
{
|
|
"epoch": 3.260127931769723,
|
|
"grad_norm": 0.08301295032048293,
|
|
"learning_rate": 1.3100668950105534e-05,
|
|
"loss": 1.092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20063841342926025,
|
|
"step": 766,
|
|
"valid_targets_mean": 10939.7,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 3.2643923240938166,
|
|
"grad_norm": 0.07436513887547831,
|
|
"learning_rate": 1.3044904881240507e-05,
|
|
"loss": 1.0284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25602662563323975,
|
|
"step": 767,
|
|
"valid_targets_mean": 16058.7,
|
|
"valid_targets_min": 14218
|
|
},
|
|
{
|
|
"epoch": 3.2686567164179103,
|
|
"grad_norm": 0.08685881662671009,
|
|
"learning_rate": 1.2989202252561926e-05,
|
|
"loss": 1.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30896884202957153,
|
|
"step": 768,
|
|
"valid_targets_mean": 16140.7,
|
|
"valid_targets_min": 15551
|
|
},
|
|
{
|
|
"epoch": 3.272921108742004,
|
|
"grad_norm": 0.08086255283717991,
|
|
"learning_rate": 1.2933561556137806e-05,
|
|
"loss": 1.0281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2316463440656662,
|
|
"step": 769,
|
|
"valid_targets_mean": 12826.8,
|
|
"valid_targets_min": 9122
|
|
},
|
|
{
|
|
"epoch": 3.277185501066098,
|
|
"grad_norm": 0.0758541980280874,
|
|
"learning_rate": 1.2877983283489062e-05,
|
|
"loss": 1.0343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27142560482025146,
|
|
"step": 770,
|
|
"valid_targets_mean": 16069.5,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 3.281449893390192,
|
|
"grad_norm": 0.0807164281018909,
|
|
"learning_rate": 1.2822467925585186e-05,
|
|
"loss": 1.0487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2932860553264618,
|
|
"step": 771,
|
|
"valid_targets_mean": 16077.2,
|
|
"valid_targets_min": 14180
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.08770446014034257,
|
|
"learning_rate": 1.2767015972839879e-05,
|
|
"loss": 1.005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23489950597286224,
|
|
"step": 772,
|
|
"valid_targets_mean": 14571.2,
|
|
"valid_targets_min": 12484
|
|
},
|
|
{
|
|
"epoch": 3.2899786780383797,
|
|
"grad_norm": 0.08404569249177507,
|
|
"learning_rate": 1.2711627915106728e-05,
|
|
"loss": 1.0567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290577232837677,
|
|
"step": 773,
|
|
"valid_targets_mean": 15977.8,
|
|
"valid_targets_min": 9100
|
|
},
|
|
{
|
|
"epoch": 3.2942430703624734,
|
|
"grad_norm": 0.09070021214510188,
|
|
"learning_rate": 1.2656304241674877e-05,
|
|
"loss": 1.0091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2998979687690735,
|
|
"step": 774,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15543
|
|
},
|
|
{
|
|
"epoch": 3.298507462686567,
|
|
"grad_norm": 0.08096248668562325,
|
|
"learning_rate": 1.2601045441264734e-05,
|
|
"loss": 1.0089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624076306819916,
|
|
"step": 775,
|
|
"valid_targets_mean": 16132.4,
|
|
"valid_targets_min": 14571
|
|
},
|
|
{
|
|
"epoch": 3.302771855010661,
|
|
"grad_norm": 0.0812426473005683,
|
|
"learning_rate": 1.2545852002023599e-05,
|
|
"loss": 1.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3013744652271271,
|
|
"step": 776,
|
|
"valid_targets_mean": 16136.2,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 3.307036247334755,
|
|
"grad_norm": 0.08488816766720501,
|
|
"learning_rate": 1.2490724411521406e-05,
|
|
"loss": 1.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21847042441368103,
|
|
"step": 777,
|
|
"valid_targets_mean": 11578.8,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 3.3113006396588487,
|
|
"grad_norm": 0.0914984941550091,
|
|
"learning_rate": 1.243566315674637e-05,
|
|
"loss": 1.0503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27096492052078247,
|
|
"step": 778,
|
|
"valid_targets_mean": 16081.9,
|
|
"valid_targets_min": 15267
|
|
},
|
|
{
|
|
"epoch": 3.3155650319829424,
|
|
"grad_norm": 0.08085805806716165,
|
|
"learning_rate": 1.238066872410073e-05,
|
|
"loss": 0.9829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824000120162964,
|
|
"step": 779,
|
|
"valid_targets_mean": 16201.9,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 3.319829424307036,
|
|
"grad_norm": 0.08704712247358391,
|
|
"learning_rate": 1.2325741599396418e-05,
|
|
"loss": 1.0224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14527681469917297,
|
|
"step": 780,
|
|
"valid_targets_mean": 7880.3,
|
|
"valid_targets_min": 2471
|
|
},
|
|
{
|
|
"epoch": 3.3240938166311302,
|
|
"grad_norm": 0.09009836071339113,
|
|
"learning_rate": 1.2270882267850765e-05,
|
|
"loss": 1.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27963101863861084,
|
|
"step": 781,
|
|
"valid_targets_mean": 15959.3,
|
|
"valid_targets_min": 11421
|
|
},
|
|
{
|
|
"epoch": 3.328358208955224,
|
|
"grad_norm": 0.08261017993444678,
|
|
"learning_rate": 1.2216091214082248e-05,
|
|
"loss": 1.0327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27638566493988037,
|
|
"step": 782,
|
|
"valid_targets_mean": 16181.3,
|
|
"valid_targets_min": 14295
|
|
},
|
|
{
|
|
"epoch": 3.3326226012793176,
|
|
"grad_norm": 0.07947568586678262,
|
|
"learning_rate": 1.2161368922106192e-05,
|
|
"loss": 1.0592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2128831148147583,
|
|
"step": 783,
|
|
"valid_targets_mean": 12418.2,
|
|
"valid_targets_min": 8716
|
|
},
|
|
{
|
|
"epoch": 3.3368869936034113,
|
|
"grad_norm": 0.08487573933677568,
|
|
"learning_rate": 1.2106715875330475e-05,
|
|
"loss": 1.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875361144542694,
|
|
"step": 784,
|
|
"valid_targets_mean": 16147.5,
|
|
"valid_targets_min": 15036
|
|
},
|
|
{
|
|
"epoch": 3.3411513859275055,
|
|
"grad_norm": 0.08563349705116731,
|
|
"learning_rate": 1.2052132556551275e-05,
|
|
"loss": 1.052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2927917242050171,
|
|
"step": 785,
|
|
"valid_targets_mean": 16140.5,
|
|
"valid_targets_min": 15494
|
|
},
|
|
{
|
|
"epoch": 3.345415778251599,
|
|
"grad_norm": 0.08573257981441831,
|
|
"learning_rate": 1.1997619447948814e-05,
|
|
"loss": 1.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27188941836357117,
|
|
"step": 786,
|
|
"valid_targets_mean": 15075.0,
|
|
"valid_targets_min": 12292
|
|
},
|
|
{
|
|
"epoch": 3.349680170575693,
|
|
"grad_norm": 0.08590592996249169,
|
|
"learning_rate": 1.1943177031083094e-05,
|
|
"loss": 1.0404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28748810291290283,
|
|
"step": 787,
|
|
"valid_targets_mean": 16218.8,
|
|
"valid_targets_min": 15466
|
|
},
|
|
{
|
|
"epoch": 3.3539445628997866,
|
|
"grad_norm": 0.09084835040747904,
|
|
"learning_rate": 1.1888805786889621e-05,
|
|
"loss": 1.0515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25355660915374756,
|
|
"step": 788,
|
|
"valid_targets_mean": 13489.9,
|
|
"valid_targets_min": 3746
|
|
},
|
|
{
|
|
"epoch": 3.3582089552238807,
|
|
"grad_norm": 0.08563507748669529,
|
|
"learning_rate": 1.183450619567518e-05,
|
|
"loss": 1.0261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2646862268447876,
|
|
"step": 789,
|
|
"valid_targets_mean": 16101.4,
|
|
"valid_targets_min": 15406
|
|
},
|
|
{
|
|
"epoch": 3.3624733475479744,
|
|
"grad_norm": 0.07741474719353984,
|
|
"learning_rate": 1.1780278737113581e-05,
|
|
"loss": 1.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29512226581573486,
|
|
"step": 790,
|
|
"valid_targets_mean": 16062.8,
|
|
"valid_targets_min": 15295
|
|
},
|
|
{
|
|
"epoch": 3.366737739872068,
|
|
"grad_norm": 0.08337137803814042,
|
|
"learning_rate": 1.1726123890241439e-05,
|
|
"loss": 1.0135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18408507108688354,
|
|
"step": 791,
|
|
"valid_targets_mean": 10278.5,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 3.3710021321961623,
|
|
"grad_norm": 0.08331929896267676,
|
|
"learning_rate": 1.1672042133453925e-05,
|
|
"loss": 1.0308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25294822454452515,
|
|
"step": 792,
|
|
"valid_targets_mean": 16099.4,
|
|
"valid_targets_min": 13356
|
|
},
|
|
{
|
|
"epoch": 3.375266524520256,
|
|
"grad_norm": 0.08087250401864832,
|
|
"learning_rate": 1.1618033944500527e-05,
|
|
"loss": 1.0339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32065683603286743,
|
|
"step": 793,
|
|
"valid_targets_mean": 16095.9,
|
|
"valid_targets_min": 14499
|
|
},
|
|
{
|
|
"epoch": 3.3795309168443497,
|
|
"grad_norm": 0.0844904179928905,
|
|
"learning_rate": 1.1564099800480864e-05,
|
|
"loss": 1.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21265047788619995,
|
|
"step": 794,
|
|
"valid_targets_mean": 12352.5,
|
|
"valid_targets_min": 6494
|
|
},
|
|
{
|
|
"epoch": 3.3837953091684434,
|
|
"grad_norm": 0.0788206729519856,
|
|
"learning_rate": 1.151024017784045e-05,
|
|
"loss": 1.0262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26299676299095154,
|
|
"step": 795,
|
|
"valid_targets_mean": 16220.2,
|
|
"valid_targets_min": 15684
|
|
},
|
|
{
|
|
"epoch": 3.388059701492537,
|
|
"grad_norm": 0.08722601199699058,
|
|
"learning_rate": 1.1456455552366488e-05,
|
|
"loss": 1.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3091568648815155,
|
|
"step": 796,
|
|
"valid_targets_mean": 16136.1,
|
|
"valid_targets_min": 14633
|
|
},
|
|
{
|
|
"epoch": 3.3923240938166312,
|
|
"grad_norm": 0.07434935120680863,
|
|
"learning_rate": 1.1402746399183671e-05,
|
|
"loss": 1.0354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24940910935401917,
|
|
"step": 797,
|
|
"valid_targets_mean": 14850.7,
|
|
"valid_targets_min": 12098
|
|
},
|
|
{
|
|
"epoch": 3.396588486140725,
|
|
"grad_norm": 0.07541779394483183,
|
|
"learning_rate": 1.1349113192749986e-05,
|
|
"loss": 1.0688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28874462842941284,
|
|
"step": 798,
|
|
"valid_targets_mean": 16107.9,
|
|
"valid_targets_min": 14217
|
|
},
|
|
{
|
|
"epoch": 3.4008528784648187,
|
|
"grad_norm": 0.0779225683417915,
|
|
"learning_rate": 1.1295556406852488e-05,
|
|
"loss": 1.0393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866628170013428,
|
|
"step": 799,
|
|
"valid_targets_mean": 16074.6,
|
|
"valid_targets_min": 15082
|
|
},
|
|
{
|
|
"epoch": 3.405117270788913,
|
|
"grad_norm": 0.08954034235692752,
|
|
"learning_rate": 1.1242076514603201e-05,
|
|
"loss": 1.0937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663489282131195,
|
|
"step": 800,
|
|
"valid_targets_mean": 15337.4,
|
|
"valid_targets_min": 13809
|
|
},
|
|
{
|
|
"epoch": 3.4093816631130065,
|
|
"grad_norm": 0.07925739414448216,
|
|
"learning_rate": 1.1188673988434831e-05,
|
|
"loss": 1.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303800493478775,
|
|
"step": 801,
|
|
"valid_targets_mean": 16143.8,
|
|
"valid_targets_min": 15342
|
|
},
|
|
{
|
|
"epoch": 3.4136460554371,
|
|
"grad_norm": 0.08233310863584724,
|
|
"learning_rate": 1.1135349300096667e-05,
|
|
"loss": 1.0109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19249793887138367,
|
|
"step": 802,
|
|
"valid_targets_mean": 11036.7,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 3.417910447761194,
|
|
"grad_norm": 0.07665017116850284,
|
|
"learning_rate": 1.1082102920650397e-05,
|
|
"loss": 1.0398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2646782100200653,
|
|
"step": 803,
|
|
"valid_targets_mean": 16117.8,
|
|
"valid_targets_min": 15040
|
|
},
|
|
{
|
|
"epoch": 3.4221748400852876,
|
|
"grad_norm": 0.0794113584553994,
|
|
"learning_rate": 1.102893532046593e-05,
|
|
"loss": 1.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841975688934326,
|
|
"step": 804,
|
|
"valid_targets_mean": 15978.8,
|
|
"valid_targets_min": 13873
|
|
},
|
|
{
|
|
"epoch": 3.4264392324093818,
|
|
"grad_norm": 0.08000714384885219,
|
|
"learning_rate": 1.0975846969217258e-05,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705045998096466,
|
|
"step": 805,
|
|
"valid_targets_mean": 9113.8,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.4307036247334755,
|
|
"grad_norm": 0.07716585493090808,
|
|
"learning_rate": 1.092283833587829e-05,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24749581515789032,
|
|
"step": 806,
|
|
"valid_targets_mean": 16170.2,
|
|
"valid_targets_min": 15541
|
|
},
|
|
{
|
|
"epoch": 3.434968017057569,
|
|
"grad_norm": 0.07731539747264918,
|
|
"learning_rate": 1.086990988871873e-05,
|
|
"loss": 1.1059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29436999559402466,
|
|
"step": 807,
|
|
"valid_targets_mean": 16008.2,
|
|
"valid_targets_min": 12847
|
|
},
|
|
{
|
|
"epoch": 3.4392324093816633,
|
|
"grad_norm": 0.07228985790556945,
|
|
"learning_rate": 1.0817062095299929e-05,
|
|
"loss": 1.0546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2052912414073944,
|
|
"step": 808,
|
|
"valid_targets_mean": 11927.4,
|
|
"valid_targets_min": 8058
|
|
},
|
|
{
|
|
"epoch": 3.443496801705757,
|
|
"grad_norm": 0.08182946090266197,
|
|
"learning_rate": 1.0764295422470755e-05,
|
|
"loss": 1.0183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847171425819397,
|
|
"step": 809,
|
|
"valid_targets_mean": 16043.9,
|
|
"valid_targets_min": 13036
|
|
},
|
|
{
|
|
"epoch": 3.4477611940298507,
|
|
"grad_norm": 0.08558903578818015,
|
|
"learning_rate": 1.0711610336363477e-05,
|
|
"loss": 1.0504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29839763045310974,
|
|
"step": 810,
|
|
"valid_targets_mean": 16063.0,
|
|
"valid_targets_min": 14112
|
|
},
|
|
{
|
|
"epoch": 3.4520255863539444,
|
|
"grad_norm": 0.07248737925505637,
|
|
"learning_rate": 1.065900730238961e-05,
|
|
"loss": 1.0397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656533718109131,
|
|
"step": 811,
|
|
"valid_targets_mean": 15409.9,
|
|
"valid_targets_min": 13810
|
|
},
|
|
{
|
|
"epoch": 3.4562899786780386,
|
|
"grad_norm": 0.0766729365981363,
|
|
"learning_rate": 1.0606486785235879e-05,
|
|
"loss": 1.0319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29447564482688904,
|
|
"step": 812,
|
|
"valid_targets_mean": 16093.3,
|
|
"valid_targets_min": 14861
|
|
},
|
|
{
|
|
"epoch": 3.4605543710021323,
|
|
"grad_norm": 0.07624451035068759,
|
|
"learning_rate": 1.0554049248860045e-05,
|
|
"loss": 1.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531646192073822,
|
|
"step": 813,
|
|
"valid_targets_mean": 12892.8,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 3.464818763326226,
|
|
"grad_norm": 0.08034726184443344,
|
|
"learning_rate": 1.0501695156486819e-05,
|
|
"loss": 1.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2647292912006378,
|
|
"step": 814,
|
|
"valid_targets_mean": 15961.7,
|
|
"valid_targets_min": 10501
|
|
},
|
|
{
|
|
"epoch": 3.4690831556503197,
|
|
"grad_norm": 0.08468441730544024,
|
|
"learning_rate": 1.0449424970603796e-05,
|
|
"loss": 1.0474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860788106918335,
|
|
"step": 815,
|
|
"valid_targets_mean": 16135.7,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 3.473347547974414,
|
|
"grad_norm": 0.07134044950586191,
|
|
"learning_rate": 1.0397239152957356e-05,
|
|
"loss": 1.0294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18420040607452393,
|
|
"step": 816,
|
|
"valid_targets_mean": 9412.2,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.4776119402985075,
|
|
"grad_norm": 0.08219444381401039,
|
|
"learning_rate": 1.034513816454858e-05,
|
|
"loss": 1.0478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24757365882396698,
|
|
"step": 817,
|
|
"valid_targets_mean": 16087.3,
|
|
"valid_targets_min": 14932
|
|
},
|
|
{
|
|
"epoch": 3.481876332622601,
|
|
"grad_norm": 0.08220755081339834,
|
|
"learning_rate": 1.0293122465629186e-05,
|
|
"loss": 1.0358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27733665704727173,
|
|
"step": 818,
|
|
"valid_targets_mean": 16139.5,
|
|
"valid_targets_min": 13554
|
|
},
|
|
{
|
|
"epoch": 3.486140724946695,
|
|
"grad_norm": 0.07584775755481762,
|
|
"learning_rate": 1.0241192515697432e-05,
|
|
"loss": 1.0297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2229754775762558,
|
|
"step": 819,
|
|
"valid_targets_mean": 13068.2,
|
|
"valid_targets_min": 7910
|
|
},
|
|
{
|
|
"epoch": 3.490405117270789,
|
|
"grad_norm": 0.07888370560124351,
|
|
"learning_rate": 1.0189348773494135e-05,
|
|
"loss": 1.0405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2827190160751343,
|
|
"step": 820,
|
|
"valid_targets_mean": 16068.0,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 3.4946695095948828,
|
|
"grad_norm": 0.08442590063357983,
|
|
"learning_rate": 1.0137591696998514e-05,
|
|
"loss": 1.0571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30107390880584717,
|
|
"step": 821,
|
|
"valid_targets_mean": 16052.6,
|
|
"valid_targets_min": 15100
|
|
},
|
|
{
|
|
"epoch": 3.4989339019189765,
|
|
"grad_norm": 0.07575540661424213,
|
|
"learning_rate": 1.0085921743424225e-05,
|
|
"loss": 1.0486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24146324396133423,
|
|
"step": 822,
|
|
"valid_targets_mean": 14923.4,
|
|
"valid_targets_min": 12844
|
|
},
|
|
{
|
|
"epoch": 3.50319829424307,
|
|
"grad_norm": 0.0767158417258932,
|
|
"learning_rate": 1.0034339369215288e-05,
|
|
"loss": 1.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005656599998474,
|
|
"step": 823,
|
|
"valid_targets_mean": 16008.7,
|
|
"valid_targets_min": 13855
|
|
},
|
|
{
|
|
"epoch": 3.5074626865671643,
|
|
"grad_norm": 0.0821964555281308,
|
|
"learning_rate": 9.982845030042068e-06,
|
|
"loss": 1.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3367677927017212,
|
|
"step": 824,
|
|
"valid_targets_mean": 16019.8,
|
|
"valid_targets_min": 14879
|
|
},
|
|
{
|
|
"epoch": 3.511727078891258,
|
|
"grad_norm": 0.0761238152722416,
|
|
"learning_rate": 9.931439180797237e-06,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.240778386592865,
|
|
"step": 825,
|
|
"valid_targets_mean": 15292.6,
|
|
"valid_targets_min": 13091
|
|
},
|
|
{
|
|
"epoch": 3.5159914712153517,
|
|
"grad_norm": 0.07822139004943611,
|
|
"learning_rate": 9.880122275591752e-06,
|
|
"loss": 1.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3065621256828308,
|
|
"step": 826,
|
|
"valid_targets_mean": 16198.4,
|
|
"valid_targets_min": 15653
|
|
},
|
|
{
|
|
"epoch": 3.520255863539446,
|
|
"grad_norm": 0.07409821239561679,
|
|
"learning_rate": 9.828894767750865e-06,
|
|
"loss": 1.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21180576086044312,
|
|
"step": 827,
|
|
"valid_targets_mean": 10644.2,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 3.5245202558635396,
|
|
"grad_norm": 0.07069238759255479,
|
|
"learning_rate": 9.777757109810102e-06,
|
|
"loss": 1.0488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508220672607422,
|
|
"step": 828,
|
|
"valid_targets_mean": 16072.6,
|
|
"valid_targets_min": 15115
|
|
},
|
|
{
|
|
"epoch": 3.5287846481876333,
|
|
"grad_norm": 0.08198199214943709,
|
|
"learning_rate": 9.726709753511275e-06,
|
|
"loss": 1.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29195117950439453,
|
|
"step": 829,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 15224
|
|
},
|
|
{
|
|
"epoch": 3.533049040511727,
|
|
"grad_norm": 0.08207405615977613,
|
|
"learning_rate": 9.675753149798474e-06,
|
|
"loss": 1.0533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18177372217178345,
|
|
"step": 830,
|
|
"valid_targets_mean": 10174.1,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 3.5373134328358207,
|
|
"grad_norm": 0.0776036210156942,
|
|
"learning_rate": 9.624887748814118e-06,
|
|
"loss": 1.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637912929058075,
|
|
"step": 831,
|
|
"valid_targets_mean": 16138.4,
|
|
"valid_targets_min": 14090
|
|
},
|
|
{
|
|
"epoch": 3.541577825159915,
|
|
"grad_norm": 0.07878588810102302,
|
|
"learning_rate": 9.574113999894909e-06,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2910246253013611,
|
|
"step": 832,
|
|
"valid_targets_mean": 16171.9,
|
|
"valid_targets_min": 14772
|
|
},
|
|
{
|
|
"epoch": 3.5458422174840085,
|
|
"grad_norm": 0.08147072030448552,
|
|
"learning_rate": 9.523432351567979e-06,
|
|
"loss": 1.029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22465993463993073,
|
|
"step": 833,
|
|
"valid_targets_mean": 13193.6,
|
|
"valid_targets_min": 9488
|
|
},
|
|
{
|
|
"epoch": 3.550106609808102,
|
|
"grad_norm": 0.07654156288535757,
|
|
"learning_rate": 9.472843251546792e-06,
|
|
"loss": 1.0391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28869175910949707,
|
|
"step": 834,
|
|
"valid_targets_mean": 16128.7,
|
|
"valid_targets_min": 14586
|
|
},
|
|
{
|
|
"epoch": 3.5543710021321964,
|
|
"grad_norm": 0.0859727229224103,
|
|
"learning_rate": 9.422347146727294e-06,
|
|
"loss": 1.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898827791213989,
|
|
"step": 835,
|
|
"valid_targets_mean": 16089.6,
|
|
"valid_targets_min": 14619
|
|
},
|
|
{
|
|
"epoch": 3.55863539445629,
|
|
"grad_norm": 0.07837509424347187,
|
|
"learning_rate": 9.371944483183912e-06,
|
|
"loss": 1.0437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699970602989197,
|
|
"step": 836,
|
|
"valid_targets_mean": 14587.1,
|
|
"valid_targets_min": 12278
|
|
},
|
|
{
|
|
"epoch": 3.5628997867803838,
|
|
"grad_norm": 0.08352810163539401,
|
|
"learning_rate": 9.321635706165635e-06,
|
|
"loss": 1.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3019229471683502,
|
|
"step": 837,
|
|
"valid_targets_mean": 16071.5,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 3.5671641791044775,
|
|
"grad_norm": 0.07736750975243195,
|
|
"learning_rate": 9.271421260092075e-06,
|
|
"loss": 1.0902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27271705865859985,
|
|
"step": 838,
|
|
"valid_targets_mean": 12901.5,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.07536562046710664,
|
|
"learning_rate": 9.221301588549519e-06,
|
|
"loss": 1.0288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26623690128326416,
|
|
"step": 839,
|
|
"valid_targets_mean": 16042.8,
|
|
"valid_targets_min": 14411
|
|
},
|
|
{
|
|
"epoch": 3.5756929637526653,
|
|
"grad_norm": 0.07435679487328575,
|
|
"learning_rate": 9.171277134287057e-06,
|
|
"loss": 1.0467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27162933349609375,
|
|
"step": 840,
|
|
"valid_targets_mean": 16194.1,
|
|
"valid_targets_min": 15459
|
|
},
|
|
{
|
|
"epoch": 3.579957356076759,
|
|
"grad_norm": 0.0759710537303972,
|
|
"learning_rate": 9.121348339212634e-06,
|
|
"loss": 1.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20792828500270844,
|
|
"step": 841,
|
|
"valid_targets_mean": 11685.5,
|
|
"valid_targets_min": 3080
|
|
},
|
|
{
|
|
"epoch": 3.5842217484008527,
|
|
"grad_norm": 0.07317358302371058,
|
|
"learning_rate": 9.07151564438916e-06,
|
|
"loss": 1.049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25784173607826233,
|
|
"step": 842,
|
|
"valid_targets_mean": 16166.2,
|
|
"valid_targets_min": 15443
|
|
},
|
|
{
|
|
"epoch": 3.588486140724947,
|
|
"grad_norm": 0.07499139817566075,
|
|
"learning_rate": 9.021779490030611e-06,
|
|
"loss": 1.0243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28628432750701904,
|
|
"step": 843,
|
|
"valid_targets_mean": 16173.1,
|
|
"valid_targets_min": 15455
|
|
},
|
|
{
|
|
"epoch": 3.5927505330490406,
|
|
"grad_norm": 0.07129793619133343,
|
|
"learning_rate": 8.972140315498119e-06,
|
|
"loss": 1.0233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18102656304836273,
|
|
"step": 844,
|
|
"valid_targets_mean": 10328.5,
|
|
"valid_targets_min": 3948
|
|
},
|
|
{
|
|
"epoch": 3.5970149253731343,
|
|
"grad_norm": 0.06886745547214684,
|
|
"learning_rate": 8.922598559296154e-06,
|
|
"loss": 1.0143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747657895088196,
|
|
"step": 845,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15461
|
|
},
|
|
{
|
|
"epoch": 3.6012793176972284,
|
|
"grad_norm": 0.07272280814811578,
|
|
"learning_rate": 8.873154659068582e-06,
|
|
"loss": 1.0178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28416118025779724,
|
|
"step": 846,
|
|
"valid_targets_mean": 16084.8,
|
|
"valid_targets_min": 14469
|
|
},
|
|
{
|
|
"epoch": 3.605543710021322,
|
|
"grad_norm": 0.07164342094147862,
|
|
"learning_rate": 8.823809051594816e-06,
|
|
"loss": 1.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24486662447452545,
|
|
"step": 847,
|
|
"valid_targets_mean": 14392.8,
|
|
"valid_targets_min": 12578
|
|
},
|
|
{
|
|
"epoch": 3.609808102345416,
|
|
"grad_norm": 0.07180361510798312,
|
|
"learning_rate": 8.774562172785988e-06,
|
|
"loss": 1.0423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928151488304138,
|
|
"step": 848,
|
|
"valid_targets_mean": 16149.0,
|
|
"valid_targets_min": 15540
|
|
},
|
|
{
|
|
"epoch": 3.6140724946695095,
|
|
"grad_norm": 0.0769295849974791,
|
|
"learning_rate": 8.725414457681063e-06,
|
|
"loss": 1.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3170536756515503,
|
|
"step": 849,
|
|
"valid_targets_mean": 16089.7,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 3.6183368869936032,
|
|
"grad_norm": 0.06845583746421251,
|
|
"learning_rate": 8.676366340443017e-06,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24786901473999023,
|
|
"step": 850,
|
|
"valid_targets_mean": 16055.3,
|
|
"valid_targets_min": 15196
|
|
},
|
|
{
|
|
"epoch": 3.6226012793176974,
|
|
"grad_norm": 0.07634890271169675,
|
|
"learning_rate": 8.627418254355e-06,
|
|
"loss": 1.0314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956114411354065,
|
|
"step": 851,
|
|
"valid_targets_mean": 16038.9,
|
|
"valid_targets_min": 14619
|
|
},
|
|
{
|
|
"epoch": 3.626865671641791,
|
|
"grad_norm": 0.07293290633400086,
|
|
"learning_rate": 8.578570631816474e-06,
|
|
"loss": 1.0036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2018302083015442,
|
|
"step": 852,
|
|
"valid_targets_mean": 10947.8,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 3.631130063965885,
|
|
"grad_norm": 0.06950438994638478,
|
|
"learning_rate": 8.529823904339472e-06,
|
|
"loss": 1.0249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264492928981781,
|
|
"step": 853,
|
|
"valid_targets_mean": 16101.7,
|
|
"valid_targets_min": 14741
|
|
},
|
|
{
|
|
"epoch": 3.635394456289979,
|
|
"grad_norm": 0.07818809753586015,
|
|
"learning_rate": 8.481178502544684e-06,
|
|
"loss": 1.0051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863122224807739,
|
|
"step": 854,
|
|
"valid_targets_mean": 16039.0,
|
|
"valid_targets_min": 13183
|
|
},
|
|
{
|
|
"epoch": 3.6396588486140726,
|
|
"grad_norm": 0.08381029133291594,
|
|
"learning_rate": 8.43263485615774e-06,
|
|
"loss": 1.0567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17699049413204193,
|
|
"step": 855,
|
|
"valid_targets_mean": 9115.7,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 3.6439232409381663,
|
|
"grad_norm": 0.06806375868737402,
|
|
"learning_rate": 8.384193394005372e-06,
|
|
"loss": 1.0548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26565074920654297,
|
|
"step": 856,
|
|
"valid_targets_mean": 16156.7,
|
|
"valid_targets_min": 15653
|
|
},
|
|
{
|
|
"epoch": 3.64818763326226,
|
|
"grad_norm": 0.07268846940583555,
|
|
"learning_rate": 8.33585454401161e-06,
|
|
"loss": 1.0896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3231162428855896,
|
|
"step": 857,
|
|
"valid_targets_mean": 16049.3,
|
|
"valid_targets_min": 15364
|
|
},
|
|
{
|
|
"epoch": 3.6524520255863537,
|
|
"grad_norm": 0.07257811588516808,
|
|
"learning_rate": 8.287618733194073e-06,
|
|
"loss": 1.0466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2326294183731079,
|
|
"step": 858,
|
|
"valid_targets_mean": 13136.5,
|
|
"valid_targets_min": 10041
|
|
},
|
|
{
|
|
"epoch": 3.656716417910448,
|
|
"grad_norm": 0.07342172573533538,
|
|
"learning_rate": 8.239486387660096e-06,
|
|
"loss": 1.0105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916714549064636,
|
|
"step": 859,
|
|
"valid_targets_mean": 16039.2,
|
|
"valid_targets_min": 14527
|
|
},
|
|
{
|
|
"epoch": 3.6609808102345416,
|
|
"grad_norm": 0.07295656916740743,
|
|
"learning_rate": 8.191457932603052e-06,
|
|
"loss": 1.0522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3118794560432434,
|
|
"step": 860,
|
|
"valid_targets_mean": 16099.7,
|
|
"valid_targets_min": 14712
|
|
},
|
|
{
|
|
"epoch": 3.6652452025586353,
|
|
"grad_norm": 0.07592719060691264,
|
|
"learning_rate": 8.143533792298545e-06,
|
|
"loss": 1.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843993902206421,
|
|
"step": 861,
|
|
"valid_targets_mean": 15843.9,
|
|
"valid_targets_min": 14501
|
|
},
|
|
{
|
|
"epoch": 3.6695095948827294,
|
|
"grad_norm": 0.07908364722295175,
|
|
"learning_rate": 8.095714390100698e-06,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26779454946517944,
|
|
"step": 862,
|
|
"valid_targets_mean": 16143.4,
|
|
"valid_targets_min": 13761
|
|
},
|
|
{
|
|
"epoch": 3.673773987206823,
|
|
"grad_norm": 0.07655437374174758,
|
|
"learning_rate": 8.048000148438375e-06,
|
|
"loss": 1.0776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678048610687256,
|
|
"step": 863,
|
|
"valid_targets_mean": 13069.3,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.678038379530917,
|
|
"grad_norm": 0.06686525002552621,
|
|
"learning_rate": 8.000391488811485e-06,
|
|
"loss": 1.0327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27218443155288696,
|
|
"step": 864,
|
|
"valid_targets_mean": 16093.3,
|
|
"valid_targets_min": 14887
|
|
},
|
|
{
|
|
"epoch": 3.6823027718550105,
|
|
"grad_norm": 0.07312690467761619,
|
|
"learning_rate": 7.952888831787215e-06,
|
|
"loss": 1.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30454814434051514,
|
|
"step": 865,
|
|
"valid_targets_mean": 16145.0,
|
|
"valid_targets_min": 14450
|
|
},
|
|
{
|
|
"epoch": 3.6865671641791042,
|
|
"grad_norm": 0.07710312277211263,
|
|
"learning_rate": 7.905492596996391e-06,
|
|
"loss": 1.0129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18659716844558716,
|
|
"step": 866,
|
|
"valid_targets_mean": 9675.5,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 3.6908315565031984,
|
|
"grad_norm": 0.07224152024264159,
|
|
"learning_rate": 7.858203203129668e-06,
|
|
"loss": 1.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758779525756836,
|
|
"step": 867,
|
|
"valid_targets_mean": 16188.8,
|
|
"valid_targets_min": 15352
|
|
},
|
|
{
|
|
"epoch": 3.695095948827292,
|
|
"grad_norm": 0.07366597163732407,
|
|
"learning_rate": 7.811021067933919e-06,
|
|
"loss": 1.061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26859456300735474,
|
|
"step": 868,
|
|
"valid_targets_mean": 16114.7,
|
|
"valid_targets_min": 14788
|
|
},
|
|
{
|
|
"epoch": 3.699360341151386,
|
|
"grad_norm": 0.07118458458898588,
|
|
"learning_rate": 7.763946608208504e-06,
|
|
"loss": 1.0314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22084073722362518,
|
|
"step": 869,
|
|
"valid_targets_mean": 11904.3,
|
|
"valid_targets_min": 8210
|
|
},
|
|
{
|
|
"epoch": 3.70362473347548,
|
|
"grad_norm": 0.07504417153600987,
|
|
"learning_rate": 7.716980239801588e-06,
|
|
"loss": 1.0196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27497339248657227,
|
|
"step": 870,
|
|
"valid_targets_mean": 16101.3,
|
|
"valid_targets_min": 14802
|
|
},
|
|
{
|
|
"epoch": 3.7078891257995736,
|
|
"grad_norm": 0.08363870747766138,
|
|
"learning_rate": 7.670122377606495e-06,
|
|
"loss": 1.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30358707904815674,
|
|
"step": 871,
|
|
"valid_targets_mean": 16106.4,
|
|
"valid_targets_min": 15185
|
|
},
|
|
{
|
|
"epoch": 3.7121535181236673,
|
|
"grad_norm": 0.07342120337124591,
|
|
"learning_rate": 7.623373435557988e-06,
|
|
"loss": 1.0491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24745066463947296,
|
|
"step": 872,
|
|
"valid_targets_mean": 14868.4,
|
|
"valid_targets_min": 12253
|
|
},
|
|
{
|
|
"epoch": 3.716417910447761,
|
|
"grad_norm": 0.07460204700205582,
|
|
"learning_rate": 7.5767338266286775e-06,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30622681975364685,
|
|
"step": 873,
|
|
"valid_targets_mean": 16102.9,
|
|
"valid_targets_min": 15350
|
|
},
|
|
{
|
|
"epoch": 3.7206823027718547,
|
|
"grad_norm": 0.07381109711488222,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 1.0393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29721248149871826,
|
|
"step": 874,
|
|
"valid_targets_mean": 16140.7,
|
|
"valid_targets_min": 15381
|
|
},
|
|
{
|
|
"epoch": 3.724946695095949,
|
|
"grad_norm": 0.07440372199724594,
|
|
"learning_rate": 7.483784255185249e-06,
|
|
"loss": 1.0592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2485685646533966,
|
|
"step": 875,
|
|
"valid_targets_mean": 15646.2,
|
|
"valid_targets_min": 13651
|
|
},
|
|
{
|
|
"epoch": 3.7292110874200426,
|
|
"grad_norm": 0.0807719092141346,
|
|
"learning_rate": 7.437475113772632e-06,
|
|
"loss": 1.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29621434211730957,
|
|
"step": 876,
|
|
"valid_targets_mean": 16115.2,
|
|
"valid_targets_min": 15125
|
|
},
|
|
{
|
|
"epoch": 3.7334754797441363,
|
|
"grad_norm": 0.09062742289766168,
|
|
"learning_rate": 7.391276947674932e-06,
|
|
"loss": 1.0273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2134869396686554,
|
|
"step": 877,
|
|
"valid_targets_mean": 11268.6,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 3.7377398720682304,
|
|
"grad_norm": 0.07405098552303613,
|
|
"learning_rate": 7.345190164999307e-06,
|
|
"loss": 1.0195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24944131076335907,
|
|
"step": 878,
|
|
"valid_targets_mean": 16106.9,
|
|
"valid_targets_min": 14652
|
|
},
|
|
{
|
|
"epoch": 3.742004264392324,
|
|
"grad_norm": 0.07699297216224593,
|
|
"learning_rate": 7.299215172868947e-06,
|
|
"loss": 1.0342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969021201133728,
|
|
"step": 879,
|
|
"valid_targets_mean": 16121.0,
|
|
"valid_targets_min": 15283
|
|
},
|
|
{
|
|
"epoch": 3.746268656716418,
|
|
"grad_norm": 0.07548970963595578,
|
|
"learning_rate": 7.2533523774194865e-06,
|
|
"loss": 1.0301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1697109043598175,
|
|
"step": 880,
|
|
"valid_targets_mean": 9391.1,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 3.750533049040512,
|
|
"grad_norm": 0.07010034605277483,
|
|
"learning_rate": 7.2076021837954616e-06,
|
|
"loss": 0.9963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25706565380096436,
|
|
"step": 881,
|
|
"valid_targets_mean": 16217.9,
|
|
"valid_targets_min": 15701
|
|
},
|
|
{
|
|
"epoch": 3.7547974413646057,
|
|
"grad_norm": 0.08205597196335078,
|
|
"learning_rate": 7.161964996146689e-06,
|
|
"loss": 1.0073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27641984820365906,
|
|
"step": 882,
|
|
"valid_targets_mean": 16195.4,
|
|
"valid_targets_min": 15519
|
|
},
|
|
{
|
|
"epoch": 3.7590618336886994,
|
|
"grad_norm": 0.0762904774356316,
|
|
"learning_rate": 7.116441217624708e-06,
|
|
"loss": 1.0521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2224830985069275,
|
|
"step": 883,
|
|
"valid_targets_mean": 12615.6,
|
|
"valid_targets_min": 8044
|
|
},
|
|
{
|
|
"epoch": 3.763326226012793,
|
|
"grad_norm": 0.07437738673341121,
|
|
"learning_rate": 7.071031250379228e-06,
|
|
"loss": 1.0091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763974070549011,
|
|
"step": 884,
|
|
"valid_targets_mean": 16184.5,
|
|
"valid_targets_min": 15385
|
|
},
|
|
{
|
|
"epoch": 3.767590618336887,
|
|
"grad_norm": 0.08263239223754486,
|
|
"learning_rate": 7.0257354955545466e-06,
|
|
"loss": 1.0574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2995729446411133,
|
|
"step": 885,
|
|
"valid_targets_mean": 16147.8,
|
|
"valid_targets_min": 14885
|
|
},
|
|
{
|
|
"epoch": 3.771855010660981,
|
|
"grad_norm": 0.08187468622832252,
|
|
"learning_rate": 6.980554353286066e-06,
|
|
"loss": 1.0158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629240155220032,
|
|
"step": 886,
|
|
"valid_targets_mean": 15197.4,
|
|
"valid_targets_min": 13273
|
|
},
|
|
{
|
|
"epoch": 3.7761194029850746,
|
|
"grad_norm": 0.07042328561058592,
|
|
"learning_rate": 6.935488222696676e-06,
|
|
"loss": 1.0566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29480981826782227,
|
|
"step": 887,
|
|
"valid_targets_mean": 16141.4,
|
|
"valid_targets_min": 15061
|
|
},
|
|
{
|
|
"epoch": 3.7803837953091683,
|
|
"grad_norm": 0.07090884112846695,
|
|
"learning_rate": 6.890537501893302e-06,
|
|
"loss": 1.047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23563900589942932,
|
|
"step": 888,
|
|
"valid_targets_mean": 12636.3,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 3.7846481876332625,
|
|
"grad_norm": 0.07318482906420633,
|
|
"learning_rate": 6.845702587963352e-06,
|
|
"loss": 1.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23754438757896423,
|
|
"step": 889,
|
|
"valid_targets_mean": 16161.3,
|
|
"valid_targets_min": 15447
|
|
},
|
|
{
|
|
"epoch": 3.788912579957356,
|
|
"grad_norm": 0.07704716168073238,
|
|
"learning_rate": 6.800983876971192e-06,
|
|
"loss": 1.01,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919682562351227,
|
|
"step": 890,
|
|
"valid_targets_mean": 15981.9,
|
|
"valid_targets_min": 13253
|
|
},
|
|
{
|
|
"epoch": 3.79317697228145,
|
|
"grad_norm": 0.08241043348553403,
|
|
"learning_rate": 6.756381763954718e-06,
|
|
"loss": 1.0539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1859930455684662,
|
|
"step": 891,
|
|
"valid_targets_mean": 10454.7,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 3.7974413646055436,
|
|
"grad_norm": 0.07601752565040058,
|
|
"learning_rate": 6.7118966429217645e-06,
|
|
"loss": 1.0472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26191335916519165,
|
|
"step": 892,
|
|
"valid_targets_mean": 16112.2,
|
|
"valid_targets_min": 15358
|
|
},
|
|
{
|
|
"epoch": 3.8017057569296373,
|
|
"grad_norm": 0.082384383673037,
|
|
"learning_rate": 6.667528906846714e-06,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3109520673751831,
|
|
"step": 893,
|
|
"valid_targets_mean": 16144.4,
|
|
"valid_targets_min": 15125
|
|
},
|
|
{
|
|
"epoch": 3.8059701492537314,
|
|
"grad_norm": 0.07953016017118761,
|
|
"learning_rate": 6.623278947666974e-06,
|
|
"loss": 1.0801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21396487951278687,
|
|
"step": 894,
|
|
"valid_targets_mean": 11676.1,
|
|
"valid_targets_min": 7061
|
|
},
|
|
{
|
|
"epoch": 3.810234541577825,
|
|
"grad_norm": 0.0751868140397465,
|
|
"learning_rate": 6.579147156279538e-06,
|
|
"loss": 1.0432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563432455062866,
|
|
"step": 895,
|
|
"valid_targets_mean": 16165.0,
|
|
"valid_targets_min": 14102
|
|
},
|
|
{
|
|
"epoch": 3.814498933901919,
|
|
"grad_norm": 0.07382117897142344,
|
|
"learning_rate": 6.535133922537513e-06,
|
|
"loss": 1.0487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27644339203834534,
|
|
"step": 896,
|
|
"valid_targets_mean": 16166.2,
|
|
"valid_targets_min": 15383
|
|
},
|
|
{
|
|
"epoch": 3.818763326226013,
|
|
"grad_norm": 0.0807252059695892,
|
|
"learning_rate": 6.491239635246709e-06,
|
|
"loss": 1.0449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23054011166095734,
|
|
"step": 897,
|
|
"valid_targets_mean": 14405.0,
|
|
"valid_targets_min": 11593
|
|
},
|
|
{
|
|
"epoch": 3.8230277185501067,
|
|
"grad_norm": 0.07159107994397992,
|
|
"learning_rate": 6.447464682162143e-06,
|
|
"loss": 1.0489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2962377667427063,
|
|
"step": 898,
|
|
"valid_targets_mean": 16133.2,
|
|
"valid_targets_min": 14748
|
|
},
|
|
{
|
|
"epoch": 3.8272921108742004,
|
|
"grad_norm": 0.0766944425757295,
|
|
"learning_rate": 6.403809449984704e-06,
|
|
"loss": 1.0442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28581321239471436,
|
|
"step": 899,
|
|
"valid_targets_mean": 16171.7,
|
|
"valid_targets_min": 15585
|
|
},
|
|
{
|
|
"epoch": 3.831556503198294,
|
|
"grad_norm": 0.06359344154612387,
|
|
"learning_rate": 6.3602743243576405e-06,
|
|
"loss": 1.0717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28169578313827515,
|
|
"step": 900,
|
|
"valid_targets_mean": 15786.9,
|
|
"valid_targets_min": 14346
|
|
},
|
|
{
|
|
"epoch": 3.835820895522388,
|
|
"grad_norm": 0.07619583507456905,
|
|
"learning_rate": 6.316859689863222e-06,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27937543392181396,
|
|
"step": 901,
|
|
"valid_targets_mean": 16076.4,
|
|
"valid_targets_min": 14140
|
|
},
|
|
{
|
|
"epoch": 3.840085287846482,
|
|
"grad_norm": 0.07851109754820623,
|
|
"learning_rate": 6.273565930019316e-06,
|
|
"loss": 1.0426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19868823885917664,
|
|
"step": 902,
|
|
"valid_targets_mean": 9929.7,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 3.8443496801705757,
|
|
"grad_norm": 0.06988564704508825,
|
|
"learning_rate": 6.230393427276e-06,
|
|
"loss": 1.0141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2478978931903839,
|
|
"step": 903,
|
|
"valid_targets_mean": 16186.2,
|
|
"valid_targets_min": 15264
|
|
},
|
|
{
|
|
"epoch": 3.8486140724946694,
|
|
"grad_norm": 0.07213375181585238,
|
|
"learning_rate": 6.187342563012198e-06,
|
|
"loss": 1.0434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2959016263484955,
|
|
"step": 904,
|
|
"valid_targets_mean": 16155.6,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 3.8528784648187635,
|
|
"grad_norm": 0.07745833155049299,
|
|
"learning_rate": 6.144413717532269e-06,
|
|
"loss": 0.9986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15152940154075623,
|
|
"step": 905,
|
|
"valid_targets_mean": 8307.7,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.07936342753804639,
|
|
"learning_rate": 6.1016072700627106e-06,
|
|
"loss": 1.0826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805943489074707,
|
|
"step": 906,
|
|
"valid_targets_mean": 16094.2,
|
|
"valid_targets_min": 15190
|
|
},
|
|
{
|
|
"epoch": 3.861407249466951,
|
|
"grad_norm": 0.07115683459000141,
|
|
"learning_rate": 6.058923598748756e-06,
|
|
"loss": 1.0248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27546238899230957,
|
|
"step": 907,
|
|
"valid_targets_mean": 16116.7,
|
|
"valid_targets_min": 15112
|
|
},
|
|
{
|
|
"epoch": 3.8656716417910446,
|
|
"grad_norm": 0.06870793361232751,
|
|
"learning_rate": 6.016363080651066e-06,
|
|
"loss": 1.0302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2435663640499115,
|
|
"step": 908,
|
|
"valid_targets_mean": 13569.0,
|
|
"valid_targets_min": 10284
|
|
},
|
|
{
|
|
"epoch": 3.8699360341151388,
|
|
"grad_norm": 0.07512059059899659,
|
|
"learning_rate": 5.973926091742386e-06,
|
|
"loss": 1.0525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829138934612274,
|
|
"step": 909,
|
|
"valid_targets_mean": 15807.6,
|
|
"valid_targets_min": 10474
|
|
},
|
|
{
|
|
"epoch": 3.8742004264392325,
|
|
"grad_norm": 0.0746090910791797,
|
|
"learning_rate": 5.931613006904196e-06,
|
|
"loss": 1.0084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27891212701797485,
|
|
"step": 910,
|
|
"valid_targets_mean": 16172.7,
|
|
"valid_targets_min": 15426
|
|
},
|
|
{
|
|
"epoch": 3.878464818763326,
|
|
"grad_norm": 0.07365699218842088,
|
|
"learning_rate": 5.889424199923473e-06,
|
|
"loss": 1.0254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24674835801124573,
|
|
"step": 911,
|
|
"valid_targets_mean": 15981.2,
|
|
"valid_targets_min": 14961
|
|
},
|
|
{
|
|
"epoch": 3.88272921108742,
|
|
"grad_norm": 0.07403583695107757,
|
|
"learning_rate": 5.847360043489318e-06,
|
|
"loss": 1.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26985955238342285,
|
|
"step": 912,
|
|
"valid_targets_mean": 16128.2,
|
|
"valid_targets_min": 15098
|
|
},
|
|
{
|
|
"epoch": 3.886993603411514,
|
|
"grad_norm": 0.07528796670093024,
|
|
"learning_rate": 5.805420909189683e-06,
|
|
"loss": 1.0206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2377902865409851,
|
|
"step": 913,
|
|
"valid_targets_mean": 13036.5,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 3.8912579957356077,
|
|
"grad_norm": 0.07475040924999732,
|
|
"learning_rate": 5.7636071675081076e-06,
|
|
"loss": 1.0355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565905451774597,
|
|
"step": 914,
|
|
"valid_targets_mean": 16050.0,
|
|
"valid_targets_min": 15395
|
|
},
|
|
{
|
|
"epoch": 3.8955223880597014,
|
|
"grad_norm": 0.0781767228384843,
|
|
"learning_rate": 5.721919187820431e-06,
|
|
"loss": 1.0688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30468934774398804,
|
|
"step": 915,
|
|
"valid_targets_mean": 16155.1,
|
|
"valid_targets_min": 14576
|
|
},
|
|
{
|
|
"epoch": 3.8997867803837956,
|
|
"grad_norm": 0.06961949211855231,
|
|
"learning_rate": 5.6803573383915265e-06,
|
|
"loss": 1.0222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17528824508190155,
|
|
"step": 916,
|
|
"valid_targets_mean": 9254.7,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 3.9040511727078893,
|
|
"grad_norm": 0.0801950737804898,
|
|
"learning_rate": 5.638921986372064e-06,
|
|
"loss": 1.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781122326850891,
|
|
"step": 917,
|
|
"valid_targets_mean": 16079.2,
|
|
"valid_targets_min": 15214
|
|
},
|
|
{
|
|
"epoch": 3.908315565031983,
|
|
"grad_norm": 0.07985162052536784,
|
|
"learning_rate": 5.5976134977952315e-06,
|
|
"loss": 1.038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955649793148041,
|
|
"step": 918,
|
|
"valid_targets_mean": 16091.0,
|
|
"valid_targets_min": 15036
|
|
},
|
|
{
|
|
"epoch": 3.9125799573560767,
|
|
"grad_norm": 0.07199015330074526,
|
|
"learning_rate": 5.556432237573564e-06,
|
|
"loss": 1.0503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22211569547653198,
|
|
"step": 919,
|
|
"valid_targets_mean": 12468.5,
|
|
"valid_targets_min": 7646
|
|
},
|
|
{
|
|
"epoch": 3.9168443496801704,
|
|
"grad_norm": 0.07413306910192108,
|
|
"learning_rate": 5.5153785694956416e-06,
|
|
"loss": 1.0134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2512298822402954,
|
|
"step": 920,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15265
|
|
},
|
|
{
|
|
"epoch": 3.9211087420042645,
|
|
"grad_norm": 0.08103794799628959,
|
|
"learning_rate": 5.474452856222942e-06,
|
|
"loss": 1.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3269347548484802,
|
|
"step": 921,
|
|
"valid_targets_mean": 15798.6,
|
|
"valid_targets_min": 4956
|
|
},
|
|
{
|
|
"epoch": 3.925373134328358,
|
|
"grad_norm": 0.07184157470878752,
|
|
"learning_rate": 5.433655459286611e-06,
|
|
"loss": 1.0427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23847901821136475,
|
|
"step": 922,
|
|
"valid_targets_mean": 14551.4,
|
|
"valid_targets_min": 10999
|
|
},
|
|
{
|
|
"epoch": 3.929637526652452,
|
|
"grad_norm": 0.07539568461508808,
|
|
"learning_rate": 5.392986739084238e-06,
|
|
"loss": 1.0488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27987009286880493,
|
|
"step": 923,
|
|
"valid_targets_mean": 16152.1,
|
|
"valid_targets_min": 15528
|
|
},
|
|
{
|
|
"epoch": 3.933901918976546,
|
|
"grad_norm": 0.07049417270833788,
|
|
"learning_rate": 5.352447054876755e-06,
|
|
"loss": 1.059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288784384727478,
|
|
"step": 924,
|
|
"valid_targets_mean": 16194.7,
|
|
"valid_targets_min": 15150
|
|
},
|
|
{
|
|
"epoch": 3.9381663113006398,
|
|
"grad_norm": 0.06428794071718126,
|
|
"learning_rate": 5.31203676478516e-06,
|
|
"loss": 1.0302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25225114822387695,
|
|
"step": 925,
|
|
"valid_targets_mean": 15663.3,
|
|
"valid_targets_min": 13454
|
|
},
|
|
{
|
|
"epoch": 3.9424307036247335,
|
|
"grad_norm": 0.06814352126849005,
|
|
"learning_rate": 5.271756225787434e-06,
|
|
"loss": 1.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31240683794021606,
|
|
"step": 926,
|
|
"valid_targets_mean": 16151.5,
|
|
"valid_targets_min": 15257
|
|
},
|
|
{
|
|
"epoch": 3.946695095948827,
|
|
"grad_norm": 0.07210863099415549,
|
|
"learning_rate": 5.231605793715348e-06,
|
|
"loss": 1.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2166639268398285,
|
|
"step": 927,
|
|
"valid_targets_mean": 10531.7,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 3.950959488272921,
|
|
"grad_norm": 0.06917339690868352,
|
|
"learning_rate": 5.191585823251335e-06,
|
|
"loss": 1.0066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25547680258750916,
|
|
"step": 928,
|
|
"valid_targets_mean": 16075.4,
|
|
"valid_targets_min": 15145
|
|
},
|
|
{
|
|
"epoch": 3.955223880597015,
|
|
"grad_norm": 0.07600753611412961,
|
|
"learning_rate": 5.151696667925348e-06,
|
|
"loss": 1.074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3284090459346771,
|
|
"step": 929,
|
|
"valid_targets_mean": 16053.1,
|
|
"valid_targets_min": 14567
|
|
},
|
|
{
|
|
"epoch": 3.9594882729211087,
|
|
"grad_norm": 0.0704456977588901,
|
|
"learning_rate": 5.111938680111732e-06,
|
|
"loss": 1.0151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17298215627670288,
|
|
"step": 930,
|
|
"valid_targets_mean": 8765.9,
|
|
"valid_targets_min": 2646
|
|
},
|
|
{
|
|
"epoch": 3.9637526652452024,
|
|
"grad_norm": 0.0660769792009937,
|
|
"learning_rate": 5.072312211026125e-06,
|
|
"loss": 0.9976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634711265563965,
|
|
"step": 931,
|
|
"valid_targets_mean": 16128.8,
|
|
"valid_targets_min": 15297
|
|
},
|
|
{
|
|
"epoch": 3.9680170575692966,
|
|
"grad_norm": 0.07330019533514108,
|
|
"learning_rate": 5.032817610722369e-06,
|
|
"loss": 1.0329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785566449165344,
|
|
"step": 932,
|
|
"valid_targets_mean": 16218.6,
|
|
"valid_targets_min": 15384
|
|
},
|
|
{
|
|
"epoch": 3.9722814498933903,
|
|
"grad_norm": 0.07250993096677776,
|
|
"learning_rate": 4.993455228089366e-06,
|
|
"loss": 1.0076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21679943799972534,
|
|
"step": 933,
|
|
"valid_targets_mean": 13328.1,
|
|
"valid_targets_min": 9040
|
|
},
|
|
{
|
|
"epoch": 3.976545842217484,
|
|
"grad_norm": 0.07043669985174822,
|
|
"learning_rate": 4.954225410848048e-06,
|
|
"loss": 1.0666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30351001024246216,
|
|
"step": 934,
|
|
"valid_targets_mean": 16080.5,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 3.9808102345415777,
|
|
"grad_norm": 0.07983719816323807,
|
|
"learning_rate": 4.915128505548284e-06,
|
|
"loss": 1.0504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28455278277397156,
|
|
"step": 935,
|
|
"valid_targets_mean": 16129.4,
|
|
"valid_targets_min": 14523
|
|
},
|
|
{
|
|
"epoch": 3.9850746268656714,
|
|
"grad_norm": 0.06657195182952366,
|
|
"learning_rate": 4.8761648575658145e-06,
|
|
"loss": 1.0017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24082420766353607,
|
|
"step": 936,
|
|
"valid_targets_mean": 15462.1,
|
|
"valid_targets_min": 12591
|
|
},
|
|
{
|
|
"epoch": 3.9893390191897655,
|
|
"grad_norm": 0.07428830905250548,
|
|
"learning_rate": 4.837334811099217e-06,
|
|
"loss": 1.0483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054385185241699,
|
|
"step": 937,
|
|
"valid_targets_mean": 16083.8,
|
|
"valid_targets_min": 14605
|
|
},
|
|
{
|
|
"epoch": 3.9936034115138592,
|
|
"grad_norm": 0.06899694031636615,
|
|
"learning_rate": 4.7986387091668365e-06,
|
|
"loss": 1.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24436715245246887,
|
|
"step": 938,
|
|
"valid_targets_mean": 13353.7,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 3.997867803837953,
|
|
"grad_norm": 0.06714050627709228,
|
|
"learning_rate": 4.760076893603791e-06,
|
|
"loss": 0.997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25163882970809937,
|
|
"step": 939,
|
|
"valid_targets_mean": 16128.8,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.10813958254351896,
|
|
"learning_rate": 4.721649705058926e-06,
|
|
"loss": 1.0449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4667865037918091,
|
|
"step": 940,
|
|
"valid_targets_mean": 11576.2,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 4.004264392324094,
|
|
"grad_norm": 0.07433456126618207,
|
|
"learning_rate": 4.683357482991819e-06,
|
|
"loss": 0.9923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24734915792942047,
|
|
"step": 941,
|
|
"valid_targets_mean": 16086.4,
|
|
"valid_targets_min": 14385
|
|
},
|
|
{
|
|
"epoch": 4.008528784648187,
|
|
"grad_norm": 0.07278408488158716,
|
|
"learning_rate": 4.645200565669776e-06,
|
|
"loss": 1.0457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114967346191406,
|
|
"step": 942,
|
|
"valid_targets_mean": 15978.8,
|
|
"valid_targets_min": 13604
|
|
},
|
|
{
|
|
"epoch": 4.0127931769722816,
|
|
"grad_norm": 0.07067102602044609,
|
|
"learning_rate": 4.607179290164823e-06,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2320965826511383,
|
|
"step": 943,
|
|
"valid_targets_mean": 14448.2,
|
|
"valid_targets_min": 11162
|
|
},
|
|
{
|
|
"epoch": 4.017057569296376,
|
|
"grad_norm": 0.07115665800900481,
|
|
"learning_rate": 4.569293992350783e-06,
|
|
"loss": 1.0126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26585066318511963,
|
|
"step": 944,
|
|
"valid_targets_mean": 16213.6,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 4.021321961620469,
|
|
"grad_norm": 0.07336182419391064,
|
|
"learning_rate": 4.531545006900244e-06,
|
|
"loss": 1.0436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2934248745441437,
|
|
"step": 945,
|
|
"valid_targets_mean": 16105.2,
|
|
"valid_targets_min": 15197
|
|
},
|
|
{
|
|
"epoch": 4.025586353944563,
|
|
"grad_norm": 0.06869929397911596,
|
|
"learning_rate": 4.493932667281646e-06,
|
|
"loss": 1.0354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25927960872650146,
|
|
"step": 946,
|
|
"valid_targets_mean": 15658.3,
|
|
"valid_targets_min": 13130
|
|
},
|
|
{
|
|
"epoch": 4.029850746268656,
|
|
"grad_norm": 0.06831465730131694,
|
|
"learning_rate": 4.456457305756321e-06,
|
|
"loss": 1.0379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29613256454467773,
|
|
"step": 947,
|
|
"valid_targets_mean": 16066.0,
|
|
"valid_targets_min": 15224
|
|
},
|
|
{
|
|
"epoch": 4.0341151385927505,
|
|
"grad_norm": 0.0693374980485343,
|
|
"learning_rate": 4.419119253375557e-06,
|
|
"loss": 1.0423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26073312759399414,
|
|
"step": 948,
|
|
"valid_targets_mean": 13006.6,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 4.038379530916845,
|
|
"grad_norm": 0.06900347113017803,
|
|
"learning_rate": 4.381918839977675e-06,
|
|
"loss": 1.0513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24768483638763428,
|
|
"step": 949,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15614
|
|
},
|
|
{
|
|
"epoch": 4.042643923240938,
|
|
"grad_norm": 0.08032570475058655,
|
|
"learning_rate": 4.344856394185122e-06,
|
|
"loss": 1.0436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30467864871025085,
|
|
"step": 950,
|
|
"valid_targets_mean": 16158.7,
|
|
"valid_targets_min": 15362
|
|
},
|
|
{
|
|
"epoch": 4.046908315565032,
|
|
"grad_norm": 0.07432077478517161,
|
|
"learning_rate": 4.307932243401538e-06,
|
|
"loss": 1.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18688882887363434,
|
|
"step": 951,
|
|
"valid_targets_mean": 9768.4,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 4.051172707889126,
|
|
"grad_norm": 0.06728414760706075,
|
|
"learning_rate": 4.271146713808927e-06,
|
|
"loss": 1.0037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607046365737915,
|
|
"step": 952,
|
|
"valid_targets_mean": 15941.6,
|
|
"valid_targets_min": 13183
|
|
},
|
|
{
|
|
"epoch": 4.0554371002132195,
|
|
"grad_norm": 0.07748004830079153,
|
|
"learning_rate": 4.234500130364698e-06,
|
|
"loss": 1.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31053346395492554,
|
|
"step": 953,
|
|
"valid_targets_mean": 16087.4,
|
|
"valid_targets_min": 14971
|
|
},
|
|
{
|
|
"epoch": 4.059701492537314,
|
|
"grad_norm": 0.06898309753405626,
|
|
"learning_rate": 4.197992816798851e-06,
|
|
"loss": 1.0322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18571747839450836,
|
|
"step": 954,
|
|
"valid_targets_mean": 10989.3,
|
|
"valid_targets_min": 5606
|
|
},
|
|
{
|
|
"epoch": 4.063965884861407,
|
|
"grad_norm": 0.06904022569083743,
|
|
"learning_rate": 4.161625095611101e-06,
|
|
"loss": 1.0385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804241180419922,
|
|
"step": 955,
|
|
"valid_targets_mean": 16066.4,
|
|
"valid_targets_min": 14599
|
|
},
|
|
{
|
|
"epoch": 4.068230277185501,
|
|
"grad_norm": 0.06749103137762973,
|
|
"learning_rate": 4.125397288068007e-06,
|
|
"loss": 1.0527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29748380184173584,
|
|
"step": 956,
|
|
"valid_targets_mean": 15989.4,
|
|
"valid_targets_min": 14932
|
|
},
|
|
{
|
|
"epoch": 4.072494669509595,
|
|
"grad_norm": 0.06958233792187381,
|
|
"learning_rate": 4.089309714200187e-06,
|
|
"loss": 1.0422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23831385374069214,
|
|
"step": 957,
|
|
"valid_targets_mean": 13906.6,
|
|
"valid_targets_min": 11415
|
|
},
|
|
{
|
|
"epoch": 4.076759061833688,
|
|
"grad_norm": 0.07119843993552165,
|
|
"learning_rate": 4.0533626927994185e-06,
|
|
"loss": 1.0629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27871373295783997,
|
|
"step": 958,
|
|
"valid_targets_mean": 16201.2,
|
|
"valid_targets_min": 15749
|
|
},
|
|
{
|
|
"epoch": 4.081023454157783,
|
|
"grad_norm": 0.0759106778292143,
|
|
"learning_rate": 4.017556541415888e-06,
|
|
"loss": 1.0602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2981345057487488,
|
|
"step": 959,
|
|
"valid_targets_mean": 16089.6,
|
|
"valid_targets_min": 15179
|
|
},
|
|
{
|
|
"epoch": 4.085287846481877,
|
|
"grad_norm": 0.06879670836544767,
|
|
"learning_rate": 3.981891576355352e-06,
|
|
"loss": 1.0589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259943425655365,
|
|
"step": 960,
|
|
"valid_targets_mean": 15381.2,
|
|
"valid_targets_min": 12372
|
|
},
|
|
{
|
|
"epoch": 4.08955223880597,
|
|
"grad_norm": 0.07096698222264053,
|
|
"learning_rate": 3.946368112676346e-06,
|
|
"loss": 0.9916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931315302848816,
|
|
"step": 961,
|
|
"valid_targets_mean": 16006.5,
|
|
"valid_targets_min": 14001
|
|
},
|
|
{
|
|
"epoch": 4.093816631130064,
|
|
"grad_norm": 0.07308485241608609,
|
|
"learning_rate": 3.9109864641874166e-06,
|
|
"loss": 1.0092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20273205637931824,
|
|
"step": 962,
|
|
"valid_targets_mean": 11488.2,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 4.098081023454157,
|
|
"grad_norm": 0.07254876147285848,
|
|
"learning_rate": 3.875746943444316e-06,
|
|
"loss": 1.0273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26414158940315247,
|
|
"step": 963,
|
|
"valid_targets_mean": 15903.6,
|
|
"valid_targets_min": 13811
|
|
},
|
|
{
|
|
"epoch": 4.1023454157782515,
|
|
"grad_norm": 0.0759551852724171,
|
|
"learning_rate": 3.840649861747278e-06,
|
|
"loss": 1.0526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30791565775871277,
|
|
"step": 964,
|
|
"valid_targets_mean": 16148.3,
|
|
"valid_targets_min": 15497
|
|
},
|
|
{
|
|
"epoch": 4.106609808102346,
|
|
"grad_norm": 0.0773464591390532,
|
|
"learning_rate": 3.8056955291382667e-06,
|
|
"loss": 1.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16122660040855408,
|
|
"step": 965,
|
|
"valid_targets_mean": 8965.2,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 4.110874200426439,
|
|
"grad_norm": 0.0665508397540495,
|
|
"learning_rate": 3.7708842543981928e-06,
|
|
"loss": 1.0263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26521602272987366,
|
|
"step": 966,
|
|
"valid_targets_mean": 16074.9,
|
|
"valid_targets_min": 15194
|
|
},
|
|
{
|
|
"epoch": 4.115138592750533,
|
|
"grad_norm": 0.07649782096499873,
|
|
"learning_rate": 3.736216345044237e-06,
|
|
"loss": 1.0461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2981080412864685,
|
|
"step": 967,
|
|
"valid_targets_mean": 16189.7,
|
|
"valid_targets_min": 15766
|
|
},
|
|
{
|
|
"epoch": 4.119402985074627,
|
|
"grad_norm": 0.07120115593146852,
|
|
"learning_rate": 3.7016921073271084e-06,
|
|
"loss": 1.0545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2343193143606186,
|
|
"step": 968,
|
|
"valid_targets_mean": 13523.9,
|
|
"valid_targets_min": 10232
|
|
},
|
|
{
|
|
"epoch": 4.1236673773987205,
|
|
"grad_norm": 0.06771140156506235,
|
|
"learning_rate": 3.6673118462283453e-06,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28202947974205017,
|
|
"step": 969,
|
|
"valid_targets_mean": 16140.2,
|
|
"valid_targets_min": 15041
|
|
},
|
|
{
|
|
"epoch": 4.127931769722815,
|
|
"grad_norm": 0.07442228715015706,
|
|
"learning_rate": 3.6330758654576227e-06,
|
|
"loss": 1.0078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28538715839385986,
|
|
"step": 970,
|
|
"valid_targets_mean": 16143.7,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 4.132196162046908,
|
|
"grad_norm": 0.07197164544183474,
|
|
"learning_rate": 3.598984467450055e-06,
|
|
"loss": 1.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27692121267318726,
|
|
"step": 971,
|
|
"valid_targets_mean": 15285.8,
|
|
"valid_targets_min": 13725
|
|
},
|
|
{
|
|
"epoch": 4.136460554371002,
|
|
"grad_norm": 0.07146765445446748,
|
|
"learning_rate": 3.565037953363546e-06,
|
|
"loss": 1.0612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920125126838684,
|
|
"step": 972,
|
|
"valid_targets_mean": 16085.7,
|
|
"valid_targets_min": 15092
|
|
},
|
|
{
|
|
"epoch": 4.140724946695096,
|
|
"grad_norm": 0.07201901371863746,
|
|
"learning_rate": 3.5312366230761154e-06,
|
|
"loss": 1.0536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23472663760185242,
|
|
"step": 973,
|
|
"valid_targets_mean": 13218.3,
|
|
"valid_targets_min": 3437
|
|
},
|
|
{
|
|
"epoch": 4.144989339019189,
|
|
"grad_norm": 0.07127658588302571,
|
|
"learning_rate": 3.497580775183258e-06,
|
|
"loss": 1.0175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25968703627586365,
|
|
"step": 974,
|
|
"valid_targets_mean": 15978.1,
|
|
"valid_targets_min": 14683
|
|
},
|
|
{
|
|
"epoch": 4.149253731343284,
|
|
"grad_norm": 0.06716433426918972,
|
|
"learning_rate": 3.464070706995295e-06,
|
|
"loss": 1.0493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3319670259952545,
|
|
"step": 975,
|
|
"valid_targets_mean": 15963.7,
|
|
"valid_targets_min": 14825
|
|
},
|
|
{
|
|
"epoch": 4.153518123667378,
|
|
"grad_norm": 0.07197261850341137,
|
|
"learning_rate": 3.4307067145347417e-06,
|
|
"loss": 1.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1870976835489273,
|
|
"step": 976,
|
|
"valid_targets_mean": 8936.3,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 4.157782515991471,
|
|
"grad_norm": 0.06736433141374622,
|
|
"learning_rate": 3.397489092533739e-06,
|
|
"loss": 1.0551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26337099075317383,
|
|
"step": 977,
|
|
"valid_targets_mean": 16069.0,
|
|
"valid_targets_min": 14815
|
|
},
|
|
{
|
|
"epoch": 4.162046908315565,
|
|
"grad_norm": 0.07433200435424105,
|
|
"learning_rate": 3.364418134431371e-06,
|
|
"loss": 1.0802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.304631769657135,
|
|
"step": 978,
|
|
"valid_targets_mean": 16045.3,
|
|
"valid_targets_min": 14766
|
|
},
|
|
{
|
|
"epoch": 4.166311300639659,
|
|
"grad_norm": 0.0728858816458251,
|
|
"learning_rate": 3.331494132371149e-06,
|
|
"loss": 1.0273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20439061522483826,
|
|
"step": 979,
|
|
"valid_targets_mean": 11240.3,
|
|
"valid_targets_min": 5323
|
|
},
|
|
{
|
|
"epoch": 4.1705756929637525,
|
|
"grad_norm": 0.06509047360298113,
|
|
"learning_rate": 3.2987173771983816e-06,
|
|
"loss": 1.0377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26393136382102966,
|
|
"step": 980,
|
|
"valid_targets_mean": 16068.0,
|
|
"valid_targets_min": 14469
|
|
},
|
|
{
|
|
"epoch": 4.174840085287847,
|
|
"grad_norm": 0.07179495331872453,
|
|
"learning_rate": 3.266088158457634e-06,
|
|
"loss": 1.0201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3215472996234894,
|
|
"step": 981,
|
|
"valid_targets_mean": 16133.2,
|
|
"valid_targets_min": 15268
|
|
},
|
|
{
|
|
"epoch": 4.17910447761194,
|
|
"grad_norm": 0.06716269120635336,
|
|
"learning_rate": 3.233606764390147e-06,
|
|
"loss": 1.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25857558846473694,
|
|
"step": 982,
|
|
"valid_targets_mean": 15046.1,
|
|
"valid_targets_min": 12819
|
|
},
|
|
{
|
|
"epoch": 4.183368869936034,
|
|
"grad_norm": 0.06654935104423865,
|
|
"learning_rate": 3.2012734819313127e-06,
|
|
"loss": 1.0044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863087058067322,
|
|
"step": 983,
|
|
"valid_targets_mean": 16176.4,
|
|
"valid_targets_min": 15485
|
|
},
|
|
{
|
|
"epoch": 4.187633262260128,
|
|
"grad_norm": 0.06776252446854797,
|
|
"learning_rate": 3.1690885967081187e-06,
|
|
"loss": 1.0237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2943454384803772,
|
|
"step": 984,
|
|
"valid_targets_mean": 16168.8,
|
|
"valid_targets_min": 15136
|
|
},
|
|
{
|
|
"epoch": 4.1918976545842215,
|
|
"grad_norm": 0.06795327232701845,
|
|
"learning_rate": 3.1370523930366393e-06,
|
|
"loss": 1.0362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24246574938297272,
|
|
"step": 985,
|
|
"valid_targets_mean": 15625.8,
|
|
"valid_targets_min": 12908
|
|
},
|
|
{
|
|
"epoch": 4.196162046908316,
|
|
"grad_norm": 0.07125273070831338,
|
|
"learning_rate": 3.105165153919525e-06,
|
|
"loss": 1.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3236227035522461,
|
|
"step": 986,
|
|
"valid_targets_mean": 15982.6,
|
|
"valid_targets_min": 12847
|
|
},
|
|
{
|
|
"epoch": 4.20042643923241,
|
|
"grad_norm": 0.07464669553273583,
|
|
"learning_rate": 3.073427161043492e-06,
|
|
"loss": 1.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21960565447807312,
|
|
"step": 987,
|
|
"valid_targets_mean": 10786.6,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 4.204690831556503,
|
|
"grad_norm": 0.0683501977903201,
|
|
"learning_rate": 3.0418386947768463e-06,
|
|
"loss": 1.0429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675953507423401,
|
|
"step": 988,
|
|
"valid_targets_mean": 16158.9,
|
|
"valid_targets_min": 15548
|
|
},
|
|
{
|
|
"epoch": 4.208955223880597,
|
|
"grad_norm": 0.06740993316254999,
|
|
"learning_rate": 3.01040003416698e-06,
|
|
"loss": 1.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3078218102455139,
|
|
"step": 989,
|
|
"valid_targets_mean": 16002.3,
|
|
"valid_targets_min": 12727
|
|
},
|
|
{
|
|
"epoch": 4.21321961620469,
|
|
"grad_norm": 0.06866415633234638,
|
|
"learning_rate": 2.97911145693796e-06,
|
|
"loss": 1.0228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17873141169548035,
|
|
"step": 990,
|
|
"valid_targets_mean": 9445.2,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 4.217484008528785,
|
|
"grad_norm": 0.06736053370456448,
|
|
"learning_rate": 2.947973239488009e-06,
|
|
"loss": 1.0379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26110607385635376,
|
|
"step": 991,
|
|
"valid_targets_mean": 16088.0,
|
|
"valid_targets_min": 13371
|
|
},
|
|
{
|
|
"epoch": 4.221748400852879,
|
|
"grad_norm": 0.07285556682110207,
|
|
"learning_rate": 2.91698565688711e-06,
|
|
"loss": 1.0865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.310644268989563,
|
|
"step": 992,
|
|
"valid_targets_mean": 16095.7,
|
|
"valid_targets_min": 15461
|
|
},
|
|
{
|
|
"epoch": 4.226012793176972,
|
|
"grad_norm": 0.06374261724811926,
|
|
"learning_rate": 2.886148982874566e-06,
|
|
"loss": 1.0048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20932836830615997,
|
|
"step": 993,
|
|
"valid_targets_mean": 13374.2,
|
|
"valid_targets_min": 11304
|
|
},
|
|
{
|
|
"epoch": 4.230277185501066,
|
|
"grad_norm": 0.0710302287460953,
|
|
"learning_rate": 2.8554634898565668e-06,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28549161553382874,
|
|
"step": 994,
|
|
"valid_targets_mean": 16169.9,
|
|
"valid_targets_min": 15551
|
|
},
|
|
{
|
|
"epoch": 4.23454157782516,
|
|
"grad_norm": 0.07298028102396883,
|
|
"learning_rate": 2.824929448903806e-06,
|
|
"loss": 1.0088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863517999649048,
|
|
"step": 995,
|
|
"valid_targets_mean": 16140.5,
|
|
"valid_targets_min": 14586
|
|
},
|
|
{
|
|
"epoch": 4.2388059701492535,
|
|
"grad_norm": 0.06435374211480067,
|
|
"learning_rate": 2.794547129749059e-06,
|
|
"loss": 1.0152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2323729395866394,
|
|
"step": 996,
|
|
"valid_targets_mean": 15276.1,
|
|
"valid_targets_min": 13502
|
|
},
|
|
{
|
|
"epoch": 4.243070362473348,
|
|
"grad_norm": 0.06432980810966171,
|
|
"learning_rate": 2.7643168007848255e-06,
|
|
"loss": 1.0138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869635820388794,
|
|
"step": 997,
|
|
"valid_targets_mean": 16086.2,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 4.247334754797441,
|
|
"grad_norm": 0.0699461088874161,
|
|
"learning_rate": 2.734238729060956e-06,
|
|
"loss": 1.0343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2177237570285797,
|
|
"step": 998,
|
|
"valid_targets_mean": 13341.9,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 4.251599147121535,
|
|
"grad_norm": 0.0689495137703975,
|
|
"learning_rate": 2.7043131802822653e-06,
|
|
"loss": 1.0306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551552951335907,
|
|
"step": 999,
|
|
"valid_targets_mean": 16104.0,
|
|
"valid_targets_min": 15062
|
|
},
|
|
{
|
|
"epoch": 4.255863539445629,
|
|
"grad_norm": 0.0780963851962524,
|
|
"learning_rate": 2.674540418806222e-06,
|
|
"loss": 1.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30785664916038513,
|
|
"step": 1000,
|
|
"valid_targets_mean": 16060.6,
|
|
"valid_targets_min": 14772
|
|
},
|
|
{
|
|
"epoch": 4.2601279317697225,
|
|
"grad_norm": 0.06990267375223602,
|
|
"learning_rate": 2.6449207076405857e-06,
|
|
"loss": 1.0547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19598382711410522,
|
|
"step": 1001,
|
|
"valid_targets_mean": 9404.0,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 4.264392324093817,
|
|
"grad_norm": 0.06519760766513462,
|
|
"learning_rate": 2.6154543084411035e-06,
|
|
"loss": 1.017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23901152610778809,
|
|
"step": 1002,
|
|
"valid_targets_mean": 16214.6,
|
|
"valid_targets_min": 15563
|
|
},
|
|
{
|
|
"epoch": 4.268656716417911,
|
|
"grad_norm": 0.07123309948770153,
|
|
"learning_rate": 2.5861414815091834e-06,
|
|
"loss": 1.038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898581624031067,
|
|
"step": 1003,
|
|
"valid_targets_mean": 16140.6,
|
|
"valid_targets_min": 14861
|
|
},
|
|
{
|
|
"epoch": 4.272921108742004,
|
|
"grad_norm": 0.06777379764604702,
|
|
"learning_rate": 2.5569824857895987e-06,
|
|
"loss": 1.018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17651090025901794,
|
|
"step": 1004,
|
|
"valid_targets_mean": 11383.6,
|
|
"valid_targets_min": 6098
|
|
},
|
|
{
|
|
"epoch": 4.277185501066098,
|
|
"grad_norm": 0.06812521749395846,
|
|
"learning_rate": 2.5279775788682083e-06,
|
|
"loss": 1.0019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25137028098106384,
|
|
"step": 1005,
|
|
"valid_targets_mean": 16192.1,
|
|
"valid_targets_min": 15638
|
|
},
|
|
{
|
|
"epoch": 4.281449893390192,
|
|
"grad_norm": 0.06891007527637337,
|
|
"learning_rate": 2.499127016969671e-06,
|
|
"loss": 1.0566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312665730714798,
|
|
"step": 1006,
|
|
"valid_targets_mean": 15998.5,
|
|
"valid_targets_min": 13604
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.0641557737697559,
|
|
"learning_rate": 2.4704310549551934e-06,
|
|
"loss": 1.0598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25579750537872314,
|
|
"step": 1007,
|
|
"valid_targets_mean": 15847.2,
|
|
"valid_targets_min": 13892
|
|
},
|
|
{
|
|
"epoch": 4.28997867803838,
|
|
"grad_norm": 0.07022018210659366,
|
|
"learning_rate": 2.441889946320266e-06,
|
|
"loss": 1.0187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2647198736667633,
|
|
"step": 1008,
|
|
"valid_targets_mean": 16143.3,
|
|
"valid_targets_min": 15342
|
|
},
|
|
{
|
|
"epoch": 4.294243070362473,
|
|
"grad_norm": 0.07380810139158309,
|
|
"learning_rate": 2.4135039431924233e-06,
|
|
"loss": 1.033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3124866783618927,
|
|
"step": 1009,
|
|
"valid_targets_mean": 16120.2,
|
|
"valid_targets_min": 15039
|
|
},
|
|
{
|
|
"epoch": 4.298507462686567,
|
|
"grad_norm": 0.06391205296321988,
|
|
"learning_rate": 2.3852732963290426e-06,
|
|
"loss": 1.0156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500407099723816,
|
|
"step": 1010,
|
|
"valid_targets_mean": 16033.1,
|
|
"valid_targets_min": 13787
|
|
},
|
|
{
|
|
"epoch": 4.302771855010661,
|
|
"grad_norm": 0.07271677962618527,
|
|
"learning_rate": 2.3571982551150853e-06,
|
|
"loss": 1.0433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2994212508201599,
|
|
"step": 1011,
|
|
"valid_targets_mean": 16221.1,
|
|
"valid_targets_min": 15759
|
|
},
|
|
{
|
|
"epoch": 4.3070362473347545,
|
|
"grad_norm": 0.06703478743811428,
|
|
"learning_rate": 2.329279067560937e-06,
|
|
"loss": 1.0413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20435109734535217,
|
|
"step": 1012,
|
|
"valid_targets_mean": 11467.0,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 4.311300639658849,
|
|
"grad_norm": 0.06613811500956916,
|
|
"learning_rate": 2.301515980300182e-06,
|
|
"loss": 0.9936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258282870054245,
|
|
"step": 1013,
|
|
"valid_targets_mean": 16119.7,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 4.315565031982943,
|
|
"grad_norm": 0.0697458672663331,
|
|
"learning_rate": 2.2739092385874527e-06,
|
|
"loss": 1.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114057183265686,
|
|
"step": 1014,
|
|
"valid_targets_mean": 15924.1,
|
|
"valid_targets_min": 13815
|
|
},
|
|
{
|
|
"epoch": 4.319829424307036,
|
|
"grad_norm": 0.07105802801319078,
|
|
"learning_rate": 2.2464590862962443e-06,
|
|
"loss": 1.0966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17770794034004211,
|
|
"step": 1015,
|
|
"valid_targets_mean": 10586.6,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 4.32409381663113,
|
|
"grad_norm": 0.06703576684260368,
|
|
"learning_rate": 2.219165765916769e-06,
|
|
"loss": 1.0019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23432299494743347,
|
|
"step": 1016,
|
|
"valid_targets_mean": 16205.3,
|
|
"valid_targets_min": 15179
|
|
},
|
|
{
|
|
"epoch": 4.3283582089552235,
|
|
"grad_norm": 0.07216055575974788,
|
|
"learning_rate": 2.192029518553798e-06,
|
|
"loss": 1.0531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27970319986343384,
|
|
"step": 1017,
|
|
"valid_targets_mean": 16134.5,
|
|
"valid_targets_min": 14949
|
|
},
|
|
{
|
|
"epoch": 4.332622601279318,
|
|
"grad_norm": 0.06723621064299429,
|
|
"learning_rate": 2.165050583924566e-06,
|
|
"loss": 1.0325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19378241896629333,
|
|
"step": 1018,
|
|
"valid_targets_mean": 11559.8,
|
|
"valid_targets_min": 8634
|
|
},
|
|
{
|
|
"epoch": 4.336886993603412,
|
|
"grad_norm": 0.06684231534746397,
|
|
"learning_rate": 2.1382292003566163e-06,
|
|
"loss": 1.0248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28307196497917175,
|
|
"step": 1019,
|
|
"valid_targets_mean": 16052.6,
|
|
"valid_targets_min": 13517
|
|
},
|
|
{
|
|
"epoch": 4.341151385927505,
|
|
"grad_norm": 0.07280863626467103,
|
|
"learning_rate": 2.1115656047857213e-06,
|
|
"loss": 1.0374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302565336227417,
|
|
"step": 1020,
|
|
"valid_targets_mean": 16113.6,
|
|
"valid_targets_min": 13856
|
|
},
|
|
{
|
|
"epoch": 4.345415778251599,
|
|
"grad_norm": 0.06684167081043829,
|
|
"learning_rate": 2.0850600327537806e-06,
|
|
"loss": 1.0436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24881529808044434,
|
|
"step": 1021,
|
|
"valid_targets_mean": 15664.3,
|
|
"valid_targets_min": 13709
|
|
},
|
|
{
|
|
"epoch": 4.349680170575693,
|
|
"grad_norm": 0.06963796275425553,
|
|
"learning_rate": 2.058712718406719e-06,
|
|
"loss": 1.012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2769511938095093,
|
|
"step": 1022,
|
|
"valid_targets_mean": 16217.0,
|
|
"valid_targets_min": 15452
|
|
},
|
|
{
|
|
"epoch": 4.353944562899787,
|
|
"grad_norm": 0.06723847907744494,
|
|
"learning_rate": 2.032523894492471e-06,
|
|
"loss": 1.0331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23674507439136505,
|
|
"step": 1023,
|
|
"valid_targets_mean": 13897.5,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 4.358208955223881,
|
|
"grad_norm": 0.07004483551063212,
|
|
"learning_rate": 2.0064937923588634e-06,
|
|
"loss": 1.0131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25714924931526184,
|
|
"step": 1024,
|
|
"valid_targets_mean": 16017.8,
|
|
"valid_targets_min": 15127
|
|
},
|
|
{
|
|
"epoch": 4.362473347547974,
|
|
"grad_norm": 0.07072859219085252,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 1.0537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32045555114746094,
|
|
"step": 1025,
|
|
"valid_targets_mean": 16117.2,
|
|
"valid_targets_min": 15272
|
|
},
|
|
{
|
|
"epoch": 4.366737739872068,
|
|
"grad_norm": 0.07365397687682998,
|
|
"learning_rate": 1.954910671812298e-06,
|
|
"loss": 1.0119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18778666853904724,
|
|
"step": 1026,
|
|
"valid_targets_mean": 11222.7,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 4.371002132196162,
|
|
"grad_norm": 0.06539371933818432,
|
|
"learning_rate": 1.9293581090762894e-06,
|
|
"loss": 0.9973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26029354333877563,
|
|
"step": 1027,
|
|
"valid_targets_mean": 16076.6,
|
|
"valid_targets_min": 14970
|
|
},
|
|
{
|
|
"epoch": 4.3752665245202556,
|
|
"grad_norm": 0.07078832920742793,
|
|
"learning_rate": 1.9039651794708058e-06,
|
|
"loss": 1.0537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31659555435180664,
|
|
"step": 1028,
|
|
"valid_targets_mean": 16039.2,
|
|
"valid_targets_min": 14344
|
|
},
|
|
{
|
|
"epoch": 4.37953091684435,
|
|
"grad_norm": 0.07061051493126577,
|
|
"learning_rate": 1.8787321073128817e-06,
|
|
"loss": 1.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872818022966385,
|
|
"step": 1029,
|
|
"valid_targets_mean": 11576.3,
|
|
"valid_targets_min": 7268
|
|
},
|
|
{
|
|
"epoch": 4.383795309168444,
|
|
"grad_norm": 0.07031163204042408,
|
|
"learning_rate": 1.8536591155073958e-06,
|
|
"loss": 1.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762349247932434,
|
|
"step": 1030,
|
|
"valid_targets_mean": 16112.5,
|
|
"valid_targets_min": 13968
|
|
},
|
|
{
|
|
"epoch": 4.388059701492537,
|
|
"grad_norm": 0.07543497683668358,
|
|
"learning_rate": 1.8287464255451181e-06,
|
|
"loss": 1.0462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015264868736267,
|
|
"step": 1031,
|
|
"valid_targets_mean": 16160.5,
|
|
"valid_targets_min": 15459
|
|
},
|
|
{
|
|
"epoch": 4.392324093816631,
|
|
"grad_norm": 0.06835529500729307,
|
|
"learning_rate": 1.803994257500714e-06,
|
|
"loss": 1.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24763526022434235,
|
|
"step": 1032,
|
|
"valid_targets_mean": 13697.7,
|
|
"valid_targets_min": 10900
|
|
},
|
|
{
|
|
"epoch": 4.396588486140725,
|
|
"grad_norm": 0.06365070315459512,
|
|
"learning_rate": 1.7794028300308474e-06,
|
|
"loss": 1.0041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799084782600403,
|
|
"step": 1033,
|
|
"valid_targets_mean": 16079.0,
|
|
"valid_targets_min": 14017
|
|
},
|
|
{
|
|
"epoch": 4.400852878464819,
|
|
"grad_norm": 0.0664232338163114,
|
|
"learning_rate": 1.7549723603722003e-06,
|
|
"loss": 1.0209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26145485043525696,
|
|
"step": 1034,
|
|
"valid_targets_mean": 16106.7,
|
|
"valid_targets_min": 13000
|
|
},
|
|
{
|
|
"epoch": 4.405117270788913,
|
|
"grad_norm": 0.07008406289782707,
|
|
"learning_rate": 1.730703064339605e-06,
|
|
"loss": 1.0503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24974822998046875,
|
|
"step": 1035,
|
|
"valid_targets_mean": 15922.4,
|
|
"valid_targets_min": 14902
|
|
},
|
|
{
|
|
"epoch": 4.409381663113006,
|
|
"grad_norm": 0.06880313951859286,
|
|
"learning_rate": 1.7065951563241022e-06,
|
|
"loss": 1.0326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27286356687545776,
|
|
"step": 1036,
|
|
"valid_targets_mean": 16216.2,
|
|
"valid_targets_min": 15695
|
|
},
|
|
{
|
|
"epoch": 4.4136460554371,
|
|
"grad_norm": 0.07284244980587297,
|
|
"learning_rate": 1.682648849291051e-06,
|
|
"loss": 1.0458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2113986760377884,
|
|
"step": 1037,
|
|
"valid_targets_mean": 12109.8,
|
|
"valid_targets_min": 1781
|
|
},
|
|
{
|
|
"epoch": 4.417910447761194,
|
|
"grad_norm": 0.06468264282581432,
|
|
"learning_rate": 1.6588643547782579e-06,
|
|
"loss": 0.9979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549009323120117,
|
|
"step": 1038,
|
|
"valid_targets_mean": 16121.2,
|
|
"valid_targets_min": 15598
|
|
},
|
|
{
|
|
"epoch": 4.422174840085288,
|
|
"grad_norm": 0.07241549473457982,
|
|
"learning_rate": 1.6352418828941052e-06,
|
|
"loss": 1.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903823256492615,
|
|
"step": 1039,
|
|
"valid_targets_mean": 16151.0,
|
|
"valid_targets_min": 15082
|
|
},
|
|
{
|
|
"epoch": 4.426439232409382,
|
|
"grad_norm": 0.07615125090087548,
|
|
"learning_rate": 1.6117816423156952e-06,
|
|
"loss": 1.0408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17649801075458527,
|
|
"step": 1040,
|
|
"valid_targets_mean": 9591.9,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 4.430703624733475,
|
|
"grad_norm": 0.06345557574338923,
|
|
"learning_rate": 1.5884838402870029e-06,
|
|
"loss": 1.0131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513384222984314,
|
|
"step": 1041,
|
|
"valid_targets_mean": 15705.8,
|
|
"valid_targets_min": 9100
|
|
},
|
|
{
|
|
"epoch": 4.434968017057569,
|
|
"grad_norm": 0.06628803179770525,
|
|
"learning_rate": 1.5653486826170384e-06,
|
|
"loss": 1.0369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2801644802093506,
|
|
"step": 1042,
|
|
"valid_targets_mean": 16129.2,
|
|
"valid_targets_min": 14112
|
|
},
|
|
{
|
|
"epoch": 4.439232409381663,
|
|
"grad_norm": 0.06479973133605925,
|
|
"learning_rate": 1.5423763736780583e-06,
|
|
"loss": 1.0095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21767209470272064,
|
|
"step": 1043,
|
|
"valid_targets_mean": 12284.5,
|
|
"valid_targets_min": 9773
|
|
},
|
|
{
|
|
"epoch": 4.443496801705757,
|
|
"grad_norm": 0.07379888048796672,
|
|
"learning_rate": 1.5195671164037173e-06,
|
|
"loss": 1.0472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274654746055603,
|
|
"step": 1044,
|
|
"valid_targets_mean": 16176.4,
|
|
"valid_targets_min": 15185
|
|
},
|
|
{
|
|
"epoch": 4.447761194029851,
|
|
"grad_norm": 0.06955688858711356,
|
|
"learning_rate": 1.496921112287315e-06,
|
|
"loss": 1.0426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2881823778152466,
|
|
"step": 1045,
|
|
"valid_targets_mean": 16105.3,
|
|
"valid_targets_min": 14392
|
|
},
|
|
{
|
|
"epoch": 4.452025586353945,
|
|
"grad_norm": 0.06642438120714346,
|
|
"learning_rate": 1.4744385613799894e-06,
|
|
"loss": 1.0143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24886992573738098,
|
|
"step": 1046,
|
|
"valid_targets_mean": 15998.3,
|
|
"valid_targets_min": 15218
|
|
},
|
|
{
|
|
"epoch": 4.456289978678038,
|
|
"grad_norm": 0.06860740910546882,
|
|
"learning_rate": 1.4521196622889644e-06,
|
|
"loss": 1.0508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29240933060646057,
|
|
"step": 1047,
|
|
"valid_targets_mean": 16125.4,
|
|
"valid_targets_min": 15356
|
|
},
|
|
{
|
|
"epoch": 4.460554371002132,
|
|
"grad_norm": 0.06904289491635444,
|
|
"learning_rate": 1.4299646121757892e-06,
|
|
"loss": 0.9867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2177426517009735,
|
|
"step": 1048,
|
|
"valid_targets_mean": 12781.1,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 4.464818763326226,
|
|
"grad_norm": 0.06673715351700876,
|
|
"learning_rate": 1.4079736067545912e-06,
|
|
"loss": 1.0534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762781083583832,
|
|
"step": 1049,
|
|
"valid_targets_mean": 16059.2,
|
|
"valid_targets_min": 15266
|
|
},
|
|
{
|
|
"epoch": 4.46908315565032,
|
|
"grad_norm": 0.0722373130141504,
|
|
"learning_rate": 1.3861468402903634e-06,
|
|
"loss": 1.0855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29924508929252625,
|
|
"step": 1050,
|
|
"valid_targets_mean": 15682.5,
|
|
"valid_targets_min": 7497
|
|
},
|
|
{
|
|
"epoch": 4.473347547974414,
|
|
"grad_norm": 0.0688488497687797,
|
|
"learning_rate": 1.3644845055972322e-06,
|
|
"loss": 1.0482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18955601751804352,
|
|
"step": 1051,
|
|
"valid_targets_mean": 9859.2,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 4.477611940298507,
|
|
"grad_norm": 0.06601653303968784,
|
|
"learning_rate": 1.3429867940367626e-06,
|
|
"loss": 1.0361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2514811158180237,
|
|
"step": 1052,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 14652
|
|
},
|
|
{
|
|
"epoch": 4.481876332622601,
|
|
"grad_norm": 0.06739938501301718,
|
|
"learning_rate": 1.321653895516264e-06,
|
|
"loss": 1.0756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30614376068115234,
|
|
"step": 1053,
|
|
"valid_targets_mean": 16067.7,
|
|
"valid_targets_min": 14106
|
|
},
|
|
{
|
|
"epoch": 4.486140724946695,
|
|
"grad_norm": 0.06965344420657194,
|
|
"learning_rate": 1.3004859984871199e-06,
|
|
"loss": 1.0942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20910713076591492,
|
|
"step": 1054,
|
|
"valid_targets_mean": 11169.9,
|
|
"valid_targets_min": 7018
|
|
},
|
|
{
|
|
"epoch": 4.490405117270789,
|
|
"grad_norm": 0.06391352963208008,
|
|
"learning_rate": 1.279483289943102e-06,
|
|
"loss": 1.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265006959438324,
|
|
"step": 1055,
|
|
"valid_targets_mean": 16121.8,
|
|
"valid_targets_min": 15181
|
|
},
|
|
{
|
|
"epoch": 4.494669509594883,
|
|
"grad_norm": 0.07181504174561626,
|
|
"learning_rate": 1.2586459554187558e-06,
|
|
"loss": 1.0046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28799471259117126,
|
|
"step": 1056,
|
|
"valid_targets_mean": 16175.4,
|
|
"valid_targets_min": 15508
|
|
},
|
|
{
|
|
"epoch": 4.498933901918977,
|
|
"grad_norm": 0.061573481646947426,
|
|
"learning_rate": 1.2379741789877175e-06,
|
|
"loss": 1.0149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2298075258731842,
|
|
"step": 1057,
|
|
"valid_targets_mean": 14635.2,
|
|
"valid_targets_min": 12401
|
|
},
|
|
{
|
|
"epoch": 4.50319829424307,
|
|
"grad_norm": 0.06920854560823408,
|
|
"learning_rate": 1.2174681432611245e-06,
|
|
"loss": 1.0732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3100632429122925,
|
|
"step": 1058,
|
|
"valid_targets_mean": 16088.8,
|
|
"valid_targets_min": 15300
|
|
},
|
|
{
|
|
"epoch": 4.507462686567164,
|
|
"grad_norm": 0.07130345682673504,
|
|
"learning_rate": 1.1971280293859811e-06,
|
|
"loss": 1.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007884621620178,
|
|
"step": 1059,
|
|
"valid_targets_mean": 16112.3,
|
|
"valid_targets_min": 15214
|
|
},
|
|
{
|
|
"epoch": 4.5117270788912585,
|
|
"grad_norm": 0.06369804978722103,
|
|
"learning_rate": 1.17695401704357e-06,
|
|
"loss": 1.026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26148420572280884,
|
|
"step": 1060,
|
|
"valid_targets_mean": 15896.0,
|
|
"valid_targets_min": 14589
|
|
},
|
|
{
|
|
"epoch": 4.515991471215352,
|
|
"grad_norm": 0.07168246525830331,
|
|
"learning_rate": 1.1569462844478552e-06,
|
|
"loss": 1.0064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26662689447402954,
|
|
"step": 1061,
|
|
"valid_targets_mean": 16175.7,
|
|
"valid_targets_min": 15214
|
|
},
|
|
{
|
|
"epoch": 4.520255863539446,
|
|
"grad_norm": 0.06866864827017016,
|
|
"learning_rate": 1.1371050083439107e-06,
|
|
"loss": 1.0705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21038126945495605,
|
|
"step": 1062,
|
|
"valid_targets_mean": 10682.1,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 4.524520255863539,
|
|
"grad_norm": 0.06451554485895601,
|
|
"learning_rate": 1.1174303640063622e-06,
|
|
"loss": 1.0412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26922640204429626,
|
|
"step": 1063,
|
|
"valid_targets_mean": 16087.5,
|
|
"valid_targets_min": 14977
|
|
},
|
|
{
|
|
"epoch": 4.528784648187633,
|
|
"grad_norm": 0.06625664038557702,
|
|
"learning_rate": 1.097922525237849e-06,
|
|
"loss": 1.0001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27882564067840576,
|
|
"step": 1064,
|
|
"valid_targets_mean": 16206.2,
|
|
"valid_targets_min": 15583
|
|
},
|
|
{
|
|
"epoch": 4.533049040511727,
|
|
"grad_norm": 0.06469299252688529,
|
|
"learning_rate": 1.078581664367455e-06,
|
|
"loss": 1.0248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19300132989883423,
|
|
"step": 1065,
|
|
"valid_targets_mean": 10719.3,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 4.537313432835821,
|
|
"grad_norm": 0.06232454751348464,
|
|
"learning_rate": 1.0594079522492274e-06,
|
|
"loss": 0.9697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24433079361915588,
|
|
"step": 1066,
|
|
"valid_targets_mean": 16146.9,
|
|
"valid_targets_min": 14997
|
|
},
|
|
{
|
|
"epoch": 4.541577825159915,
|
|
"grad_norm": 0.07202635183742086,
|
|
"learning_rate": 1.040401558260633e-06,
|
|
"loss": 1.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30619311332702637,
|
|
"step": 1067,
|
|
"valid_targets_mean": 16092.0,
|
|
"valid_targets_min": 15255
|
|
},
|
|
{
|
|
"epoch": 4.545842217484008,
|
|
"grad_norm": 0.06767269822427965,
|
|
"learning_rate": 1.0215626503010911e-06,
|
|
"loss": 1.0512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23298615217208862,
|
|
"step": 1068,
|
|
"valid_targets_mean": 13324.3,
|
|
"valid_targets_min": 10965
|
|
},
|
|
{
|
|
"epoch": 4.550106609808102,
|
|
"grad_norm": 0.06670794792621995,
|
|
"learning_rate": 1.002891394790475e-06,
|
|
"loss": 1.0177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670523226261139,
|
|
"step": 1069,
|
|
"valid_targets_mean": 16089.6,
|
|
"valid_targets_min": 15011
|
|
},
|
|
{
|
|
"epoch": 4.554371002132196,
|
|
"grad_norm": 0.06583351341175893,
|
|
"learning_rate": 9.843879566676273e-07,
|
|
"loss": 1.0495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2825944423675537,
|
|
"step": 1070,
|
|
"valid_targets_mean": 16111.3,
|
|
"valid_targets_min": 14636
|
|
},
|
|
{
|
|
"epoch": 4.55863539445629,
|
|
"grad_norm": 0.06432602873777708,
|
|
"learning_rate": 9.660524993889386e-07,
|
|
"loss": 1.0375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546829581260681,
|
|
"step": 1071,
|
|
"valid_targets_mean": 15280.9,
|
|
"valid_targets_min": 12984
|
|
},
|
|
{
|
|
"epoch": 4.562899786780384,
|
|
"grad_norm": 0.06612525121096763,
|
|
"learning_rate": 9.478851849268733e-07,
|
|
"loss": 1.0118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707669138908386,
|
|
"step": 1072,
|
|
"valid_targets_mean": 16183.2,
|
|
"valid_targets_min": 15291
|
|
},
|
|
{
|
|
"epoch": 4.567164179104478,
|
|
"grad_norm": 0.06658209204743323,
|
|
"learning_rate": 9.298861737685527e-07,
|
|
"loss": 1.03,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23429641127586365,
|
|
"step": 1073,
|
|
"valid_targets_mean": 13335.6,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.06382496000612249,
|
|
"learning_rate": 9.120556249143341e-07,
|
|
"loss": 1.0345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27515697479248047,
|
|
"step": 1074,
|
|
"valid_targets_mean": 15894.7,
|
|
"valid_targets_min": 14647
|
|
},
|
|
{
|
|
"epoch": 4.575692963752665,
|
|
"grad_norm": 0.06561260017611426,
|
|
"learning_rate": 8.943936958763988e-07,
|
|
"loss": 1.0628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3208537697792053,
|
|
"step": 1075,
|
|
"valid_targets_mean": 16156.1,
|
|
"valid_targets_min": 15475
|
|
},
|
|
{
|
|
"epoch": 4.5799573560767595,
|
|
"grad_norm": 0.07106352914740932,
|
|
"learning_rate": 8.769005426773836e-07,
|
|
"loss": 1.058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16240765154361725,
|
|
"step": 1076,
|
|
"valid_targets_mean": 8552.3,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 4.584221748400853,
|
|
"grad_norm": 0.06001533264433857,
|
|
"learning_rate": 8.595763198489714e-07,
|
|
"loss": 1.0309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26004844903945923,
|
|
"step": 1077,
|
|
"valid_targets_mean": 16121.2,
|
|
"valid_targets_min": 14794
|
|
},
|
|
{
|
|
"epoch": 4.588486140724947,
|
|
"grad_norm": 0.07223354108772592,
|
|
"learning_rate": 8.42421180430546e-07,
|
|
"loss": 1.032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3050524890422821,
|
|
"step": 1078,
|
|
"valid_targets_mean": 16070.5,
|
|
"valid_targets_min": 14279
|
|
},
|
|
{
|
|
"epoch": 4.59275053304904,
|
|
"grad_norm": 0.0645080708327429,
|
|
"learning_rate": 8.254352759678386e-07,
|
|
"loss": 1.0357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21567600965499878,
|
|
"step": 1079,
|
|
"valid_targets_mean": 11904.4,
|
|
"valid_targets_min": 7771
|
|
},
|
|
{
|
|
"epoch": 4.597014925373134,
|
|
"grad_norm": 0.07375564809773025,
|
|
"learning_rate": 8.086187565115877e-07,
|
|
"loss": 1.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741999924182892,
|
|
"step": 1080,
|
|
"valid_targets_mean": 16097.8,
|
|
"valid_targets_min": 13809
|
|
},
|
|
{
|
|
"epoch": 4.601279317697228,
|
|
"grad_norm": 0.0693912442546476,
|
|
"learning_rate": 7.919717706162067e-07,
|
|
"loss": 1.0504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897847294807434,
|
|
"step": 1081,
|
|
"valid_targets_mean": 16137.2,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 4.605543710021322,
|
|
"grad_norm": 0.06540483077964447,
|
|
"learning_rate": 7.754944653384777e-07,
|
|
"loss": 1.0184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378586381673813,
|
|
"step": 1082,
|
|
"valid_targets_mean": 14494.3,
|
|
"valid_targets_min": 11152
|
|
},
|
|
{
|
|
"epoch": 4.609808102345416,
|
|
"grad_norm": 0.06936332651935755,
|
|
"learning_rate": 7.591869862362534e-07,
|
|
"loss": 1.0434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28583645820617676,
|
|
"step": 1083,
|
|
"valid_targets_mean": 15942.8,
|
|
"valid_targets_min": 12675
|
|
},
|
|
{
|
|
"epoch": 4.61407249466951,
|
|
"grad_norm": 0.0708085825812139,
|
|
"learning_rate": 7.430494773671682e-07,
|
|
"loss": 0.9905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29754379391670227,
|
|
"step": 1084,
|
|
"valid_targets_mean": 16195.6,
|
|
"valid_targets_min": 15551
|
|
},
|
|
{
|
|
"epoch": 4.618336886993603,
|
|
"grad_norm": 0.061823977015555275,
|
|
"learning_rate": 7.270820812873714e-07,
|
|
"loss": 1.0368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26821067929267883,
|
|
"step": 1085,
|
|
"valid_targets_mean": 15413.0,
|
|
"valid_targets_min": 13393
|
|
},
|
|
{
|
|
"epoch": 4.622601279317697,
|
|
"grad_norm": 0.06935712846473616,
|
|
"learning_rate": 7.112849390502563e-07,
|
|
"loss": 1.0137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697770595550537,
|
|
"step": 1086,
|
|
"valid_targets_mean": 16176.0,
|
|
"valid_targets_min": 15138
|
|
},
|
|
{
|
|
"epoch": 4.6268656716417915,
|
|
"grad_norm": 0.06868637845504715,
|
|
"learning_rate": 6.956581902052306e-07,
|
|
"loss": 1.0565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21737951040267944,
|
|
"step": 1087,
|
|
"valid_targets_mean": 12028.6,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 4.631130063965885,
|
|
"grad_norm": 0.06093208631154221,
|
|
"learning_rate": 6.802019727964593e-07,
|
|
"loss": 1.0078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23558054864406586,
|
|
"step": 1088,
|
|
"valid_targets_mean": 16225.4,
|
|
"valid_targets_min": 15394
|
|
},
|
|
{
|
|
"epoch": 4.635394456289979,
|
|
"grad_norm": 0.07230940748636551,
|
|
"learning_rate": 6.64916423361679e-07,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762644588947296,
|
|
"step": 1089,
|
|
"valid_targets_mean": 16148.3,
|
|
"valid_targets_min": 15125
|
|
},
|
|
{
|
|
"epoch": 4.639658848614072,
|
|
"grad_norm": 0.0673186601332428,
|
|
"learning_rate": 6.498016769309567e-07,
|
|
"loss": 1.0512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16368666291236877,
|
|
"step": 1090,
|
|
"valid_targets_mean": 8629.6,
|
|
"valid_targets_min": 2583
|
|
},
|
|
{
|
|
"epoch": 4.643923240938166,
|
|
"grad_norm": 0.0618856437454137,
|
|
"learning_rate": 6.348578670255224e-07,
|
|
"loss": 1.0343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25088608264923096,
|
|
"step": 1091,
|
|
"valid_targets_mean": 16217.7,
|
|
"valid_targets_min": 15486
|
|
},
|
|
{
|
|
"epoch": 4.6481876332622605,
|
|
"grad_norm": 0.06743807196455773,
|
|
"learning_rate": 6.200851256565799e-07,
|
|
"loss": 1.0272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28733301162719727,
|
|
"step": 1092,
|
|
"valid_targets_mean": 15882.4,
|
|
"valid_targets_min": 6625
|
|
},
|
|
{
|
|
"epoch": 4.652452025586354,
|
|
"grad_norm": 0.06672077692966995,
|
|
"learning_rate": 6.054835833241357e-07,
|
|
"loss": 0.9989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21731820702552795,
|
|
"step": 1093,
|
|
"valid_targets_mean": 14358.2,
|
|
"valid_targets_min": 11462
|
|
},
|
|
{
|
|
"epoch": 4.656716417910448,
|
|
"grad_norm": 0.064391973867486,
|
|
"learning_rate": 5.910533690158593e-07,
|
|
"loss": 1.0277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513645887374878,
|
|
"step": 1094,
|
|
"valid_targets_mean": 16259.5,
|
|
"valid_targets_min": 15726
|
|
},
|
|
{
|
|
"epoch": 4.660980810234541,
|
|
"grad_norm": 0.06768561216177127,
|
|
"learning_rate": 5.767946102059307e-07,
|
|
"loss": 1.0467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27974191308021545,
|
|
"step": 1095,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15601
|
|
},
|
|
{
|
|
"epoch": 4.665245202558635,
|
|
"grad_norm": 0.061099747918315565,
|
|
"learning_rate": 5.627074328539173e-07,
|
|
"loss": 1.0337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23972144722938538,
|
|
"step": 1096,
|
|
"valid_targets_mean": 14613.4,
|
|
"valid_targets_min": 12560
|
|
},
|
|
{
|
|
"epoch": 4.669509594882729,
|
|
"grad_norm": 0.0675116861112385,
|
|
"learning_rate": 5.487919614036741e-07,
|
|
"loss": 1.0632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29405105113983154,
|
|
"step": 1097,
|
|
"valid_targets_mean": 16116.4,
|
|
"valid_targets_min": 14651
|
|
},
|
|
{
|
|
"epoch": 4.673773987206823,
|
|
"grad_norm": 0.06479318068695483,
|
|
"learning_rate": 5.350483187822231e-07,
|
|
"loss": 1.0597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25372886657714844,
|
|
"step": 1098,
|
|
"valid_targets_mean": 12747.4,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 4.678038379530917,
|
|
"grad_norm": 0.06544295091478003,
|
|
"learning_rate": 5.214766263986848e-07,
|
|
"loss": 1.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26814526319503784,
|
|
"step": 1099,
|
|
"valid_targets_mean": 16155.9,
|
|
"valid_targets_min": 15425
|
|
},
|
|
{
|
|
"epoch": 4.682302771855011,
|
|
"grad_norm": 0.06658040199927837,
|
|
"learning_rate": 5.080770041431926e-07,
|
|
"loss": 1.0268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28944432735443115,
|
|
"step": 1100,
|
|
"valid_targets_mean": 16096.7,
|
|
"valid_targets_min": 15137
|
|
},
|
|
{
|
|
"epoch": 4.686567164179104,
|
|
"grad_norm": 0.06589457302733992,
|
|
"learning_rate": 4.948495703858492e-07,
|
|
"loss": 0.9948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1565166562795639,
|
|
"step": 1101,
|
|
"valid_targets_mean": 9297.7,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 4.690831556503198,
|
|
"grad_norm": 0.06500791806447159,
|
|
"learning_rate": 4.81794441975667e-07,
|
|
"loss": 1.0293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25875699520111084,
|
|
"step": 1102,
|
|
"valid_targets_mean": 16155.0,
|
|
"valid_targets_min": 15447
|
|
},
|
|
{
|
|
"epoch": 4.6950959488272925,
|
|
"grad_norm": 0.06828578037907539,
|
|
"learning_rate": 4.689117342395388e-07,
|
|
"loss": 1.0441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28119850158691406,
|
|
"step": 1103,
|
|
"valid_targets_mean": 16144.2,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 4.699360341151386,
|
|
"grad_norm": 0.06412214894242424,
|
|
"learning_rate": 4.5620156098122204e-07,
|
|
"loss": 1.0325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20923319458961487,
|
|
"step": 1104,
|
|
"valid_targets_mean": 11503.0,
|
|
"valid_targets_min": 5870
|
|
},
|
|
{
|
|
"epoch": 4.70362473347548,
|
|
"grad_norm": 0.06885165201427158,
|
|
"learning_rate": 4.4366403448033334e-07,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2798612713813782,
|
|
"step": 1105,
|
|
"valid_targets_mean": 16125.3,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 4.707889125799573,
|
|
"grad_norm": 0.066816495583206,
|
|
"learning_rate": 4.3129926549136057e-07,
|
|
"loss": 1.0674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28703272342681885,
|
|
"step": 1106,
|
|
"valid_targets_mean": 16136.9,
|
|
"valid_targets_min": 15517
|
|
},
|
|
{
|
|
"epoch": 4.712153518123667,
|
|
"grad_norm": 0.059758956642163075,
|
|
"learning_rate": 4.191073632426701e-07,
|
|
"loss": 1.008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23712573945522308,
|
|
"step": 1107,
|
|
"valid_targets_mean": 14461.9,
|
|
"valid_targets_min": 11314
|
|
},
|
|
{
|
|
"epoch": 4.7164179104477615,
|
|
"grad_norm": 0.06457469437650706,
|
|
"learning_rate": 4.0708843543555643e-07,
|
|
"loss": 1.0273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816098630428314,
|
|
"step": 1108,
|
|
"valid_targets_mean": 15913.7,
|
|
"valid_targets_min": 9862
|
|
},
|
|
{
|
|
"epoch": 4.720682302771855,
|
|
"grad_norm": 0.06730000250489519,
|
|
"learning_rate": 3.95242588243292e-07,
|
|
"loss": 1.0241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28961244225502014,
|
|
"step": 1109,
|
|
"valid_targets_mean": 16107.2,
|
|
"valid_targets_min": 15178
|
|
},
|
|
{
|
|
"epoch": 4.724946695095949,
|
|
"grad_norm": 0.06466183888314715,
|
|
"learning_rate": 3.8356992631017e-07,
|
|
"loss": 1.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24020272493362427,
|
|
"step": 1110,
|
|
"valid_targets_mean": 15695.3,
|
|
"valid_targets_min": 12126
|
|
},
|
|
{
|
|
"epoch": 4.729211087420042,
|
|
"grad_norm": 0.06973501051257218,
|
|
"learning_rate": 3.720705527506008e-07,
|
|
"loss": 1.042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31237542629241943,
|
|
"step": 1111,
|
|
"valid_targets_mean": 15998.1,
|
|
"valid_targets_min": 14474
|
|
},
|
|
{
|
|
"epoch": 4.733475479744136,
|
|
"grad_norm": 0.07127486114248104,
|
|
"learning_rate": 3.60744569148197e-07,
|
|
"loss": 1.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21069574356079102,
|
|
"step": 1112,
|
|
"valid_targets_mean": 11659.0,
|
|
"valid_targets_min": 2405
|
|
},
|
|
{
|
|
"epoch": 4.73773987206823,
|
|
"grad_norm": 0.05952959240368819,
|
|
"learning_rate": 3.4959207555485873e-07,
|
|
"loss": 1.0614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783857583999634,
|
|
"step": 1113,
|
|
"valid_targets_mean": 15965.8,
|
|
"valid_targets_min": 13693
|
|
},
|
|
{
|
|
"epoch": 4.742004264392325,
|
|
"grad_norm": 0.07151836138537747,
|
|
"learning_rate": 3.3861317048992317e-07,
|
|
"loss": 1.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30463409423828125,
|
|
"step": 1114,
|
|
"valid_targets_mean": 16166.5,
|
|
"valid_targets_min": 15134
|
|
},
|
|
{
|
|
"epoch": 4.746268656716418,
|
|
"grad_norm": 0.06166926226723204,
|
|
"learning_rate": 3.278079509392562e-07,
|
|
"loss": 0.9898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19415485858917236,
|
|
"step": 1115,
|
|
"valid_targets_mean": 10432.7,
|
|
"valid_targets_min": 3393
|
|
},
|
|
{
|
|
"epoch": 4.750533049040512,
|
|
"grad_norm": 0.061835003743109226,
|
|
"learning_rate": 3.171765123544224e-07,
|
|
"loss": 1.038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25987979769706726,
|
|
"step": 1116,
|
|
"valid_targets_mean": 16038.8,
|
|
"valid_targets_min": 12684
|
|
},
|
|
{
|
|
"epoch": 4.754797441364605,
|
|
"grad_norm": 0.06815063847450221,
|
|
"learning_rate": 3.06718948651834e-07,
|
|
"loss": 1.0366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28946614265441895,
|
|
"step": 1117,
|
|
"valid_targets_mean": 16163.2,
|
|
"valid_targets_min": 15104
|
|
},
|
|
{
|
|
"epoch": 4.759061833688699,
|
|
"grad_norm": 0.06010820977051542,
|
|
"learning_rate": 2.964353522119168e-07,
|
|
"loss": 1.0305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2345275580883026,
|
|
"step": 1118,
|
|
"valid_targets_mean": 14487.7,
|
|
"valid_targets_min": 12184
|
|
},
|
|
{
|
|
"epoch": 4.7633262260127935,
|
|
"grad_norm": 0.06826659172974868,
|
|
"learning_rate": 2.863258138783032e-07,
|
|
"loss": 1.0618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29986581206321716,
|
|
"step": 1119,
|
|
"valid_targets_mean": 16080.4,
|
|
"valid_targets_min": 15446
|
|
},
|
|
{
|
|
"epoch": 4.767590618336887,
|
|
"grad_norm": 0.06370521954598973,
|
|
"learning_rate": 2.7639042295702245e-07,
|
|
"loss": 1.0247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782517075538635,
|
|
"step": 1120,
|
|
"valid_targets_mean": 16153.0,
|
|
"valid_targets_min": 15671
|
|
},
|
|
{
|
|
"epoch": 4.771855010660981,
|
|
"grad_norm": 0.06710798780901804,
|
|
"learning_rate": 2.666292672157056e-07,
|
|
"loss": 1.0175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24605801701545715,
|
|
"step": 1121,
|
|
"valid_targets_mean": 15804.6,
|
|
"valid_targets_min": 12138
|
|
},
|
|
{
|
|
"epoch": 4.776119402985074,
|
|
"grad_norm": 0.06705152689172797,
|
|
"learning_rate": 2.570424328828325e-07,
|
|
"loss": 1.0438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704864740371704,
|
|
"step": 1122,
|
|
"valid_targets_mean": 16170.6,
|
|
"valid_targets_min": 15587
|
|
},
|
|
{
|
|
"epoch": 4.780383795309168,
|
|
"grad_norm": 0.06546688280201717,
|
|
"learning_rate": 2.4763000464694377e-07,
|
|
"loss": 1.0373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23335450887680054,
|
|
"step": 1123,
|
|
"valid_targets_mean": 12874.6,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 4.7846481876332625,
|
|
"grad_norm": 0.06312737568797826,
|
|
"learning_rate": 2.383920656559102e-07,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678661048412323,
|
|
"step": 1124,
|
|
"valid_targets_mean": 16095.7,
|
|
"valid_targets_min": 15077
|
|
},
|
|
{
|
|
"epoch": 4.788912579957356,
|
|
"grad_norm": 0.06459132066944305,
|
|
"learning_rate": 2.2932869751619568e-07,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29518774151802063,
|
|
"step": 1125,
|
|
"valid_targets_mean": 16067.9,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 4.79317697228145,
|
|
"grad_norm": 0.06540542687389801,
|
|
"learning_rate": 2.2043998029212643e-07,
|
|
"loss": 1.0623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2064974308013916,
|
|
"step": 1126,
|
|
"valid_targets_mean": 10802.1,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 4.797441364605544,
|
|
"grad_norm": 0.0682386709501433,
|
|
"learning_rate": 2.1172599250519398e-07,
|
|
"loss": 1.0318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581126391887665,
|
|
"step": 1127,
|
|
"valid_targets_mean": 16153.9,
|
|
"valid_targets_min": 15640
|
|
},
|
|
{
|
|
"epoch": 4.801705756929637,
|
|
"grad_norm": 0.06977903744379234,
|
|
"learning_rate": 2.0318681113336013e-07,
|
|
"loss": 1.0549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29566651582717896,
|
|
"step": 1128,
|
|
"valid_targets_mean": 16128.9,
|
|
"valid_targets_min": 15125
|
|
},
|
|
{
|
|
"epoch": 4.8059701492537314,
|
|
"grad_norm": 0.06392056420417282,
|
|
"learning_rate": 1.9482251161037302e-07,
|
|
"loss": 1.0285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.198150634765625,
|
|
"step": 1129,
|
|
"valid_targets_mean": 10550.7,
|
|
"valid_targets_min": 6061
|
|
},
|
|
{
|
|
"epoch": 4.810234541577826,
|
|
"grad_norm": 0.07155980727379735,
|
|
"learning_rate": 1.866331678251032e-07,
|
|
"loss": 1.0539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28694137930870056,
|
|
"step": 1130,
|
|
"valid_targets_mean": 16094.8,
|
|
"valid_targets_min": 15093
|
|
},
|
|
{
|
|
"epoch": 4.814498933901919,
|
|
"grad_norm": 0.06509303964966795,
|
|
"learning_rate": 1.7861885212088869e-07,
|
|
"loss": 1.0375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28865548968315125,
|
|
"step": 1131,
|
|
"valid_targets_mean": 15910.7,
|
|
"valid_targets_min": 13269
|
|
},
|
|
{
|
|
"epoch": 4.818763326226013,
|
|
"grad_norm": 0.06573651779981234,
|
|
"learning_rate": 1.7077963529490204e-07,
|
|
"loss": 1.018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22500956058502197,
|
|
"step": 1132,
|
|
"valid_targets_mean": 14480.6,
|
|
"valid_targets_min": 11724
|
|
},
|
|
{
|
|
"epoch": 4.823027718550106,
|
|
"grad_norm": 0.06571728566957954,
|
|
"learning_rate": 1.6311558659751535e-07,
|
|
"loss": 1.0332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683248519897461,
|
|
"step": 1133,
|
|
"valid_targets_mean": 16175.8,
|
|
"valid_targets_min": 15466
|
|
},
|
|
{
|
|
"epoch": 4.8272921108742,
|
|
"grad_norm": 0.06764612682397515,
|
|
"learning_rate": 1.5562677373169855e-07,
|
|
"loss": 1.0749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31197530031204224,
|
|
"step": 1134,
|
|
"valid_targets_mean": 15839.0,
|
|
"valid_targets_min": 10501
|
|
},
|
|
{
|
|
"epoch": 4.8315565031982945,
|
|
"grad_norm": 0.06264356585172555,
|
|
"learning_rate": 1.483132628524131e-07,
|
|
"loss": 1.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24763375520706177,
|
|
"step": 1135,
|
|
"valid_targets_mean": 15570.5,
|
|
"valid_targets_min": 14161
|
|
},
|
|
{
|
|
"epoch": 4.835820895522388,
|
|
"grad_norm": 0.06585407930969372,
|
|
"learning_rate": 1.4117511856603262e-07,
|
|
"loss": 1.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30529558658599854,
|
|
"step": 1136,
|
|
"valid_targets_mean": 16078.8,
|
|
"valid_targets_min": 15212
|
|
},
|
|
{
|
|
"epoch": 4.840085287846482,
|
|
"grad_norm": 0.0684457554267718,
|
|
"learning_rate": 1.342124039297721e-07,
|
|
"loss": 1.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21791598200798035,
|
|
"step": 1137,
|
|
"valid_targets_mean": 10824.7,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 4.844349680170575,
|
|
"grad_norm": 0.06386376612460452,
|
|
"learning_rate": 1.2742518045112396e-07,
|
|
"loss": 1.0516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28313618898391724,
|
|
"step": 1138,
|
|
"valid_targets_mean": 15987.8,
|
|
"valid_targets_min": 14605
|
|
},
|
|
{
|
|
"epoch": 4.848614072494669,
|
|
"grad_norm": 0.07048432807147949,
|
|
"learning_rate": 1.2081350808732518e-07,
|
|
"loss": 1.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3174133896827698,
|
|
"step": 1139,
|
|
"valid_targets_mean": 16126.2,
|
|
"valid_targets_min": 15266
|
|
},
|
|
{
|
|
"epoch": 4.8528784648187635,
|
|
"grad_norm": 0.06694823135008761,
|
|
"learning_rate": 1.143774452448243e-07,
|
|
"loss": 1.0592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18154577910900116,
|
|
"step": 1140,
|
|
"valid_targets_mean": 10349.8,
|
|
"valid_targets_min": 5169
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.0638872396787473,
|
|
"learning_rate": 1.0811704877875528e-07,
|
|
"loss": 1.0124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517280578613281,
|
|
"step": 1141,
|
|
"valid_targets_mean": 16188.1,
|
|
"valid_targets_min": 15112
|
|
},
|
|
{
|
|
"epoch": 4.861407249466951,
|
|
"grad_norm": 0.06753051419783793,
|
|
"learning_rate": 1.0203237399245336e-07,
|
|
"loss": 1.0487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29785820841789246,
|
|
"step": 1142,
|
|
"valid_targets_mean": 16082.3,
|
|
"valid_targets_min": 15198
|
|
},
|
|
{
|
|
"epoch": 4.865671641791045,
|
|
"grad_norm": 0.06697734646551638,
|
|
"learning_rate": 9.612347463694882e-08,
|
|
"loss": 1.0574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22490859031677246,
|
|
"step": 1143,
|
|
"valid_targets_mean": 13146.6,
|
|
"valid_targets_min": 9388
|
|
},
|
|
{
|
|
"epoch": 4.869936034115138,
|
|
"grad_norm": 0.06301664964205182,
|
|
"learning_rate": 9.039040291050738e-08,
|
|
"loss": 1.0171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27475059032440186,
|
|
"step": 1144,
|
|
"valid_targets_mean": 16223.3,
|
|
"valid_targets_min": 15738
|
|
},
|
|
{
|
|
"epoch": 4.8742004264392325,
|
|
"grad_norm": 0.06887119372901562,
|
|
"learning_rate": 8.483320945815499e-08,
|
|
"loss": 1.0254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776992917060852,
|
|
"step": 1145,
|
|
"valid_targets_mean": 16212.5,
|
|
"valid_targets_min": 15674
|
|
},
|
|
{
|
|
"epoch": 4.878464818763327,
|
|
"grad_norm": 0.06326621766795197,
|
|
"learning_rate": 7.945194337124262e-08,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25974565744400024,
|
|
"step": 1146,
|
|
"valid_targets_mean": 15235.2,
|
|
"valid_targets_min": 13189
|
|
},
|
|
{
|
|
"epoch": 4.88272921108742,
|
|
"grad_norm": 0.06637882034086907,
|
|
"learning_rate": 7.424665218700444e-08,
|
|
"loss": 1.0551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2915082275867462,
|
|
"step": 1147,
|
|
"valid_targets_mean": 16142.1,
|
|
"valid_targets_min": 15261
|
|
},
|
|
{
|
|
"epoch": 4.886993603411514,
|
|
"grad_norm": 0.06300558912272562,
|
|
"learning_rate": 6.921738188814254e-08,
|
|
"loss": 1.0102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23722125589847565,
|
|
"step": 1148,
|
|
"valid_targets_mean": 13079.8,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 4.891257995735607,
|
|
"grad_norm": 0.06047710824370665,
|
|
"learning_rate": 6.436417690241614e-08,
|
|
"loss": 1.0064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24455218017101288,
|
|
"step": 1149,
|
|
"valid_targets_mean": 15945.2,
|
|
"valid_targets_min": 14525
|
|
},
|
|
{
|
|
"epoch": 4.895522388059701,
|
|
"grad_norm": 0.06713044476954044,
|
|
"learning_rate": 5.968708010225532e-08,
|
|
"loss": 1.0006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649267017841339,
|
|
"step": 1150,
|
|
"valid_targets_mean": 16236.9,
|
|
"valid_targets_min": 15682
|
|
},
|
|
{
|
|
"epoch": 4.899786780383796,
|
|
"grad_norm": 0.06467284414976224,
|
|
"learning_rate": 5.518613280437901e-08,
|
|
"loss": 1.0586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1860632300376892,
|
|
"step": 1151,
|
|
"valid_targets_mean": 9696.4,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 4.904051172707889,
|
|
"grad_norm": 0.06903710424675084,
|
|
"learning_rate": 5.0861374769426433e-08,
|
|
"loss": 1.0462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27779239416122437,
|
|
"step": 1152,
|
|
"valid_targets_mean": 16040.4,
|
|
"valid_targets_min": 14861
|
|
},
|
|
{
|
|
"epoch": 4.908315565031983,
|
|
"grad_norm": 0.06955086449587287,
|
|
"learning_rate": 4.671284420161071e-08,
|
|
"loss": 1.0756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3218618631362915,
|
|
"step": 1153,
|
|
"valid_targets_mean": 15914.3,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 4.912579957356077,
|
|
"grad_norm": 0.0643495655815214,
|
|
"learning_rate": 4.274057774838136e-08,
|
|
"loss": 1.0555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19928057491779327,
|
|
"step": 1154,
|
|
"valid_targets_mean": 11584.6,
|
|
"valid_targets_min": 7857
|
|
},
|
|
{
|
|
"epoch": 4.91684434968017,
|
|
"grad_norm": 0.06491243352644827,
|
|
"learning_rate": 3.894461050010012e-08,
|
|
"loss": 1.029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27964040637016296,
|
|
"step": 1155,
|
|
"valid_targets_mean": 16055.9,
|
|
"valid_targets_min": 15188
|
|
},
|
|
{
|
|
"epoch": 4.9211087420042645,
|
|
"grad_norm": 0.06616457941197208,
|
|
"learning_rate": 3.5324975989725615e-08,
|
|
"loss": 1.0196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2951641380786896,
|
|
"step": 1156,
|
|
"valid_targets_mean": 16127.8,
|
|
"valid_targets_min": 15522
|
|
},
|
|
{
|
|
"epoch": 4.925373134328359,
|
|
"grad_norm": 0.06341880275101203,
|
|
"learning_rate": 3.188170619252473e-08,
|
|
"loss": 0.997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2247222363948822,
|
|
"step": 1157,
|
|
"valid_targets_mean": 13761.2,
|
|
"valid_targets_min": 10952
|
|
},
|
|
{
|
|
"epoch": 4.929637526652452,
|
|
"grad_norm": 0.06642180700640127,
|
|
"learning_rate": 2.8614831525786147e-08,
|
|
"loss": 1.0365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27605557441711426,
|
|
"step": 1158,
|
|
"valid_targets_mean": 16088.0,
|
|
"valid_targets_min": 13817
|
|
},
|
|
{
|
|
"epoch": 4.933901918976546,
|
|
"grad_norm": 0.06568394875762185,
|
|
"learning_rate": 2.552438084855613e-08,
|
|
"loss": 1.0445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2995665669441223,
|
|
"step": 1159,
|
|
"valid_targets_mean": 16092.3,
|
|
"valid_targets_min": 14286
|
|
},
|
|
{
|
|
"epoch": 4.938166311300639,
|
|
"grad_norm": 0.06354156579012393,
|
|
"learning_rate": 2.2610381461372068e-08,
|
|
"loss": 1.0547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744227647781372,
|
|
"step": 1160,
|
|
"valid_targets_mean": 15969.0,
|
|
"valid_targets_min": 13656
|
|
},
|
|
{
|
|
"epoch": 4.9424307036247335,
|
|
"grad_norm": 0.06503160501595615,
|
|
"learning_rate": 1.987285910603598e-08,
|
|
"loss": 1.0461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3043082356452942,
|
|
"step": 1161,
|
|
"valid_targets_mean": 16148.0,
|
|
"valid_targets_min": 15203
|
|
},
|
|
{
|
|
"epoch": 4.946695095948828,
|
|
"grad_norm": 0.06643305572194895,
|
|
"learning_rate": 1.7311837965379164e-08,
|
|
"loss": 1.0267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20282933115959167,
|
|
"step": 1162,
|
|
"valid_targets_mean": 10971.9,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 4.950959488272921,
|
|
"grad_norm": 0.06549657312606202,
|
|
"learning_rate": 1.4927340663046798e-08,
|
|
"loss": 1.0677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822372317314148,
|
|
"step": 1163,
|
|
"valid_targets_mean": 16121.8,
|
|
"valid_targets_min": 15512
|
|
},
|
|
{
|
|
"epoch": 4.955223880597015,
|
|
"grad_norm": 0.06772049546045192,
|
|
"learning_rate": 1.2719388263300325e-08,
|
|
"loss": 1.0651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889268398284912,
|
|
"step": 1164,
|
|
"valid_targets_mean": 16087.9,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 4.959488272921108,
|
|
"grad_norm": 0.062464363719840606,
|
|
"learning_rate": 1.0688000270839827e-08,
|
|
"loss": 1.0132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19231122732162476,
|
|
"step": 1165,
|
|
"valid_targets_mean": 10646.6,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 4.963752665245202,
|
|
"grad_norm": 0.061897459275745643,
|
|
"learning_rate": 8.833194630615271e-09,
|
|
"loss": 1.0309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584786117076874,
|
|
"step": 1166,
|
|
"valid_targets_mean": 16138.4,
|
|
"valid_targets_min": 14766
|
|
},
|
|
{
|
|
"epoch": 4.968017057569297,
|
|
"grad_norm": 0.06515569752759716,
|
|
"learning_rate": 7.154987727682194e-09,
|
|
"loss": 0.9859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279852032661438,
|
|
"step": 1167,
|
|
"valid_targets_mean": 16175.0,
|
|
"valid_targets_min": 15420
|
|
},
|
|
{
|
|
"epoch": 4.97228144989339,
|
|
"grad_norm": 0.06497032812699527,
|
|
"learning_rate": 5.6533943870462625e-09,
|
|
"loss": 1.0353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22625817358493805,
|
|
"step": 1168,
|
|
"valid_targets_mean": 13536.1,
|
|
"valid_targets_min": 10473
|
|
},
|
|
{
|
|
"epoch": 4.976545842217484,
|
|
"grad_norm": 0.06405533512857071,
|
|
"learning_rate": 4.328427873541152e-09,
|
|
"loss": 1.0477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674109637737274,
|
|
"step": 1169,
|
|
"valid_targets_mean": 16142.6,
|
|
"valid_targets_min": 15158
|
|
},
|
|
{
|
|
"epoch": 4.980810234541578,
|
|
"grad_norm": 0.06521980936870843,
|
|
"learning_rate": 3.1800998917086432e-09,
|
|
"loss": 1.0492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286263644695282,
|
|
"step": 1170,
|
|
"valid_targets_mean": 16114.5,
|
|
"valid_targets_min": 15272
|
|
},
|
|
{
|
|
"epoch": 4.985074626865671,
|
|
"grad_norm": 0.06592660738353633,
|
|
"learning_rate": 2.2084205856920393e-09,
|
|
"loss": 1.0445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24204930663108826,
|
|
"step": 1171,
|
|
"valid_targets_mean": 15201.3,
|
|
"valid_targets_min": 13301
|
|
},
|
|
{
|
|
"epoch": 4.9893390191897655,
|
|
"grad_norm": 0.06498580207674572,
|
|
"learning_rate": 1.4133985391473482e-09,
|
|
"loss": 1.0526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830965518951416,
|
|
"step": 1172,
|
|
"valid_targets_mean": 16157.4,
|
|
"valid_targets_min": 15523
|
|
},
|
|
{
|
|
"epoch": 4.99360341151386,
|
|
"grad_norm": 0.06658414566152455,
|
|
"learning_rate": 7.950407751722288e-10,
|
|
"loss": 1.0309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25384026765823364,
|
|
"step": 1173,
|
|
"valid_targets_mean": 13038.8,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 4.997867803837953,
|
|
"grad_norm": 0.06329365572428539,
|
|
"learning_rate": 3.5335275624159835e-10,
|
|
"loss": 1.0799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702412009239197,
|
|
"step": 1174,
|
|
"valid_targets_mean": 16105.5,
|
|
"valid_targets_min": 14252
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.10024296834326855,
|
|
"learning_rate": 8.833838415212014e-11,
|
|
"loss": 1.0349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47408732771873474,
|
|
"step": 1175,
|
|
"valid_targets_mean": 11629.8,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47408732771873474,
|
|
"step": 1175,
|
|
"total_flos": 1367215063302144.0,
|
|
"train_loss": 1.0890607003455466,
|
|
"train_runtime": 4651.4769,
|
|
"train_samples_per_second": 32.248,
|
|
"train_steps_per_second": 0.253,
|
|
"valid_targets_mean": 11629.8,
|
|
"valid_targets_min": 2098
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1175,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1367215063302144.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|