Model: open-sci/sft__ot30k_Qwen2.5-1.5B-SFT-Tulu3-decontaminated Source: Original Platform
12973 lines
362 KiB
JSON
12973 lines
362 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1175,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0042643923240938165,
|
|
"grad_norm": 2.5366310886513554,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.4664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3633503019809723,
|
|
"step": 1,
|
|
"valid_targets_mean": 16166.3,
|
|
"valid_targets_min": 15435
|
|
},
|
|
{
|
|
"epoch": 0.008528784648187633,
|
|
"grad_norm": 2.5801694932581136,
|
|
"learning_rate": 3.3898305084745766e-07,
|
|
"loss": 1.4775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43468669056892395,
|
|
"step": 2,
|
|
"valid_targets_mean": 16231.4,
|
|
"valid_targets_min": 15861
|
|
},
|
|
{
|
|
"epoch": 0.01279317697228145,
|
|
"grad_norm": 2.5436827836237237,
|
|
"learning_rate": 6.779661016949153e-07,
|
|
"loss": 1.498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32442036271095276,
|
|
"step": 3,
|
|
"valid_targets_mean": 12902.7,
|
|
"valid_targets_min": 9698
|
|
},
|
|
{
|
|
"epoch": 0.017057569296375266,
|
|
"grad_norm": 2.557611495365603,
|
|
"learning_rate": 1.016949152542373e-06,
|
|
"loss": 1.5634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38837745785713196,
|
|
"step": 4,
|
|
"valid_targets_mean": 16193.7,
|
|
"valid_targets_min": 15332
|
|
},
|
|
{
|
|
"epoch": 0.021321961620469083,
|
|
"grad_norm": 2.501076169530371,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 1.4884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4217827320098877,
|
|
"step": 5,
|
|
"valid_targets_mean": 16192.6,
|
|
"valid_targets_min": 15561
|
|
},
|
|
{
|
|
"epoch": 0.0255863539445629,
|
|
"grad_norm": 2.5083867469645673,
|
|
"learning_rate": 1.6949152542372882e-06,
|
|
"loss": 1.4775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33818453550338745,
|
|
"step": 6,
|
|
"valid_targets_mean": 14216.8,
|
|
"valid_targets_min": 11918
|
|
},
|
|
{
|
|
"epoch": 0.029850746268656716,
|
|
"grad_norm": 2.408038285775719,
|
|
"learning_rate": 2.033898305084746e-06,
|
|
"loss": 1.4603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44682514667510986,
|
|
"step": 7,
|
|
"valid_targets_mean": 15977.0,
|
|
"valid_targets_min": 14845
|
|
},
|
|
{
|
|
"epoch": 0.03411513859275053,
|
|
"grad_norm": 2.3292454609481386,
|
|
"learning_rate": 2.372881355932204e-06,
|
|
"loss": 1.4851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3509920835494995,
|
|
"step": 8,
|
|
"valid_targets_mean": 12883.5,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 0.03837953091684435,
|
|
"grad_norm": 1.967700745194145,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 1.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3503936231136322,
|
|
"step": 9,
|
|
"valid_targets_mean": 15800.5,
|
|
"valid_targets_min": 14349
|
|
},
|
|
{
|
|
"epoch": 0.042643923240938165,
|
|
"grad_norm": 1.8916462786230832,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 1.479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43284642696380615,
|
|
"step": 10,
|
|
"valid_targets_mean": 16118.9,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 0.046908315565031986,
|
|
"grad_norm": 1.6595343637173545,
|
|
"learning_rate": 3.3898305084745763e-06,
|
|
"loss": 1.4307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2499767243862152,
|
|
"step": 11,
|
|
"valid_targets_mean": 9185.1,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 0.0511727078891258,
|
|
"grad_norm": 1.5151937956251524,
|
|
"learning_rate": 3.7288135593220342e-06,
|
|
"loss": 1.4265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3513546884059906,
|
|
"step": 12,
|
|
"valid_targets_mean": 16135.2,
|
|
"valid_targets_min": 14794
|
|
},
|
|
{
|
|
"epoch": 0.05543710021321962,
|
|
"grad_norm": 1.5292214435231881,
|
|
"learning_rate": 4.067796610169492e-06,
|
|
"loss": 1.4258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39262068271636963,
|
|
"step": 13,
|
|
"valid_targets_mean": 16090.6,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 0.05970149253731343,
|
|
"grad_norm": 1.393250104462496,
|
|
"learning_rate": 4.40677966101695e-06,
|
|
"loss": 1.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585723102092743,
|
|
"step": 14,
|
|
"valid_targets_mean": 10057.8,
|
|
"valid_targets_min": 6101
|
|
},
|
|
{
|
|
"epoch": 0.06396588486140725,
|
|
"grad_norm": 1.1760003188047685,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 1.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38163837790489197,
|
|
"step": 15,
|
|
"valid_targets_mean": 16117.9,
|
|
"valid_targets_min": 14544
|
|
},
|
|
{
|
|
"epoch": 0.06823027718550106,
|
|
"grad_norm": 1.8086044060522266,
|
|
"learning_rate": 5.084745762711865e-06,
|
|
"loss": 1.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.413485050201416,
|
|
"step": 16,
|
|
"valid_targets_mean": 16074.6,
|
|
"valid_targets_min": 14755
|
|
},
|
|
{
|
|
"epoch": 0.07249466950959488,
|
|
"grad_norm": 1.7462651292016687,
|
|
"learning_rate": 5.423728813559323e-06,
|
|
"loss": 1.4223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3601645231246948,
|
|
"step": 17,
|
|
"valid_targets_mean": 14765.2,
|
|
"valid_targets_min": 12313
|
|
},
|
|
{
|
|
"epoch": 0.0767590618336887,
|
|
"grad_norm": 1.4910941336606407,
|
|
"learning_rate": 5.7627118644067805e-06,
|
|
"loss": 1.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3598688840866089,
|
|
"step": 18,
|
|
"valid_targets_mean": 16238.3,
|
|
"valid_targets_min": 15660
|
|
},
|
|
{
|
|
"epoch": 0.08102345415778252,
|
|
"grad_norm": 1.2152389918873905,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 1.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3777955174446106,
|
|
"step": 19,
|
|
"valid_targets_mean": 16181.4,
|
|
"valid_targets_min": 15422
|
|
},
|
|
{
|
|
"epoch": 0.08528784648187633,
|
|
"grad_norm": 0.9439219563284131,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 1.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3570820689201355,
|
|
"step": 20,
|
|
"valid_targets_mean": 15211.0,
|
|
"valid_targets_min": 13345
|
|
},
|
|
{
|
|
"epoch": 0.08955223880597014,
|
|
"grad_norm": 0.8614841201368815,
|
|
"learning_rate": 6.779661016949153e-06,
|
|
"loss": 1.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39180970191955566,
|
|
"step": 21,
|
|
"valid_targets_mean": 16066.3,
|
|
"valid_targets_min": 13888
|
|
},
|
|
{
|
|
"epoch": 0.09381663113006397,
|
|
"grad_norm": 0.989712835987842,
|
|
"learning_rate": 7.1186440677966106e-06,
|
|
"loss": 1.3796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866818308830261,
|
|
"step": 22,
|
|
"valid_targets_mean": 11589.2,
|
|
"valid_targets_min": 2526
|
|
},
|
|
{
|
|
"epoch": 0.09808102345415778,
|
|
"grad_norm": 0.8838701674340025,
|
|
"learning_rate": 7.4576271186440685e-06,
|
|
"loss": 1.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3461511731147766,
|
|
"step": 23,
|
|
"valid_targets_mean": 16071.5,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 0.1023454157782516,
|
|
"grad_norm": 0.7102511059049913,
|
|
"learning_rate": 7.796610169491526e-06,
|
|
"loss": 1.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38423407077789307,
|
|
"step": 24,
|
|
"valid_targets_mean": 16015.3,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 0.10660980810234541,
|
|
"grad_norm": 0.8200282677360692,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 1.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23730315268039703,
|
|
"step": 25,
|
|
"valid_targets_mean": 10277.5,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 0.11087420042643924,
|
|
"grad_norm": 0.7588068868281355,
|
|
"learning_rate": 8.47457627118644e-06,
|
|
"loss": 1.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.360531210899353,
|
|
"step": 26,
|
|
"valid_targets_mean": 16022.2,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 0.11513859275053305,
|
|
"grad_norm": 0.6220593529622817,
|
|
"learning_rate": 8.8135593220339e-06,
|
|
"loss": 1.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35754430294036865,
|
|
"step": 27,
|
|
"valid_targets_mean": 16100.3,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 0.11940298507462686,
|
|
"grad_norm": 0.5954017359076195,
|
|
"learning_rate": 9.152542372881356e-06,
|
|
"loss": 1.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2550112009048462,
|
|
"step": 28,
|
|
"valid_targets_mean": 11641.7,
|
|
"valid_targets_min": 8792
|
|
},
|
|
{
|
|
"epoch": 0.12366737739872068,
|
|
"grad_norm": 0.641528051079028,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 1.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3624398708343506,
|
|
"step": 29,
|
|
"valid_targets_mean": 16077.6,
|
|
"valid_targets_min": 15116
|
|
},
|
|
{
|
|
"epoch": 0.1279317697228145,
|
|
"grad_norm": 0.4920851160355056,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 1.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3876349627971649,
|
|
"step": 30,
|
|
"valid_targets_mean": 16075.6,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 0.13219616204690832,
|
|
"grad_norm": 0.5222548588184942,
|
|
"learning_rate": 1.016949152542373e-05,
|
|
"loss": 1.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29042887687683105,
|
|
"step": 31,
|
|
"valid_targets_mean": 14641.4,
|
|
"valid_targets_min": 12602
|
|
},
|
|
{
|
|
"epoch": 0.13646055437100213,
|
|
"grad_norm": 0.5397850083552871,
|
|
"learning_rate": 1.0508474576271188e-05,
|
|
"loss": 1.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3626946806907654,
|
|
"step": 32,
|
|
"valid_targets_mean": 16118.6,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 0.14072494669509594,
|
|
"grad_norm": 0.4793360544551292,
|
|
"learning_rate": 1.0847457627118645e-05,
|
|
"loss": 1.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28542160987854004,
|
|
"step": 33,
|
|
"valid_targets_mean": 13292.3,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 0.14498933901918976,
|
|
"grad_norm": 0.45405854099713594,
|
|
"learning_rate": 1.1186440677966102e-05,
|
|
"loss": 1.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30616092681884766,
|
|
"step": 34,
|
|
"valid_targets_mean": 15516.8,
|
|
"valid_targets_min": 13685
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 0.4319692781535827,
|
|
"learning_rate": 1.1525423728813561e-05,
|
|
"loss": 1.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3785768747329712,
|
|
"step": 35,
|
|
"valid_targets_mean": 16160.1,
|
|
"valid_targets_min": 15418
|
|
},
|
|
{
|
|
"epoch": 0.1535181236673774,
|
|
"grad_norm": 0.4527662177446445,
|
|
"learning_rate": 1.1864406779661018e-05,
|
|
"loss": 1.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22713381052017212,
|
|
"step": 36,
|
|
"valid_targets_mean": 9924.8,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 0.15778251599147122,
|
|
"grad_norm": 0.3765831906436275,
|
|
"learning_rate": 1.2203389830508477e-05,
|
|
"loss": 1.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30958038568496704,
|
|
"step": 37,
|
|
"valid_targets_mean": 16209.8,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 0.16204690831556504,
|
|
"grad_norm": 0.42044497087118887,
|
|
"learning_rate": 1.2542372881355932e-05,
|
|
"loss": 1.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3550320565700531,
|
|
"step": 38,
|
|
"valid_targets_mean": 16175.6,
|
|
"valid_targets_min": 15189
|
|
},
|
|
{
|
|
"epoch": 0.16631130063965885,
|
|
"grad_norm": 0.39664232457510257,
|
|
"learning_rate": 1.288135593220339e-05,
|
|
"loss": 1.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23336681723594666,
|
|
"step": 39,
|
|
"valid_targets_mean": 9991.4,
|
|
"valid_targets_min": 6726
|
|
},
|
|
{
|
|
"epoch": 0.17057569296375266,
|
|
"grad_norm": 0.39504221280618673,
|
|
"learning_rate": 1.3220338983050848e-05,
|
|
"loss": 1.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3280348777770996,
|
|
"step": 40,
|
|
"valid_targets_mean": 16142.2,
|
|
"valid_targets_min": 14273
|
|
},
|
|
{
|
|
"epoch": 0.17484008528784648,
|
|
"grad_norm": 0.34053684953238916,
|
|
"learning_rate": 1.3559322033898305e-05,
|
|
"loss": 1.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33567771315574646,
|
|
"step": 41,
|
|
"valid_targets_mean": 16186.6,
|
|
"valid_targets_min": 14994
|
|
},
|
|
{
|
|
"epoch": 0.1791044776119403,
|
|
"grad_norm": 0.3295620699042012,
|
|
"learning_rate": 1.3898305084745764e-05,
|
|
"loss": 1.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27557477355003357,
|
|
"step": 42,
|
|
"valid_targets_mean": 13378.8,
|
|
"valid_targets_min": 11254
|
|
},
|
|
{
|
|
"epoch": 0.18336886993603413,
|
|
"grad_norm": 0.3280221166685567,
|
|
"learning_rate": 1.4237288135593221e-05,
|
|
"loss": 1.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34312349557876587,
|
|
"step": 43,
|
|
"valid_targets_mean": 16160.8,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 0.18763326226012794,
|
|
"grad_norm": 0.32067396279656807,
|
|
"learning_rate": 1.4576271186440678e-05,
|
|
"loss": 1.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31136569380760193,
|
|
"step": 44,
|
|
"valid_targets_mean": 16207.8,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 0.19189765458422176,
|
|
"grad_norm": 0.3204956775534723,
|
|
"learning_rate": 1.4915254237288137e-05,
|
|
"loss": 1.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32411104440689087,
|
|
"step": 45,
|
|
"valid_targets_mean": 14813.5,
|
|
"valid_targets_min": 13169
|
|
},
|
|
{
|
|
"epoch": 0.19616204690831557,
|
|
"grad_norm": 0.31309834097280076,
|
|
"learning_rate": 1.5254237288135594e-05,
|
|
"loss": 1.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3351410925388336,
|
|
"step": 46,
|
|
"valid_targets_mean": 16167.5,
|
|
"valid_targets_min": 14831
|
|
},
|
|
{
|
|
"epoch": 0.20042643923240938,
|
|
"grad_norm": 0.3062761729865377,
|
|
"learning_rate": 1.5593220338983053e-05,
|
|
"loss": 1.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603297829627991,
|
|
"step": 47,
|
|
"valid_targets_mean": 11703.6,
|
|
"valid_targets_min": 3390
|
|
},
|
|
{
|
|
"epoch": 0.2046908315565032,
|
|
"grad_norm": 0.3059044906159298,
|
|
"learning_rate": 1.593220338983051e-05,
|
|
"loss": 1.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31020739674568176,
|
|
"step": 48,
|
|
"valid_targets_mean": 15695.9,
|
|
"valid_targets_min": 14068
|
|
},
|
|
{
|
|
"epoch": 0.208955223880597,
|
|
"grad_norm": 0.29729679774217543,
|
|
"learning_rate": 1.6271186440677967e-05,
|
|
"loss": 1.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3794437348842621,
|
|
"step": 49,
|
|
"valid_targets_mean": 16029.5,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 0.21321961620469082,
|
|
"grad_norm": 0.28568244337662785,
|
|
"learning_rate": 1.6610169491525424e-05,
|
|
"loss": 1.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17967450618743896,
|
|
"step": 50,
|
|
"valid_targets_mean": 7348.0,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 0.21748400852878466,
|
|
"grad_norm": 0.30679422108986126,
|
|
"learning_rate": 1.694915254237288e-05,
|
|
"loss": 1.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30095893144607544,
|
|
"step": 51,
|
|
"valid_targets_mean": 16203.1,
|
|
"valid_targets_min": 15803
|
|
},
|
|
{
|
|
"epoch": 0.22174840085287847,
|
|
"grad_norm": 0.30903751456062006,
|
|
"learning_rate": 1.728813559322034e-05,
|
|
"loss": 1.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36445748805999756,
|
|
"step": 52,
|
|
"valid_targets_mean": 16132.4,
|
|
"valid_targets_min": 15171
|
|
},
|
|
{
|
|
"epoch": 0.2260127931769723,
|
|
"grad_norm": 0.2734644653742701,
|
|
"learning_rate": 1.76271186440678e-05,
|
|
"loss": 1.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2611805200576782,
|
|
"step": 53,
|
|
"valid_targets_mean": 12511.2,
|
|
"valid_targets_min": 10082
|
|
},
|
|
{
|
|
"epoch": 0.2302771855010661,
|
|
"grad_norm": 0.296167365405311,
|
|
"learning_rate": 1.7966101694915256e-05,
|
|
"loss": 1.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33024922013282776,
|
|
"step": 54,
|
|
"valid_targets_mean": 16137.9,
|
|
"valid_targets_min": 14934
|
|
},
|
|
{
|
|
"epoch": 0.2345415778251599,
|
|
"grad_norm": 0.2857314835724685,
|
|
"learning_rate": 1.8305084745762713e-05,
|
|
"loss": 1.2213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36636629700660706,
|
|
"step": 55,
|
|
"valid_targets_mean": 16097.3,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 0.23880597014925373,
|
|
"grad_norm": 0.30832952787337065,
|
|
"learning_rate": 1.864406779661017e-05,
|
|
"loss": 1.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799396216869354,
|
|
"step": 56,
|
|
"valid_targets_mean": 14551.7,
|
|
"valid_targets_min": 12631
|
|
},
|
|
{
|
|
"epoch": 0.24307036247334754,
|
|
"grad_norm": 0.2742963507475523,
|
|
"learning_rate": 1.898305084745763e-05,
|
|
"loss": 1.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33785948157310486,
|
|
"step": 57,
|
|
"valid_targets_mean": 15843.4,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 0.24733475479744135,
|
|
"grad_norm": 0.3353292347627481,
|
|
"learning_rate": 1.9322033898305087e-05,
|
|
"loss": 1.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30091214179992676,
|
|
"step": 58,
|
|
"valid_targets_mean": 13183.4,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 0.2515991471215352,
|
|
"grad_norm": 0.28940885000192057,
|
|
"learning_rate": 1.9661016949152545e-05,
|
|
"loss": 1.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025071620941162,
|
|
"step": 59,
|
|
"valid_targets_mean": 15810.2,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 0.255863539445629,
|
|
"grad_norm": 0.3158097390368932,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34030646085739136,
|
|
"step": 60,
|
|
"valid_targets_mean": 16229.3,
|
|
"valid_targets_min": 15657
|
|
},
|
|
{
|
|
"epoch": 0.2601279317697228,
|
|
"grad_norm": 0.3421290994544124,
|
|
"learning_rate": 2.033898305084746e-05,
|
|
"loss": 1.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2056933045387268,
|
|
"step": 61,
|
|
"valid_targets_mean": 8622.5,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 0.26439232409381663,
|
|
"grad_norm": 0.2840875671625852,
|
|
"learning_rate": 2.0677966101694916e-05,
|
|
"loss": 1.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3078186511993408,
|
|
"step": 62,
|
|
"valid_targets_mean": 16106.1,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 0.26865671641791045,
|
|
"grad_norm": 0.36023223069817156,
|
|
"learning_rate": 2.1016949152542376e-05,
|
|
"loss": 1.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34042203426361084,
|
|
"step": 63,
|
|
"valid_targets_mean": 16136.8,
|
|
"valid_targets_min": 15314
|
|
},
|
|
{
|
|
"epoch": 0.27292110874200426,
|
|
"grad_norm": 0.3230768382997439,
|
|
"learning_rate": 2.1355932203389833e-05,
|
|
"loss": 1.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2322762906551361,
|
|
"step": 64,
|
|
"valid_targets_mean": 10336.6,
|
|
"valid_targets_min": 5623
|
|
},
|
|
{
|
|
"epoch": 0.2771855010660981,
|
|
"grad_norm": 0.3201591381046788,
|
|
"learning_rate": 2.169491525423729e-05,
|
|
"loss": 1.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3034995496273041,
|
|
"step": 65,
|
|
"valid_targets_mean": 16176.8,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 0.2814498933901919,
|
|
"grad_norm": 0.3665430118978998,
|
|
"learning_rate": 2.2033898305084748e-05,
|
|
"loss": 1.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35378801822662354,
|
|
"step": 66,
|
|
"valid_targets_mean": 16186.6,
|
|
"valid_targets_min": 15380
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.29217465033844975,
|
|
"learning_rate": 2.2372881355932205e-05,
|
|
"loss": 1.206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2516692280769348,
|
|
"step": 67,
|
|
"valid_targets_mean": 12693.9,
|
|
"valid_targets_min": 10543
|
|
},
|
|
{
|
|
"epoch": 0.2899786780383795,
|
|
"grad_norm": 0.411568614568208,
|
|
"learning_rate": 2.2711864406779665e-05,
|
|
"loss": 1.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3390015661716461,
|
|
"step": 68,
|
|
"valid_targets_mean": 16133.4,
|
|
"valid_targets_min": 15452
|
|
},
|
|
{
|
|
"epoch": 0.2942430703624733,
|
|
"grad_norm": 0.34592991350624464,
|
|
"learning_rate": 2.3050847457627122e-05,
|
|
"loss": 1.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3600677251815796,
|
|
"step": 69,
|
|
"valid_targets_mean": 16142.4,
|
|
"valid_targets_min": 15328
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.4123090858740144,
|
|
"learning_rate": 2.338983050847458e-05,
|
|
"loss": 1.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28057190775871277,
|
|
"step": 70,
|
|
"valid_targets_mean": 14594.2,
|
|
"valid_targets_min": 13086
|
|
},
|
|
{
|
|
"epoch": 0.302771855010661,
|
|
"grad_norm": 0.4890966579587274,
|
|
"learning_rate": 2.3728813559322036e-05,
|
|
"loss": 1.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3367359936237335,
|
|
"step": 71,
|
|
"valid_targets_mean": 16185.4,
|
|
"valid_targets_min": 15260
|
|
},
|
|
{
|
|
"epoch": 0.3070362473347548,
|
|
"grad_norm": 0.4147372197442472,
|
|
"learning_rate": 2.406779661016949e-05,
|
|
"loss": 1.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253772497177124,
|
|
"step": 72,
|
|
"valid_targets_mean": 11549.2,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 0.31130063965884863,
|
|
"grad_norm": 0.3466568226950996,
|
|
"learning_rate": 2.4406779661016954e-05,
|
|
"loss": 1.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072333335876465,
|
|
"step": 73,
|
|
"valid_targets_mean": 16113.0,
|
|
"valid_targets_min": 13998
|
|
},
|
|
{
|
|
"epoch": 0.31556503198294245,
|
|
"grad_norm": 0.42271407097276814,
|
|
"learning_rate": 2.474576271186441e-05,
|
|
"loss": 1.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31899935007095337,
|
|
"step": 74,
|
|
"valid_targets_mean": 16215.8,
|
|
"valid_targets_min": 15958
|
|
},
|
|
{
|
|
"epoch": 0.31982942430703626,
|
|
"grad_norm": 0.4615831928578139,
|
|
"learning_rate": 2.5084745762711865e-05,
|
|
"loss": 1.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17871591448783875,
|
|
"step": 75,
|
|
"valid_targets_mean": 8364.3,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 0.32409381663113007,
|
|
"grad_norm": 0.4916646150801305,
|
|
"learning_rate": 2.5423728813559322e-05,
|
|
"loss": 1.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3067433834075928,
|
|
"step": 76,
|
|
"valid_targets_mean": 16091.7,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 0.3283582089552239,
|
|
"grad_norm": 0.3818058190469012,
|
|
"learning_rate": 2.576271186440678e-05,
|
|
"loss": 1.1853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34559395909309387,
|
|
"step": 77,
|
|
"valid_targets_mean": 16167.6,
|
|
"valid_targets_min": 15351
|
|
},
|
|
{
|
|
"epoch": 0.3326226012793177,
|
|
"grad_norm": 0.5328297519583807,
|
|
"learning_rate": 2.610169491525424e-05,
|
|
"loss": 1.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23474229872226715,
|
|
"step": 78,
|
|
"valid_targets_mean": 12202.2,
|
|
"valid_targets_min": 8376
|
|
},
|
|
{
|
|
"epoch": 0.3368869936034115,
|
|
"grad_norm": 0.4530723381013144,
|
|
"learning_rate": 2.6440677966101696e-05,
|
|
"loss": 1.1892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31477317214012146,
|
|
"step": 79,
|
|
"valid_targets_mean": 16147.0,
|
|
"valid_targets_min": 15397
|
|
},
|
|
{
|
|
"epoch": 0.3411513859275053,
|
|
"grad_norm": 0.37980374625899643,
|
|
"learning_rate": 2.6779661016949153e-05,
|
|
"loss": 1.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34435707330703735,
|
|
"step": 80,
|
|
"valid_targets_mean": 16172.6,
|
|
"valid_targets_min": 14636
|
|
},
|
|
{
|
|
"epoch": 0.34541577825159914,
|
|
"grad_norm": 0.443903919371291,
|
|
"learning_rate": 2.711864406779661e-05,
|
|
"loss": 1.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900157868862152,
|
|
"step": 81,
|
|
"valid_targets_mean": 14128.0,
|
|
"valid_targets_min": 10472
|
|
},
|
|
{
|
|
"epoch": 0.34968017057569295,
|
|
"grad_norm": 0.42006078121550716,
|
|
"learning_rate": 2.7457627118644068e-05,
|
|
"loss": 1.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34231239557266235,
|
|
"step": 82,
|
|
"valid_targets_mean": 16067.8,
|
|
"valid_targets_min": 15370
|
|
},
|
|
{
|
|
"epoch": 0.35394456289978676,
|
|
"grad_norm": 0.4771362126592886,
|
|
"learning_rate": 2.7796610169491528e-05,
|
|
"loss": 1.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29910576343536377,
|
|
"step": 83,
|
|
"valid_targets_mean": 13262.2,
|
|
"valid_targets_min": 2435
|
|
},
|
|
{
|
|
"epoch": 0.3582089552238806,
|
|
"grad_norm": 0.5188342182814704,
|
|
"learning_rate": 2.8135593220338985e-05,
|
|
"loss": 1.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677900195121765,
|
|
"step": 84,
|
|
"valid_targets_mean": 15934.3,
|
|
"valid_targets_min": 14185
|
|
},
|
|
{
|
|
"epoch": 0.3624733475479744,
|
|
"grad_norm": 0.44679103859423375,
|
|
"learning_rate": 2.8474576271186442e-05,
|
|
"loss": 1.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36229631304740906,
|
|
"step": 85,
|
|
"valid_targets_mean": 16101.3,
|
|
"valid_targets_min": 14897
|
|
},
|
|
{
|
|
"epoch": 0.36673773987206826,
|
|
"grad_norm": 0.5537923610373594,
|
|
"learning_rate": 2.88135593220339e-05,
|
|
"loss": 1.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2012385129928589,
|
|
"step": 86,
|
|
"valid_targets_mean": 9425.7,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 0.37100213219616207,
|
|
"grad_norm": 0.40877363516574183,
|
|
"learning_rate": 2.9152542372881356e-05,
|
|
"loss": 1.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085198998451233,
|
|
"step": 87,
|
|
"valid_targets_mean": 16128.9,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.3752665245202559,
|
|
"grad_norm": 0.531131956607798,
|
|
"learning_rate": 2.9491525423728817e-05,
|
|
"loss": 1.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3547207713127136,
|
|
"step": 88,
|
|
"valid_targets_mean": 16217.1,
|
|
"valid_targets_min": 15697
|
|
},
|
|
{
|
|
"epoch": 0.3795309168443497,
|
|
"grad_norm": 0.4821539701368115,
|
|
"learning_rate": 2.9830508474576274e-05,
|
|
"loss": 1.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20888623595237732,
|
|
"step": 89,
|
|
"valid_targets_mean": 10271.9,
|
|
"valid_targets_min": 5871
|
|
},
|
|
{
|
|
"epoch": 0.3837953091684435,
|
|
"grad_norm": 0.44479347429852767,
|
|
"learning_rate": 3.016949152542373e-05,
|
|
"loss": 1.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040030598640442,
|
|
"step": 90,
|
|
"valid_targets_mean": 16159.1,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 0.3880597014925373,
|
|
"grad_norm": 0.49611777623662034,
|
|
"learning_rate": 3.0508474576271188e-05,
|
|
"loss": 1.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36148935556411743,
|
|
"step": 91,
|
|
"valid_targets_mean": 16088.1,
|
|
"valid_targets_min": 14827
|
|
},
|
|
{
|
|
"epoch": 0.39232409381663114,
|
|
"grad_norm": 0.47030432730526317,
|
|
"learning_rate": 3.084745762711865e-05,
|
|
"loss": 1.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27276670932769775,
|
|
"step": 92,
|
|
"valid_targets_mean": 13574.6,
|
|
"valid_targets_min": 10444
|
|
},
|
|
{
|
|
"epoch": 0.39658848614072495,
|
|
"grad_norm": 0.45488608147303355,
|
|
"learning_rate": 3.1186440677966106e-05,
|
|
"loss": 1.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33878713846206665,
|
|
"step": 93,
|
|
"valid_targets_mean": 16158.7,
|
|
"valid_targets_min": 15460
|
|
},
|
|
{
|
|
"epoch": 0.40085287846481876,
|
|
"grad_norm": 0.485936892703445,
|
|
"learning_rate": 3.152542372881356e-05,
|
|
"loss": 1.1511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34667015075683594,
|
|
"step": 94,
|
|
"valid_targets_mean": 16128.6,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 0.4051172707889126,
|
|
"grad_norm": 0.39962744663044864,
|
|
"learning_rate": 3.186440677966102e-05,
|
|
"loss": 1.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29390081763267517,
|
|
"step": 95,
|
|
"valid_targets_mean": 15844.7,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 0.4093816631130064,
|
|
"grad_norm": 0.4535828391796072,
|
|
"learning_rate": 3.2203389830508473e-05,
|
|
"loss": 1.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3389269709587097,
|
|
"step": 96,
|
|
"valid_targets_mean": 16087.4,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 0.4136460554371002,
|
|
"grad_norm": 0.4208222066539856,
|
|
"learning_rate": 3.2542372881355934e-05,
|
|
"loss": 1.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23698459565639496,
|
|
"step": 97,
|
|
"valid_targets_mean": 10771.6,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.417910447761194,
|
|
"grad_norm": 0.6033426037267763,
|
|
"learning_rate": 3.2881355932203394e-05,
|
|
"loss": 1.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29125797748565674,
|
|
"step": 98,
|
|
"valid_targets_mean": 15845.2,
|
|
"valid_targets_min": 14972
|
|
},
|
|
{
|
|
"epoch": 0.42217484008528783,
|
|
"grad_norm": 0.6430996304411255,
|
|
"learning_rate": 3.322033898305085e-05,
|
|
"loss": 1.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3551673889160156,
|
|
"step": 99,
|
|
"valid_targets_mean": 16180.6,
|
|
"valid_targets_min": 15678
|
|
},
|
|
{
|
|
"epoch": 0.42643923240938164,
|
|
"grad_norm": 0.565665763179061,
|
|
"learning_rate": 3.355932203389831e-05,
|
|
"loss": 1.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19581037759780884,
|
|
"step": 100,
|
|
"valid_targets_mean": 8886.4,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 0.43070362473347545,
|
|
"grad_norm": 0.5396605416088711,
|
|
"learning_rate": 3.389830508474576e-05,
|
|
"loss": 1.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27528661489486694,
|
|
"step": 101,
|
|
"valid_targets_mean": 16190.2,
|
|
"valid_targets_min": 15322
|
|
},
|
|
{
|
|
"epoch": 0.4349680170575693,
|
|
"grad_norm": 0.872782047618966,
|
|
"learning_rate": 3.423728813559322e-05,
|
|
"loss": 1.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3470345437526703,
|
|
"step": 102,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15635
|
|
},
|
|
{
|
|
"epoch": 0.43923240938166314,
|
|
"grad_norm": 1.0278755291690116,
|
|
"learning_rate": 3.457627118644068e-05,
|
|
"loss": 1.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23301859200000763,
|
|
"step": 103,
|
|
"valid_targets_mean": 12241.3,
|
|
"valid_targets_min": 9421
|
|
},
|
|
{
|
|
"epoch": 0.44349680170575695,
|
|
"grad_norm": 0.7125446740464207,
|
|
"learning_rate": 3.491525423728814e-05,
|
|
"loss": 1.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29817789793014526,
|
|
"step": 104,
|
|
"valid_targets_mean": 16131.7,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 0.5563012628453976,
|
|
"learning_rate": 3.52542372881356e-05,
|
|
"loss": 1.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3532581329345703,
|
|
"step": 105,
|
|
"valid_targets_mean": 15808.0,
|
|
"valid_targets_min": 5994
|
|
},
|
|
{
|
|
"epoch": 0.4520255863539446,
|
|
"grad_norm": 0.5790943913836976,
|
|
"learning_rate": 3.559322033898305e-05,
|
|
"loss": 1.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2807878851890564,
|
|
"step": 106,
|
|
"valid_targets_mean": 14647.7,
|
|
"valid_targets_min": 12782
|
|
},
|
|
{
|
|
"epoch": 0.4562899786780384,
|
|
"grad_norm": 0.44736003462007023,
|
|
"learning_rate": 3.593220338983051e-05,
|
|
"loss": 1.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3288416862487793,
|
|
"step": 107,
|
|
"valid_targets_mean": 16161.3,
|
|
"valid_targets_min": 15446
|
|
},
|
|
{
|
|
"epoch": 0.4605543710021322,
|
|
"grad_norm": 0.5622289779961762,
|
|
"learning_rate": 3.627118644067797e-05,
|
|
"loss": 1.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28626295924186707,
|
|
"step": 108,
|
|
"valid_targets_mean": 12701.5,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 0.464818763326226,
|
|
"grad_norm": 0.6298643931105169,
|
|
"learning_rate": 3.6610169491525426e-05,
|
|
"loss": 1.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27614328265190125,
|
|
"step": 109,
|
|
"valid_targets_mean": 15305.1,
|
|
"valid_targets_min": 13189
|
|
},
|
|
{
|
|
"epoch": 0.4690831556503198,
|
|
"grad_norm": 0.5185621603579966,
|
|
"learning_rate": 3.6949152542372886e-05,
|
|
"loss": 1.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33316150307655334,
|
|
"step": 110,
|
|
"valid_targets_mean": 16089.3,
|
|
"valid_targets_min": 13573
|
|
},
|
|
{
|
|
"epoch": 0.47334754797441364,
|
|
"grad_norm": 0.42559122357589,
|
|
"learning_rate": 3.728813559322034e-05,
|
|
"loss": 1.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19938254356384277,
|
|
"step": 111,
|
|
"valid_targets_mean": 9455.9,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 0.47761194029850745,
|
|
"grad_norm": 0.547538253832718,
|
|
"learning_rate": 3.76271186440678e-05,
|
|
"loss": 1.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2797814905643463,
|
|
"step": 112,
|
|
"valid_targets_mean": 16189.5,
|
|
"valid_targets_min": 14479
|
|
},
|
|
{
|
|
"epoch": 0.48187633262260127,
|
|
"grad_norm": 0.4953749309045982,
|
|
"learning_rate": 3.796610169491526e-05,
|
|
"loss": 1.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3203335404396057,
|
|
"step": 113,
|
|
"valid_targets_mean": 16139.4,
|
|
"valid_targets_min": 15023
|
|
},
|
|
{
|
|
"epoch": 0.4861407249466951,
|
|
"grad_norm": 0.5008062729973279,
|
|
"learning_rate": 3.8305084745762714e-05,
|
|
"loss": 1.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20449399948120117,
|
|
"step": 114,
|
|
"valid_targets_mean": 10739.7,
|
|
"valid_targets_min": 4961
|
|
},
|
|
{
|
|
"epoch": 0.4904051172707889,
|
|
"grad_norm": 0.5386560071865651,
|
|
"learning_rate": 3.8644067796610175e-05,
|
|
"loss": 1.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3071936368942261,
|
|
"step": 115,
|
|
"valid_targets_mean": 16131.4,
|
|
"valid_targets_min": 15295
|
|
},
|
|
{
|
|
"epoch": 0.4946695095948827,
|
|
"grad_norm": 0.40711995591176553,
|
|
"learning_rate": 3.898305084745763e-05,
|
|
"loss": 1.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35818758606910706,
|
|
"step": 116,
|
|
"valid_targets_mean": 16016.1,
|
|
"valid_targets_min": 14837
|
|
},
|
|
{
|
|
"epoch": 0.4989339019189765,
|
|
"grad_norm": 0.5473817794095149,
|
|
"learning_rate": 3.932203389830509e-05,
|
|
"loss": 1.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22010652720928192,
|
|
"step": 117,
|
|
"valid_targets_mean": 12074.7,
|
|
"valid_targets_min": 9259
|
|
},
|
|
{
|
|
"epoch": 0.5031982942430704,
|
|
"grad_norm": 0.5549393845209296,
|
|
"learning_rate": 3.966101694915255e-05,
|
|
"loss": 1.2017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33431297540664673,
|
|
"step": 118,
|
|
"valid_targets_mean": 16067.9,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 0.5074626865671642,
|
|
"grad_norm": 0.5146152133859702,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32214197516441345,
|
|
"step": 119,
|
|
"valid_targets_mean": 15887.3,
|
|
"valid_targets_min": 4483
|
|
},
|
|
{
|
|
"epoch": 0.511727078891258,
|
|
"grad_norm": 0.6228833876090004,
|
|
"learning_rate": 3.999991166161585e-05,
|
|
"loss": 1.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574143409729004,
|
|
"step": 120,
|
|
"valid_targets_mean": 15023.2,
|
|
"valid_targets_min": 12822
|
|
},
|
|
{
|
|
"epoch": 0.5159914712153518,
|
|
"grad_norm": 0.6342331737079137,
|
|
"learning_rate": 3.999964664724376e-05,
|
|
"loss": 1.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33971458673477173,
|
|
"step": 121,
|
|
"valid_targets_mean": 16139.8,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 0.5202558635394456,
|
|
"grad_norm": 0.7094986485415746,
|
|
"learning_rate": 3.999920495922483e-05,
|
|
"loss": 1.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20790129899978638,
|
|
"step": 122,
|
|
"valid_targets_mean": 10948.9,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 0.5245202558635395,
|
|
"grad_norm": 0.7145033987132133,
|
|
"learning_rate": 3.999858660146085e-05,
|
|
"loss": 1.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29542362689971924,
|
|
"step": 123,
|
|
"valid_targets_mean": 16067.6,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 0.5287846481876333,
|
|
"grad_norm": 0.7457689773326728,
|
|
"learning_rate": 3.999779157941431e-05,
|
|
"loss": 1.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3226638734340668,
|
|
"step": 124,
|
|
"valid_targets_mean": 16100.9,
|
|
"valid_targets_min": 14446
|
|
},
|
|
{
|
|
"epoch": 0.5330490405117271,
|
|
"grad_norm": 0.5239696778473059,
|
|
"learning_rate": 3.99968199001083e-05,
|
|
"loss": 1.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17688265442848206,
|
|
"step": 125,
|
|
"valid_targets_mean": 8757.8,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 0.5373134328358209,
|
|
"grad_norm": 0.6724127021853642,
|
|
"learning_rate": 3.999567157212646e-05,
|
|
"loss": 1.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29729247093200684,
|
|
"step": 126,
|
|
"valid_targets_mean": 16084.3,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 0.5415778251599147,
|
|
"grad_norm": 0.5970809000359247,
|
|
"learning_rate": 3.9994346605612955e-05,
|
|
"loss": 1.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3342553675174713,
|
|
"step": 127,
|
|
"valid_targets_mean": 16168.2,
|
|
"valid_targets_min": 15465
|
|
},
|
|
{
|
|
"epoch": 0.5458422174840085,
|
|
"grad_norm": 0.6500359122095334,
|
|
"learning_rate": 3.999284501227232e-05,
|
|
"loss": 1.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22495630383491516,
|
|
"step": 128,
|
|
"valid_targets_mean": 11782.7,
|
|
"valid_targets_min": 8043
|
|
},
|
|
{
|
|
"epoch": 0.5501066098081023,
|
|
"grad_norm": 0.5131778839895256,
|
|
"learning_rate": 3.9991166805369393e-05,
|
|
"loss": 1.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31228113174438477,
|
|
"step": 129,
|
|
"valid_targets_mean": 16157.4,
|
|
"valid_targets_min": 15459
|
|
},
|
|
{
|
|
"epoch": 0.5543710021321961,
|
|
"grad_norm": 0.6887330054802512,
|
|
"learning_rate": 3.9989311999729166e-05,
|
|
"loss": 1.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33670181035995483,
|
|
"step": 130,
|
|
"valid_targets_mean": 16114.2,
|
|
"valid_targets_min": 14385
|
|
},
|
|
{
|
|
"epoch": 0.55863539445629,
|
|
"grad_norm": 0.754673773153781,
|
|
"learning_rate": 3.99872806117367e-05,
|
|
"loss": 1.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688842713832855,
|
|
"step": 131,
|
|
"valid_targets_mean": 14860.6,
|
|
"valid_targets_min": 13129
|
|
},
|
|
{
|
|
"epoch": 0.5628997867803838,
|
|
"grad_norm": 0.6456491151478851,
|
|
"learning_rate": 3.998507265933696e-05,
|
|
"loss": 1.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996610403060913,
|
|
"step": 132,
|
|
"valid_targets_mean": 15748.8,
|
|
"valid_targets_min": 2605
|
|
},
|
|
{
|
|
"epoch": 0.5671641791044776,
|
|
"grad_norm": 0.7739595345647093,
|
|
"learning_rate": 3.9982688162034624e-05,
|
|
"loss": 1.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546170949935913,
|
|
"step": 133,
|
|
"valid_targets_mean": 13342.2,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.9331641630918175,
|
|
"learning_rate": 3.998012714089397e-05,
|
|
"loss": 1.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28307855129241943,
|
|
"step": 134,
|
|
"valid_targets_mean": 15889.0,
|
|
"valid_targets_min": 14972
|
|
},
|
|
{
|
|
"epoch": 0.5756929637526652,
|
|
"grad_norm": 0.9902818305571806,
|
|
"learning_rate": 3.997738961853863e-05,
|
|
"loss": 1.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3365092873573303,
|
|
"step": 135,
|
|
"valid_targets_mean": 16079.2,
|
|
"valid_targets_min": 14609
|
|
},
|
|
{
|
|
"epoch": 0.579957356076759,
|
|
"grad_norm": 0.8715174096943445,
|
|
"learning_rate": 3.9974475619151445e-05,
|
|
"loss": 1.1013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1926274299621582,
|
|
"step": 136,
|
|
"valid_targets_mean": 11098.3,
|
|
"valid_targets_min": 4124
|
|
},
|
|
{
|
|
"epoch": 0.5842217484008528,
|
|
"grad_norm": 0.5818454793871639,
|
|
"learning_rate": 3.997138516847422e-05,
|
|
"loss": 1.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703971266746521,
|
|
"step": 137,
|
|
"valid_targets_mean": 16150.4,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.5884861407249466,
|
|
"grad_norm": 0.5206090871135683,
|
|
"learning_rate": 3.9968118293807476e-05,
|
|
"loss": 1.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36197149753570557,
|
|
"step": 138,
|
|
"valid_targets_mean": 16076.4,
|
|
"valid_targets_min": 15001
|
|
},
|
|
{
|
|
"epoch": 0.5927505330490405,
|
|
"grad_norm": 0.47851414710060025,
|
|
"learning_rate": 3.996467502401028e-05,
|
|
"loss": 1.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2182786762714386,
|
|
"step": 139,
|
|
"valid_targets_mean": 10867.7,
|
|
"valid_targets_min": 7289
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.5229917474490419,
|
|
"learning_rate": 3.9961055389499904e-05,
|
|
"loss": 1.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.301999032497406,
|
|
"step": 140,
|
|
"valid_targets_mean": 16133.0,
|
|
"valid_targets_min": 14726
|
|
},
|
|
{
|
|
"epoch": 0.6012793176972282,
|
|
"grad_norm": 0.4640175563151738,
|
|
"learning_rate": 3.995725942225162e-05,
|
|
"loss": 1.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34188324213027954,
|
|
"step": 141,
|
|
"valid_targets_mean": 16148.1,
|
|
"valid_targets_min": 15478
|
|
},
|
|
{
|
|
"epoch": 0.605543710021322,
|
|
"grad_norm": 0.653443767069592,
|
|
"learning_rate": 3.995328715579839e-05,
|
|
"loss": 1.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27668648958206177,
|
|
"step": 142,
|
|
"valid_targets_mean": 13650.9,
|
|
"valid_targets_min": 11524
|
|
},
|
|
{
|
|
"epoch": 0.6098081023454158,
|
|
"grad_norm": 0.4986487612919465,
|
|
"learning_rate": 3.994913862523058e-05,
|
|
"loss": 1.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3264992833137512,
|
|
"step": 143,
|
|
"valid_targets_mean": 16144.9,
|
|
"valid_targets_min": 14836
|
|
},
|
|
{
|
|
"epoch": 0.6140724946695096,
|
|
"grad_norm": 0.5588149612000367,
|
|
"learning_rate": 3.9944813867195624e-05,
|
|
"loss": 1.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30848005414009094,
|
|
"step": 144,
|
|
"valid_targets_mean": 16204.7,
|
|
"valid_targets_min": 15523
|
|
},
|
|
{
|
|
"epoch": 0.6183368869936035,
|
|
"grad_norm": 0.7453941380994512,
|
|
"learning_rate": 3.9940312919897744e-05,
|
|
"loss": 1.084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25924232602119446,
|
|
"step": 145,
|
|
"valid_targets_mean": 15518.0,
|
|
"valid_targets_min": 13968
|
|
},
|
|
{
|
|
"epoch": 0.6226012793176973,
|
|
"grad_norm": 0.5464420276850471,
|
|
"learning_rate": 3.993563582309759e-05,
|
|
"loss": 1.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31063953042030334,
|
|
"step": 146,
|
|
"valid_targets_mean": 16188.8,
|
|
"valid_targets_min": 15135
|
|
},
|
|
{
|
|
"epoch": 0.6268656716417911,
|
|
"grad_norm": 0.42469028732271724,
|
|
"learning_rate": 3.993078261811186e-05,
|
|
"loss": 1.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23097334802150726,
|
|
"step": 147,
|
|
"valid_targets_mean": 10491.1,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 0.6311300639658849,
|
|
"grad_norm": 0.5478065261122042,
|
|
"learning_rate": 3.9925753347813e-05,
|
|
"loss": 1.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265234112739563,
|
|
"step": 148,
|
|
"valid_targets_mean": 15896.2,
|
|
"valid_targets_min": 14353
|
|
},
|
|
{
|
|
"epoch": 0.6353944562899787,
|
|
"grad_norm": 0.5043516530535429,
|
|
"learning_rate": 3.992054805662876e-05,
|
|
"loss": 1.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3632124662399292,
|
|
"step": 149,
|
|
"valid_targets_mean": 16139.4,
|
|
"valid_targets_min": 15325
|
|
},
|
|
{
|
|
"epoch": 0.6396588486140725,
|
|
"grad_norm": 0.4865290796482085,
|
|
"learning_rate": 3.991516679054185e-05,
|
|
"loss": 1.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15352267026901245,
|
|
"step": 150,
|
|
"valid_targets_mean": 7331.1,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 0.6439232409381663,
|
|
"grad_norm": 0.4746388637360173,
|
|
"learning_rate": 3.9909609597089496e-05,
|
|
"loss": 1.0899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732847332954407,
|
|
"step": 151,
|
|
"valid_targets_mean": 16209.2,
|
|
"valid_targets_min": 15437
|
|
},
|
|
{
|
|
"epoch": 0.6481876332622601,
|
|
"grad_norm": 0.5814090165743458,
|
|
"learning_rate": 3.9903876525363055e-05,
|
|
"loss": 1.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37276649475097656,
|
|
"step": 152,
|
|
"valid_targets_mean": 15970.0,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 0.652452025586354,
|
|
"grad_norm": 0.47405089117198984,
|
|
"learning_rate": 3.989796762600755e-05,
|
|
"loss": 1.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23082441091537476,
|
|
"step": 153,
|
|
"valid_targets_mean": 12333.0,
|
|
"valid_targets_min": 7440
|
|
},
|
|
{
|
|
"epoch": 0.6567164179104478,
|
|
"grad_norm": 0.5802677001801227,
|
|
"learning_rate": 3.9891882951221246e-05,
|
|
"loss": 1.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.297301709651947,
|
|
"step": 154,
|
|
"valid_targets_mean": 16176.9,
|
|
"valid_targets_min": 15348
|
|
},
|
|
{
|
|
"epoch": 0.6609808102345416,
|
|
"grad_norm": 0.4830864833929257,
|
|
"learning_rate": 3.988562255475518e-05,
|
|
"loss": 1.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305373191833496,
|
|
"step": 155,
|
|
"valid_targets_mean": 16113.9,
|
|
"valid_targets_min": 15057
|
|
},
|
|
{
|
|
"epoch": 0.6652452025586354,
|
|
"grad_norm": 0.5582433112020596,
|
|
"learning_rate": 3.987918649191268e-05,
|
|
"loss": 1.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30254873633384705,
|
|
"step": 156,
|
|
"valid_targets_mean": 15849.1,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 0.6695095948827292,
|
|
"grad_norm": 0.5375408464387431,
|
|
"learning_rate": 3.987257481954888e-05,
|
|
"loss": 1.0919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3167189955711365,
|
|
"step": 157,
|
|
"valid_targets_mean": 16035.5,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 0.673773987206823,
|
|
"grad_norm": 0.45790634920349066,
|
|
"learning_rate": 3.9865787596070236e-05,
|
|
"loss": 1.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649564743041992,
|
|
"step": 158,
|
|
"valid_targets_mean": 13319.5,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 0.6780383795309168,
|
|
"grad_norm": 0.4764551363056651,
|
|
"learning_rate": 3.9858824881433975e-05,
|
|
"loss": 1.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935394048690796,
|
|
"step": 159,
|
|
"valid_targets_mean": 15898.2,
|
|
"valid_targets_min": 14591
|
|
},
|
|
{
|
|
"epoch": 0.6823027718550106,
|
|
"grad_norm": 0.5022076446019287,
|
|
"learning_rate": 3.9851686737147585e-05,
|
|
"loss": 1.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31322112679481506,
|
|
"step": 160,
|
|
"valid_targets_mean": 16092.8,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 0.6865671641791045,
|
|
"grad_norm": 0.5018966263143605,
|
|
"learning_rate": 3.9844373226268305e-05,
|
|
"loss": 1.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19624844193458557,
|
|
"step": 161,
|
|
"valid_targets_mean": 8096.2,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 0.6908315565031983,
|
|
"grad_norm": 0.42890520031468626,
|
|
"learning_rate": 3.983688441340249e-05,
|
|
"loss": 1.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715575098991394,
|
|
"step": 162,
|
|
"valid_targets_mean": 16160.3,
|
|
"valid_targets_min": 15572
|
|
},
|
|
{
|
|
"epoch": 0.6950959488272921,
|
|
"grad_norm": 0.6118186311739395,
|
|
"learning_rate": 3.98292203647051e-05,
|
|
"loss": 1.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33360719680786133,
|
|
"step": 163,
|
|
"valid_targets_mean": 16126.5,
|
|
"valid_targets_min": 15564
|
|
},
|
|
{
|
|
"epoch": 0.6993603411513859,
|
|
"grad_norm": 0.48975314855537266,
|
|
"learning_rate": 3.982138114787912e-05,
|
|
"loss": 1.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18657957017421722,
|
|
"step": 164,
|
|
"valid_targets_mean": 10036.9,
|
|
"valid_targets_min": 3346
|
|
},
|
|
{
|
|
"epoch": 0.7036247334754797,
|
|
"grad_norm": 0.4611278946702579,
|
|
"learning_rate": 3.98133668321749e-05,
|
|
"loss": 1.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684292197227478,
|
|
"step": 165,
|
|
"valid_targets_mean": 16222.7,
|
|
"valid_targets_min": 15436
|
|
},
|
|
{
|
|
"epoch": 0.7078891257995735,
|
|
"grad_norm": 0.4698326366462607,
|
|
"learning_rate": 3.980517748838963e-05,
|
|
"loss": 1.093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302453875541687,
|
|
"step": 166,
|
|
"valid_targets_mean": 16143.4,
|
|
"valid_targets_min": 13886
|
|
},
|
|
{
|
|
"epoch": 0.7121535181236673,
|
|
"grad_norm": 0.6089019650141191,
|
|
"learning_rate": 3.979681318886664e-05,
|
|
"loss": 1.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620971202850342,
|
|
"step": 167,
|
|
"valid_targets_mean": 13617.7,
|
|
"valid_targets_min": 11839
|
|
},
|
|
{
|
|
"epoch": 0.7164179104477612,
|
|
"grad_norm": 0.6121308914161944,
|
|
"learning_rate": 3.978827400749481e-05,
|
|
"loss": 1.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074718713760376,
|
|
"step": 168,
|
|
"valid_targets_mean": 16190.0,
|
|
"valid_targets_min": 15712
|
|
},
|
|
{
|
|
"epoch": 0.720682302771855,
|
|
"grad_norm": 0.4181329572456291,
|
|
"learning_rate": 3.977956001970788e-05,
|
|
"loss": 1.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316328227519989,
|
|
"step": 169,
|
|
"valid_targets_mean": 16177.5,
|
|
"valid_targets_min": 15165
|
|
},
|
|
{
|
|
"epoch": 0.7249466950959488,
|
|
"grad_norm": 0.3820694759598411,
|
|
"learning_rate": 3.977067130248381e-05,
|
|
"loss": 1.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2775130271911621,
|
|
"step": 170,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 15280
|
|
},
|
|
{
|
|
"epoch": 0.7292110874200426,
|
|
"grad_norm": 0.42918885224199965,
|
|
"learning_rate": 3.9761607934344095e-05,
|
|
"loss": 1.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212129473686218,
|
|
"step": 171,
|
|
"valid_targets_mean": 16080.7,
|
|
"valid_targets_min": 14124
|
|
},
|
|
{
|
|
"epoch": 0.7334754797441365,
|
|
"grad_norm": 0.38146809378376195,
|
|
"learning_rate": 3.975236999535306e-05,
|
|
"loss": 1.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378343939781189,
|
|
"step": 172,
|
|
"valid_targets_mean": 10701.9,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 0.7377398720682303,
|
|
"grad_norm": 0.398396946713863,
|
|
"learning_rate": 3.974295756711717e-05,
|
|
"loss": 1.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819068729877472,
|
|
"step": 173,
|
|
"valid_targets_mean": 16045.4,
|
|
"valid_targets_min": 15437
|
|
},
|
|
{
|
|
"epoch": 0.7420042643923241,
|
|
"grad_norm": 0.5961222558082274,
|
|
"learning_rate": 3.9733370732784296e-05,
|
|
"loss": 1.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30488142371177673,
|
|
"step": 174,
|
|
"valid_targets_mean": 16151.0,
|
|
"valid_targets_min": 14351
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.5021411112013566,
|
|
"learning_rate": 3.972360957704298e-05,
|
|
"loss": 1.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18337294459342957,
|
|
"step": 175,
|
|
"valid_targets_mean": 8556.6,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7505330490405118,
|
|
"grad_norm": 0.32592792574295393,
|
|
"learning_rate": 3.97136741861217e-05,
|
|
"loss": 1.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854708135128021,
|
|
"step": 176,
|
|
"valid_targets_mean": 16134.4,
|
|
"valid_targets_min": 14897
|
|
},
|
|
{
|
|
"epoch": 0.7547974413646056,
|
|
"grad_norm": 0.4732969959201329,
|
|
"learning_rate": 3.970356464778808e-05,
|
|
"loss": 1.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3042914867401123,
|
|
"step": 177,
|
|
"valid_targets_mean": 16085.6,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 0.7590618336886994,
|
|
"grad_norm": 0.47942982862373595,
|
|
"learning_rate": 3.969328105134817e-05,
|
|
"loss": 1.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22245652973651886,
|
|
"step": 178,
|
|
"valid_targets_mean": 11892.4,
|
|
"valid_targets_min": 9743
|
|
},
|
|
{
|
|
"epoch": 0.7633262260127932,
|
|
"grad_norm": 0.357596810395852,
|
|
"learning_rate": 3.9682823487645584e-05,
|
|
"loss": 1.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35377755761146545,
|
|
"step": 179,
|
|
"valid_targets_mean": 15843.5,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 0.767590618336887,
|
|
"grad_norm": 0.536182510954731,
|
|
"learning_rate": 3.9672192049060745e-05,
|
|
"loss": 1.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949283719062805,
|
|
"step": 180,
|
|
"valid_targets_mean": 16161.1,
|
|
"valid_targets_min": 14871
|
|
},
|
|
{
|
|
"epoch": 0.7718550106609808,
|
|
"grad_norm": 0.5883409953635337,
|
|
"learning_rate": 3.966138682951008e-05,
|
|
"loss": 1.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2617324888706207,
|
|
"step": 181,
|
|
"valid_targets_mean": 15097.6,
|
|
"valid_targets_min": 12384
|
|
},
|
|
{
|
|
"epoch": 0.7761194029850746,
|
|
"grad_norm": 0.44943437576431317,
|
|
"learning_rate": 3.9650407924445147e-05,
|
|
"loss": 1.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302264928817749,
|
|
"step": 182,
|
|
"valid_targets_mean": 15968.6,
|
|
"valid_targets_min": 9799
|
|
},
|
|
{
|
|
"epoch": 0.7803837953091685,
|
|
"grad_norm": 0.348496876382619,
|
|
"learning_rate": 3.963925543085181e-05,
|
|
"loss": 1.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2787725031375885,
|
|
"step": 183,
|
|
"valid_targets_mean": 13224.8,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 0.7846481876332623,
|
|
"grad_norm": 0.5040058663611126,
|
|
"learning_rate": 3.96279294472494e-05,
|
|
"loss": 1.1152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726561427116394,
|
|
"step": 184,
|
|
"valid_targets_mean": 16042.3,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 0.7889125799573561,
|
|
"grad_norm": 0.4983532852453396,
|
|
"learning_rate": 3.961643007368984e-05,
|
|
"loss": 1.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3302464783191681,
|
|
"step": 185,
|
|
"valid_targets_mean": 16088.2,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 0.7931769722814499,
|
|
"grad_norm": 0.3970225467932824,
|
|
"learning_rate": 3.960475741175671e-05,
|
|
"loss": 1.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18872615694999695,
|
|
"step": 186,
|
|
"valid_targets_mean": 10063.5,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 0.7974413646055437,
|
|
"grad_norm": 0.3378836915461822,
|
|
"learning_rate": 3.959291156456444e-05,
|
|
"loss": 1.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803310751914978,
|
|
"step": 187,
|
|
"valid_targets_mean": 16124.9,
|
|
"valid_targets_min": 14690
|
|
},
|
|
{
|
|
"epoch": 0.8017057569296375,
|
|
"grad_norm": 0.35346936929216877,
|
|
"learning_rate": 3.9580892636757334e-05,
|
|
"loss": 1.1666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34958887100219727,
|
|
"step": 188,
|
|
"valid_targets_mean": 16013.7,
|
|
"valid_targets_min": 13414
|
|
},
|
|
{
|
|
"epoch": 0.8059701492537313,
|
|
"grad_norm": 0.3533808674922251,
|
|
"learning_rate": 3.9568700734508645e-05,
|
|
"loss": 1.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.198248952627182,
|
|
"step": 189,
|
|
"valid_targets_mean": 9630.3,
|
|
"valid_targets_min": 4230
|
|
},
|
|
{
|
|
"epoch": 0.8102345415778252,
|
|
"grad_norm": 0.3223232955392409,
|
|
"learning_rate": 3.955633596551967e-05,
|
|
"loss": 1.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920791506767273,
|
|
"step": 190,
|
|
"valid_targets_mean": 16130.3,
|
|
"valid_targets_min": 15217
|
|
},
|
|
{
|
|
"epoch": 0.814498933901919,
|
|
"grad_norm": 0.37818416609817124,
|
|
"learning_rate": 3.9543798439018776e-05,
|
|
"loss": 1.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33160802721977234,
|
|
"step": 191,
|
|
"valid_targets_mean": 16100.2,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 0.8187633262260128,
|
|
"grad_norm": 0.36234404052990765,
|
|
"learning_rate": 3.953108826576046e-05,
|
|
"loss": 1.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630240023136139,
|
|
"step": 192,
|
|
"valid_targets_mean": 13192.3,
|
|
"valid_targets_min": 10124
|
|
},
|
|
{
|
|
"epoch": 0.8230277185501066,
|
|
"grad_norm": 0.3496189221525795,
|
|
"learning_rate": 3.9518205558024334e-05,
|
|
"loss": 1.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3344437777996063,
|
|
"step": 193,
|
|
"valid_targets_mean": 16110.0,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 0.8272921108742004,
|
|
"grad_norm": 0.3247000167494858,
|
|
"learning_rate": 3.9505150429614154e-05,
|
|
"loss": 1.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32415539026260376,
|
|
"step": 194,
|
|
"valid_targets_mean": 16157.9,
|
|
"valid_targets_min": 15195
|
|
},
|
|
{
|
|
"epoch": 0.8315565031982942,
|
|
"grad_norm": 0.3707009591283196,
|
|
"learning_rate": 3.949192299585681e-05,
|
|
"loss": 1.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28162047266960144,
|
|
"step": 195,
|
|
"valid_targets_mean": 15813.3,
|
|
"valid_targets_min": 14450
|
|
},
|
|
{
|
|
"epoch": 0.835820895522388,
|
|
"grad_norm": 0.3701248794721312,
|
|
"learning_rate": 3.9478523373601325e-05,
|
|
"loss": 1.1892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34016382694244385,
|
|
"step": 196,
|
|
"valid_targets_mean": 15987.5,
|
|
"valid_targets_min": 14241
|
|
},
|
|
{
|
|
"epoch": 0.8400852878464818,
|
|
"grad_norm": 0.41382661620814537,
|
|
"learning_rate": 3.946495168121778e-05,
|
|
"loss": 1.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2231186181306839,
|
|
"step": 197,
|
|
"valid_targets_mean": 10847.8,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 0.8443496801705757,
|
|
"grad_norm": 0.3912544164406225,
|
|
"learning_rate": 3.9451208038596325e-05,
|
|
"loss": 1.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738224267959595,
|
|
"step": 198,
|
|
"valid_targets_mean": 15738.3,
|
|
"valid_targets_min": 14685
|
|
},
|
|
{
|
|
"epoch": 0.8486140724946695,
|
|
"grad_norm": 0.37275760589482754,
|
|
"learning_rate": 3.943729256714608e-05,
|
|
"loss": 1.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33501487970352173,
|
|
"step": 199,
|
|
"valid_targets_mean": 16183.1,
|
|
"valid_targets_min": 15737
|
|
},
|
|
{
|
|
"epoch": 0.8528784648187633,
|
|
"grad_norm": 0.3523981647759341,
|
|
"learning_rate": 3.942320538979408e-05,
|
|
"loss": 1.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1693504899740219,
|
|
"step": 200,
|
|
"valid_targets_mean": 7677.2,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.38769471517671245,
|
|
"learning_rate": 3.9408946630984144e-05,
|
|
"loss": 1.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3229611814022064,
|
|
"step": 201,
|
|
"valid_targets_mean": 16004.1,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 0.8614072494669509,
|
|
"grad_norm": 0.4029174334792178,
|
|
"learning_rate": 3.939451641667587e-05,
|
|
"loss": 1.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3232455253601074,
|
|
"step": 202,
|
|
"valid_targets_mean": 16134.0,
|
|
"valid_targets_min": 14956
|
|
},
|
|
{
|
|
"epoch": 0.8656716417910447,
|
|
"grad_norm": 0.3945056161774726,
|
|
"learning_rate": 3.937991487434342e-05,
|
|
"loss": 1.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19846779108047485,
|
|
"step": 203,
|
|
"valid_targets_mean": 11952.2,
|
|
"valid_targets_min": 9228
|
|
},
|
|
{
|
|
"epoch": 0.8699360341151386,
|
|
"grad_norm": 0.3911566718594917,
|
|
"learning_rate": 3.9365142132974484e-05,
|
|
"loss": 1.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27967995405197144,
|
|
"step": 204,
|
|
"valid_targets_mean": 16192.1,
|
|
"valid_targets_min": 15300
|
|
},
|
|
{
|
|
"epoch": 0.8742004264392325,
|
|
"grad_norm": 0.3960299479310709,
|
|
"learning_rate": 3.935019832306905e-05,
|
|
"loss": 1.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3388521373271942,
|
|
"step": 205,
|
|
"valid_targets_mean": 16104.9,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 0.8784648187633263,
|
|
"grad_norm": 0.379226669898679,
|
|
"learning_rate": 3.933508357663832e-05,
|
|
"loss": 1.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571180462837219,
|
|
"step": 206,
|
|
"valid_targets_mean": 14878.7,
|
|
"valid_targets_min": 13210
|
|
},
|
|
{
|
|
"epoch": 0.8827292110874201,
|
|
"grad_norm": 0.309941973007609,
|
|
"learning_rate": 3.9319798027203544e-05,
|
|
"loss": 1.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000110983848572,
|
|
"step": 207,
|
|
"valid_targets_mean": 16179.5,
|
|
"valid_targets_min": 15250
|
|
},
|
|
{
|
|
"epoch": 0.8869936034115139,
|
|
"grad_norm": 0.3590551775559692,
|
|
"learning_rate": 3.930434180979478e-05,
|
|
"loss": 1.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27522504329681396,
|
|
"step": 208,
|
|
"valid_targets_mean": 13244.8,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 0.8912579957356077,
|
|
"grad_norm": 0.3794972767149023,
|
|
"learning_rate": 3.928871506094975e-05,
|
|
"loss": 1.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27033573389053345,
|
|
"step": 209,
|
|
"valid_targets_mean": 15530.1,
|
|
"valid_targets_min": 14077
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.3468110890945256,
|
|
"learning_rate": 3.927291791871264e-05,
|
|
"loss": 1.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3237696886062622,
|
|
"step": 210,
|
|
"valid_targets_mean": 16145.0,
|
|
"valid_targets_min": 15408
|
|
},
|
|
{
|
|
"epoch": 0.8997867803837953,
|
|
"grad_norm": 0.28612789274214856,
|
|
"learning_rate": 3.925695052263284e-05,
|
|
"loss": 1.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19704505801200867,
|
|
"step": 211,
|
|
"valid_targets_mean": 9302.1,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 0.9040511727078892,
|
|
"grad_norm": 0.37829699375162723,
|
|
"learning_rate": 3.924081301376375e-05,
|
|
"loss": 1.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830193042755127,
|
|
"step": 212,
|
|
"valid_targets_mean": 16123.4,
|
|
"valid_targets_min": 15464
|
|
},
|
|
{
|
|
"epoch": 0.908315565031983,
|
|
"grad_norm": 0.41084813490365696,
|
|
"learning_rate": 3.9224505534661525e-05,
|
|
"loss": 1.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3296535611152649,
|
|
"step": 213,
|
|
"valid_targets_mean": 16164.3,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 0.9125799573560768,
|
|
"grad_norm": 0.3448857418337144,
|
|
"learning_rate": 3.92080282293838e-05,
|
|
"loss": 1.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21162542700767517,
|
|
"step": 214,
|
|
"valid_targets_mean": 11598.8,
|
|
"valid_targets_min": 6923
|
|
},
|
|
{
|
|
"epoch": 0.9168443496801706,
|
|
"grad_norm": 0.4216988848704001,
|
|
"learning_rate": 3.9191381243488417e-05,
|
|
"loss": 1.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782018184661865,
|
|
"step": 215,
|
|
"valid_targets_mean": 16140.2,
|
|
"valid_targets_min": 15054
|
|
},
|
|
{
|
|
"epoch": 0.9211087420042644,
|
|
"grad_norm": 0.4520192538398874,
|
|
"learning_rate": 3.9174564724032167e-05,
|
|
"loss": 1.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245173990726471,
|
|
"step": 216,
|
|
"valid_targets_mean": 16168.6,
|
|
"valid_targets_min": 15492
|
|
},
|
|
{
|
|
"epoch": 0.9253731343283582,
|
|
"grad_norm": 0.5445990650271554,
|
|
"learning_rate": 3.9157578819569455e-05,
|
|
"loss": 1.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23481634259223938,
|
|
"step": 217,
|
|
"valid_targets_mean": 13440.2,
|
|
"valid_targets_min": 11543
|
|
},
|
|
{
|
|
"epoch": 0.929637526652452,
|
|
"grad_norm": 0.5450244324043093,
|
|
"learning_rate": 3.9140423680151036e-05,
|
|
"loss": 1.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30909502506256104,
|
|
"step": 218,
|
|
"valid_targets_mean": 16076.0,
|
|
"valid_targets_min": 14699
|
|
},
|
|
{
|
|
"epoch": 0.9339019189765458,
|
|
"grad_norm": 0.3585828701583468,
|
|
"learning_rate": 3.9123099457322625e-05,
|
|
"loss": 1.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3166894316673279,
|
|
"step": 219,
|
|
"valid_targets_mean": 16097.1,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 0.9381663113006397,
|
|
"grad_norm": 0.3698144959370942,
|
|
"learning_rate": 3.9105606304123605e-05,
|
|
"loss": 1.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708337604999542,
|
|
"step": 220,
|
|
"valid_targets_mean": 14339.8,
|
|
"valid_targets_min": 12604
|
|
},
|
|
{
|
|
"epoch": 0.9424307036247335,
|
|
"grad_norm": 0.43891439561975154,
|
|
"learning_rate": 3.908794437508567e-05,
|
|
"loss": 1.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3262331485748291,
|
|
"step": 221,
|
|
"valid_targets_mean": 16064.1,
|
|
"valid_targets_min": 14428
|
|
},
|
|
{
|
|
"epoch": 0.9466950959488273,
|
|
"grad_norm": 0.396764810157014,
|
|
"learning_rate": 3.907011382623145e-05,
|
|
"loss": 1.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23768097162246704,
|
|
"step": 222,
|
|
"valid_targets_mean": 10776.3,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 0.9509594882729211,
|
|
"grad_norm": 0.3808667174396742,
|
|
"learning_rate": 3.905211481507313e-05,
|
|
"loss": 1.0704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573968470096588,
|
|
"step": 223,
|
|
"valid_targets_mean": 16155.4,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 0.9552238805970149,
|
|
"grad_norm": 0.34271746822127364,
|
|
"learning_rate": 3.903394750061106e-05,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2858397662639618,
|
|
"step": 224,
|
|
"valid_targets_mean": 16217.8,
|
|
"valid_targets_min": 15574
|
|
},
|
|
{
|
|
"epoch": 0.9594882729211087,
|
|
"grad_norm": 0.4090699476429991,
|
|
"learning_rate": 3.9015612043332375e-05,
|
|
"loss": 1.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15227608382701874,
|
|
"step": 225,
|
|
"valid_targets_mean": 7463.5,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 0.9637526652452025,
|
|
"grad_norm": 0.4330402554544449,
|
|
"learning_rate": 3.8997108605209535e-05,
|
|
"loss": 1.1013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26998600363731384,
|
|
"step": 226,
|
|
"valid_targets_mean": 16178.3,
|
|
"valid_targets_min": 15368
|
|
},
|
|
{
|
|
"epoch": 0.9680170575692963,
|
|
"grad_norm": 0.3674055145879997,
|
|
"learning_rate": 3.897843734969891e-05,
|
|
"loss": 1.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3121291697025299,
|
|
"step": 227,
|
|
"valid_targets_mean": 16218.0,
|
|
"valid_targets_min": 15492
|
|
},
|
|
{
|
|
"epoch": 0.9722814498933902,
|
|
"grad_norm": 0.3443785580263128,
|
|
"learning_rate": 3.895959844173937e-05,
|
|
"loss": 1.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20776158571243286,
|
|
"step": 228,
|
|
"valid_targets_mean": 11823.1,
|
|
"valid_targets_min": 9219
|
|
},
|
|
{
|
|
"epoch": 0.976545842217484,
|
|
"grad_norm": 0.5250278239914289,
|
|
"learning_rate": 3.8940592047750774e-05,
|
|
"loss": 1.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968442142009735,
|
|
"step": 229,
|
|
"valid_targets_mean": 16117.2,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 0.9808102345415778,
|
|
"grad_norm": 0.47342613018347207,
|
|
"learning_rate": 3.892141833563255e-05,
|
|
"loss": 1.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3667376935482025,
|
|
"step": 230,
|
|
"valid_targets_mean": 16062.8,
|
|
"valid_targets_min": 15242
|
|
},
|
|
{
|
|
"epoch": 0.9850746268656716,
|
|
"grad_norm": 0.4334492194409921,
|
|
"learning_rate": 3.8902077474762155e-05,
|
|
"loss": 1.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22673600912094116,
|
|
"step": 231,
|
|
"valid_targets_mean": 13741.2,
|
|
"valid_targets_min": 11500
|
|
},
|
|
{
|
|
"epoch": 0.9893390191897654,
|
|
"grad_norm": 0.38506194972597113,
|
|
"learning_rate": 3.888256963599364e-05,
|
|
"loss": 1.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3469485938549042,
|
|
"step": 232,
|
|
"valid_targets_mean": 16078.0,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 0.9936034115138592,
|
|
"grad_norm": 0.45946627720688377,
|
|
"learning_rate": 3.886289499165609e-05,
|
|
"loss": 1.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27444905042648315,
|
|
"step": 233,
|
|
"valid_targets_mean": 13027.3,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 0.997867803837953,
|
|
"grad_norm": 0.37072479002064795,
|
|
"learning_rate": 3.884305371555215e-05,
|
|
"loss": 1.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30341586470603943,
|
|
"step": 234,
|
|
"valid_targets_mean": 15647.9,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.3713655362758878,
|
|
"learning_rate": 3.882304598295643e-05,
|
|
"loss": 1.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5115901827812195,
|
|
"step": 235,
|
|
"valid_targets_mean": 9944.1,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.004264392324094,
|
|
"grad_norm": 0.492828062086341,
|
|
"learning_rate": 3.880287197061402e-05,
|
|
"loss": 1.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28225335478782654,
|
|
"step": 236,
|
|
"valid_targets_mean": 16134.4,
|
|
"valid_targets_min": 15417
|
|
},
|
|
{
|
|
"epoch": 1.0085287846481876,
|
|
"grad_norm": 0.48528813108169805,
|
|
"learning_rate": 3.878253185673888e-05,
|
|
"loss": 1.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34507322311401367,
|
|
"step": 237,
|
|
"valid_targets_mean": 16085.3,
|
|
"valid_targets_min": 15115
|
|
},
|
|
{
|
|
"epoch": 1.0127931769722816,
|
|
"grad_norm": 0.4657002193996111,
|
|
"learning_rate": 3.876202582101229e-05,
|
|
"loss": 1.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22333009541034698,
|
|
"step": 238,
|
|
"valid_targets_mean": 11956.3,
|
|
"valid_targets_min": 8620
|
|
},
|
|
{
|
|
"epoch": 1.0170575692963753,
|
|
"grad_norm": 0.5797505215217232,
|
|
"learning_rate": 3.874135404458125e-05,
|
|
"loss": 1.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30123043060302734,
|
|
"step": 239,
|
|
"valid_targets_mean": 16189.2,
|
|
"valid_targets_min": 15410
|
|
},
|
|
{
|
|
"epoch": 1.0213219616204692,
|
|
"grad_norm": 0.5565920299930178,
|
|
"learning_rate": 3.8720516710056905e-05,
|
|
"loss": 1.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3295617699623108,
|
|
"step": 240,
|
|
"valid_targets_mean": 16079.0,
|
|
"valid_targets_min": 14919
|
|
},
|
|
{
|
|
"epoch": 1.0255863539445629,
|
|
"grad_norm": 0.46016269563519424,
|
|
"learning_rate": 3.8699514001512885e-05,
|
|
"loss": 1.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568991780281067,
|
|
"step": 241,
|
|
"valid_targets_mean": 14073.2,
|
|
"valid_targets_min": 12010
|
|
},
|
|
{
|
|
"epoch": 1.0298507462686568,
|
|
"grad_norm": 0.45864038975323523,
|
|
"learning_rate": 3.867834610448374e-05,
|
|
"loss": 1.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31548625230789185,
|
|
"step": 242,
|
|
"valid_targets_mean": 16126.4,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 1.0341151385927505,
|
|
"grad_norm": 0.40278729122913604,
|
|
"learning_rate": 3.865701320596324e-05,
|
|
"loss": 1.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24541878700256348,
|
|
"step": 243,
|
|
"valid_targets_mean": 13054.0,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 1.0383795309168444,
|
|
"grad_norm": 0.4100179953102017,
|
|
"learning_rate": 3.863551549440277e-05,
|
|
"loss": 1.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27249300479888916,
|
|
"step": 244,
|
|
"valid_targets_mean": 15801.2,
|
|
"valid_targets_min": 14436
|
|
},
|
|
{
|
|
"epoch": 1.0426439232409381,
|
|
"grad_norm": 0.5075800018498047,
|
|
"learning_rate": 3.861385315970964e-05,
|
|
"loss": 1.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811691164970398,
|
|
"step": 245,
|
|
"valid_targets_mean": 16140.1,
|
|
"valid_targets_min": 14444
|
|
},
|
|
{
|
|
"epoch": 1.046908315565032,
|
|
"grad_norm": 0.421480437018899,
|
|
"learning_rate": 3.859202639324542e-05,
|
|
"loss": 1.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19223734736442566,
|
|
"step": 246,
|
|
"valid_targets_mean": 10079.0,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 1.0511727078891258,
|
|
"grad_norm": 0.42648584594833727,
|
|
"learning_rate": 3.8570035387824214e-05,
|
|
"loss": 1.101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835865020751953,
|
|
"step": 247,
|
|
"valid_targets_mean": 16107.3,
|
|
"valid_targets_min": 15290
|
|
},
|
|
{
|
|
"epoch": 1.0554371002132197,
|
|
"grad_norm": 0.44244263245684756,
|
|
"learning_rate": 3.8547880337711036e-05,
|
|
"loss": 1.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32503050565719604,
|
|
"step": 248,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15031
|
|
},
|
|
{
|
|
"epoch": 1.0597014925373134,
|
|
"grad_norm": 0.5157379647358489,
|
|
"learning_rate": 3.8525561438620016e-05,
|
|
"loss": 1.0586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18939271569252014,
|
|
"step": 249,
|
|
"valid_targets_mean": 10379.2,
|
|
"valid_targets_min": 6997
|
|
},
|
|
{
|
|
"epoch": 1.0639658848614073,
|
|
"grad_norm": 0.4449900776791044,
|
|
"learning_rate": 3.850307888771269e-05,
|
|
"loss": 1.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2888108193874359,
|
|
"step": 250,
|
|
"valid_targets_mean": 16118.5,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 1.068230277185501,
|
|
"grad_norm": 0.48149873655188175,
|
|
"learning_rate": 3.848043288359629e-05,
|
|
"loss": 1.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35437071323394775,
|
|
"step": 251,
|
|
"valid_targets_mean": 16145.3,
|
|
"valid_targets_min": 15417
|
|
},
|
|
{
|
|
"epoch": 1.072494669509595,
|
|
"grad_norm": 0.4859386612268441,
|
|
"learning_rate": 3.8457623626321944e-05,
|
|
"loss": 1.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2482079416513443,
|
|
"step": 252,
|
|
"valid_targets_mean": 13666.4,
|
|
"valid_targets_min": 11928
|
|
},
|
|
{
|
|
"epoch": 1.0767590618336886,
|
|
"grad_norm": 0.3906169267825302,
|
|
"learning_rate": 3.843465131738296e-05,
|
|
"loss": 1.0505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29448628425598145,
|
|
"step": 253,
|
|
"valid_targets_mean": 16168.5,
|
|
"valid_targets_min": 15367
|
|
},
|
|
{
|
|
"epoch": 1.0810234541577826,
|
|
"grad_norm": 0.46947961764436325,
|
|
"learning_rate": 3.8411516159713e-05,
|
|
"loss": 1.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33546367287635803,
|
|
"step": 254,
|
|
"valid_targets_mean": 16058.3,
|
|
"valid_targets_min": 15122
|
|
},
|
|
{
|
|
"epoch": 1.0852878464818763,
|
|
"grad_norm": 0.46691663929706273,
|
|
"learning_rate": 3.838821835768431e-05,
|
|
"loss": 1.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541680634021759,
|
|
"step": 255,
|
|
"valid_targets_mean": 14634.8,
|
|
"valid_targets_min": 12318
|
|
},
|
|
{
|
|
"epoch": 1.0895522388059702,
|
|
"grad_norm": 0.3703812856992591,
|
|
"learning_rate": 3.83647581171059e-05,
|
|
"loss": 1.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811293601989746,
|
|
"step": 256,
|
|
"valid_targets_mean": 16229.5,
|
|
"valid_targets_min": 15678
|
|
},
|
|
{
|
|
"epoch": 1.0938166311300639,
|
|
"grad_norm": 0.3996476088742633,
|
|
"learning_rate": 3.8341135645221744e-05,
|
|
"loss": 1.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2120198756456375,
|
|
"step": 257,
|
|
"valid_targets_mean": 10198.9,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 1.0980810234541578,
|
|
"grad_norm": 0.4111239192875592,
|
|
"learning_rate": 3.831735115070895e-05,
|
|
"loss": 1.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730482220649719,
|
|
"step": 258,
|
|
"valid_targets_mean": 15648.3,
|
|
"valid_targets_min": 14587
|
|
},
|
|
{
|
|
"epoch": 1.1023454157782515,
|
|
"grad_norm": 0.40793417859239417,
|
|
"learning_rate": 3.8293404843675904e-05,
|
|
"loss": 1.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305908441543579,
|
|
"step": 259,
|
|
"valid_targets_mean": 16135.6,
|
|
"valid_targets_min": 15231
|
|
},
|
|
{
|
|
"epoch": 1.1066098081023454,
|
|
"grad_norm": 0.4148453446560916,
|
|
"learning_rate": 3.8269296935660395e-05,
|
|
"loss": 1.1264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15724597871303558,
|
|
"step": 260,
|
|
"valid_targets_mean": 7092.5,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.1108742004264391,
|
|
"grad_norm": 0.3969229604496845,
|
|
"learning_rate": 3.82450276396278e-05,
|
|
"loss": 1.0551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26030534505844116,
|
|
"step": 261,
|
|
"valid_targets_mean": 16152.0,
|
|
"valid_targets_min": 14548
|
|
},
|
|
{
|
|
"epoch": 1.115138592750533,
|
|
"grad_norm": 0.42210203028372206,
|
|
"learning_rate": 3.822059716996916e-05,
|
|
"loss": 1.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29231420159339905,
|
|
"step": 262,
|
|
"valid_targets_mean": 16084.3,
|
|
"valid_targets_min": 11563
|
|
},
|
|
{
|
|
"epoch": 1.1194029850746268,
|
|
"grad_norm": 0.4689989288485153,
|
|
"learning_rate": 3.819600574249929e-05,
|
|
"loss": 1.0943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22371648252010345,
|
|
"step": 263,
|
|
"valid_targets_mean": 12894.8,
|
|
"valid_targets_min": 10816
|
|
},
|
|
{
|
|
"epoch": 1.1236673773987207,
|
|
"grad_norm": 0.4556738705049439,
|
|
"learning_rate": 3.817125357445489e-05,
|
|
"loss": 1.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2985258102416992,
|
|
"step": 264,
|
|
"valid_targets_mean": 16128.0,
|
|
"valid_targets_min": 15288
|
|
},
|
|
{
|
|
"epoch": 1.1279317697228146,
|
|
"grad_norm": 0.3800765405485939,
|
|
"learning_rate": 3.814634088449261e-05,
|
|
"loss": 1.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30357831716537476,
|
|
"step": 265,
|
|
"valid_targets_mean": 16191.3,
|
|
"valid_targets_min": 15360
|
|
},
|
|
{
|
|
"epoch": 1.1321961620469083,
|
|
"grad_norm": 0.4833962669327678,
|
|
"learning_rate": 3.812126789268712e-05,
|
|
"loss": 1.0661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2506658434867859,
|
|
"step": 266,
|
|
"valid_targets_mean": 13714.1,
|
|
"valid_targets_min": 10885
|
|
},
|
|
{
|
|
"epoch": 1.136460554371002,
|
|
"grad_norm": 0.518902812754926,
|
|
"learning_rate": 3.80960348205292e-05,
|
|
"loss": 1.0766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30003392696380615,
|
|
"step": 267,
|
|
"valid_targets_mean": 16230.6,
|
|
"valid_targets_min": 14726
|
|
},
|
|
{
|
|
"epoch": 1.140724946695096,
|
|
"grad_norm": 0.462341573969094,
|
|
"learning_rate": 3.807064189092372e-05,
|
|
"loss": 1.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258874773979187,
|
|
"step": 268,
|
|
"valid_targets_mean": 12928.8,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.1449893390191899,
|
|
"grad_norm": 0.35268494216191165,
|
|
"learning_rate": 3.804508932818771e-05,
|
|
"loss": 1.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675431966781616,
|
|
"step": 269,
|
|
"valid_targets_mean": 15937.6,
|
|
"valid_targets_min": 14274
|
|
},
|
|
{
|
|
"epoch": 1.1492537313432836,
|
|
"grad_norm": 0.5517836141192989,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 1.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31933826208114624,
|
|
"step": 270,
|
|
"valid_targets_mean": 16088.9,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 1.1535181236673775,
|
|
"grad_norm": 0.5323326988029511,
|
|
"learning_rate": 3.799350620764114e-05,
|
|
"loss": 1.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2066635638475418,
|
|
"step": 271,
|
|
"valid_targets_mean": 9558.3,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 1.1577825159914712,
|
|
"grad_norm": 0.45852592023556543,
|
|
"learning_rate": 3.7967476105507535e-05,
|
|
"loss": 1.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2466944456100464,
|
|
"step": 272,
|
|
"valid_targets_mean": 16074.0,
|
|
"valid_targets_min": 15120
|
|
},
|
|
{
|
|
"epoch": 1.1620469083155651,
|
|
"grad_norm": 0.52730269806394,
|
|
"learning_rate": 3.7941287281593284e-05,
|
|
"loss": 1.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3480404019355774,
|
|
"step": 273,
|
|
"valid_targets_mean": 16051.9,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 1.1663113006396588,
|
|
"grad_norm": 0.5317950143572628,
|
|
"learning_rate": 3.7914939967246227e-05,
|
|
"loss": 1.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19135290384292603,
|
|
"step": 274,
|
|
"valid_targets_mean": 10320.2,
|
|
"valid_targets_min": 4592
|
|
},
|
|
{
|
|
"epoch": 1.1705756929637527,
|
|
"grad_norm": 0.47104182647424275,
|
|
"learning_rate": 3.7888434395214285e-05,
|
|
"loss": 1.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776111364364624,
|
|
"step": 275,
|
|
"valid_targets_mean": 16176.9,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 1.1748400852878464,
|
|
"grad_norm": 0.5015585288642224,
|
|
"learning_rate": 3.786177079964339e-05,
|
|
"loss": 1.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34761297702789307,
|
|
"step": 276,
|
|
"valid_targets_mean": 15977.2,
|
|
"valid_targets_min": 12151
|
|
},
|
|
{
|
|
"epoch": 1.1791044776119404,
|
|
"grad_norm": 0.46156337605426656,
|
|
"learning_rate": 3.783494941607544e-05,
|
|
"loss": 1.0778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24728554487228394,
|
|
"step": 277,
|
|
"valid_targets_mean": 14817.6,
|
|
"valid_targets_min": 12394
|
|
},
|
|
{
|
|
"epoch": 1.183368869936034,
|
|
"grad_norm": 0.5663761071799178,
|
|
"learning_rate": 3.780797048144621e-05,
|
|
"loss": 1.0673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29986995458602905,
|
|
"step": 278,
|
|
"valid_targets_mean": 16170.4,
|
|
"valid_targets_min": 15476
|
|
},
|
|
{
|
|
"epoch": 1.187633262260128,
|
|
"grad_norm": 0.4937926180202986,
|
|
"learning_rate": 3.7780834234083236e-05,
|
|
"loss": 1.0691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29886218905448914,
|
|
"step": 279,
|
|
"valid_targets_mean": 16205.1,
|
|
"valid_targets_min": 15621
|
|
},
|
|
{
|
|
"epoch": 1.1918976545842217,
|
|
"grad_norm": 0.5114978702997742,
|
|
"learning_rate": 3.775354091370376e-05,
|
|
"loss": 1.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26027584075927734,
|
|
"step": 280,
|
|
"valid_targets_mean": 15438.3,
|
|
"valid_targets_min": 13444
|
|
},
|
|
{
|
|
"epoch": 1.1961620469083156,
|
|
"grad_norm": 0.44901505238938466,
|
|
"learning_rate": 3.772609076141255e-05,
|
|
"loss": 1.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3146480619907379,
|
|
"step": 281,
|
|
"valid_targets_mean": 16119.0,
|
|
"valid_targets_min": 15507
|
|
},
|
|
{
|
|
"epoch": 1.2004264392324093,
|
|
"grad_norm": 0.4209579614378078,
|
|
"learning_rate": 3.769848401969982e-05,
|
|
"loss": 1.1076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21626295149326324,
|
|
"step": 282,
|
|
"valid_targets_mean": 10172.0,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 1.2046908315565032,
|
|
"grad_norm": 0.3888087959337898,
|
|
"learning_rate": 3.767072093243907e-05,
|
|
"loss": 1.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947539985179901,
|
|
"step": 283,
|
|
"valid_targets_mean": 15828.4,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 1.208955223880597,
|
|
"grad_norm": 0.34764295291226055,
|
|
"learning_rate": 3.7642801744884915e-05,
|
|
"loss": 1.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29402148723602295,
|
|
"step": 284,
|
|
"valid_targets_mean": 16200.7,
|
|
"valid_targets_min": 15297
|
|
},
|
|
{
|
|
"epoch": 1.2132196162046909,
|
|
"grad_norm": 0.292454378743343,
|
|
"learning_rate": 3.761472670367096e-05,
|
|
"loss": 1.0746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17497391998767853,
|
|
"step": 285,
|
|
"valid_targets_mean": 9475.9,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 1.2174840085287846,
|
|
"grad_norm": 0.428465132297418,
|
|
"learning_rate": 3.758649605680758e-05,
|
|
"loss": 1.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28571125864982605,
|
|
"step": 286,
|
|
"valid_targets_mean": 16135.3,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 1.2217484008528785,
|
|
"grad_norm": 0.35353654724450573,
|
|
"learning_rate": 3.755811005367974e-05,
|
|
"loss": 1.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34899359941482544,
|
|
"step": 287,
|
|
"valid_targets_mean": 15927.2,
|
|
"valid_targets_min": 13538
|
|
},
|
|
{
|
|
"epoch": 1.2260127931769722,
|
|
"grad_norm": 0.32107825889625696,
|
|
"learning_rate": 3.752956894504481e-05,
|
|
"loss": 1.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21786460280418396,
|
|
"step": 288,
|
|
"valid_targets_mean": 12555.1,
|
|
"valid_targets_min": 10326
|
|
},
|
|
{
|
|
"epoch": 1.2302771855010661,
|
|
"grad_norm": 0.4011903267761661,
|
|
"learning_rate": 3.750087298303033e-05,
|
|
"loss": 1.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26555633544921875,
|
|
"step": 289,
|
|
"valid_targets_mean": 16137.8,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 1.2345415778251598,
|
|
"grad_norm": 0.4140800310411746,
|
|
"learning_rate": 3.7472022421131795e-05,
|
|
"loss": 1.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3115846812725067,
|
|
"step": 290,
|
|
"valid_targets_mean": 16206.4,
|
|
"valid_targets_min": 15481
|
|
},
|
|
{
|
|
"epoch": 1.2388059701492538,
|
|
"grad_norm": 0.3342400344520096,
|
|
"learning_rate": 3.7443017514210406e-05,
|
|
"loss": 1.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25025108456611633,
|
|
"step": 291,
|
|
"valid_targets_mean": 14772.3,
|
|
"valid_targets_min": 12576
|
|
},
|
|
{
|
|
"epoch": 1.2430703624733475,
|
|
"grad_norm": 0.4193850159643592,
|
|
"learning_rate": 3.7413858518490825e-05,
|
|
"loss": 1.0596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30145931243896484,
|
|
"step": 292,
|
|
"valid_targets_mean": 16124.6,
|
|
"valid_targets_min": 14727
|
|
},
|
|
{
|
|
"epoch": 1.2473347547974414,
|
|
"grad_norm": 0.3428033141851263,
|
|
"learning_rate": 3.7384545691558895e-05,
|
|
"loss": 1.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986433506011963,
|
|
"step": 293,
|
|
"valid_targets_mean": 12797.4,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 1.251599147121535,
|
|
"grad_norm": 0.46779767077850404,
|
|
"learning_rate": 3.735507929235941e-05,
|
|
"loss": 1.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28899234533309937,
|
|
"step": 294,
|
|
"valid_targets_mean": 15906.0,
|
|
"valid_targets_min": 14468
|
|
},
|
|
{
|
|
"epoch": 1.255863539445629,
|
|
"grad_norm": 0.3279641154358886,
|
|
"learning_rate": 3.732545958119378e-05,
|
|
"loss": 1.0802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3267977833747864,
|
|
"step": 295,
|
|
"valid_targets_mean": 16134.5,
|
|
"valid_targets_min": 15264
|
|
},
|
|
{
|
|
"epoch": 1.260127931769723,
|
|
"grad_norm": 0.32806455507915105,
|
|
"learning_rate": 3.729568681971774e-05,
|
|
"loss": 1.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19333700835704803,
|
|
"step": 296,
|
|
"valid_targets_mean": 9574.0,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 1.2643923240938166,
|
|
"grad_norm": 0.32374524860685283,
|
|
"learning_rate": 3.726576127093905e-05,
|
|
"loss": 1.0493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695855498313904,
|
|
"step": 297,
|
|
"valid_targets_mean": 16036.6,
|
|
"valid_targets_min": 13771
|
|
},
|
|
{
|
|
"epoch": 1.2686567164179103,
|
|
"grad_norm": 0.33439627193487537,
|
|
"learning_rate": 3.7235683199215177e-05,
|
|
"loss": 1.0988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3584269881248474,
|
|
"step": 298,
|
|
"valid_targets_mean": 16034.9,
|
|
"valid_targets_min": 13226
|
|
},
|
|
{
|
|
"epoch": 1.2729211087420043,
|
|
"grad_norm": 0.3364668190663482,
|
|
"learning_rate": 3.7205452870250944e-05,
|
|
"loss": 1.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20322629809379578,
|
|
"step": 299,
|
|
"valid_targets_mean": 11007.0,
|
|
"valid_targets_min": 5744
|
|
},
|
|
{
|
|
"epoch": 1.2771855010660982,
|
|
"grad_norm": 0.3283276606591826,
|
|
"learning_rate": 3.7175070551096204e-05,
|
|
"loss": 1.0611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728656530380249,
|
|
"step": 300,
|
|
"valid_targets_mean": 16107.3,
|
|
"valid_targets_min": 14275
|
|
},
|
|
{
|
|
"epoch": 1.2814498933901919,
|
|
"grad_norm": 0.3410129571670106,
|
|
"learning_rate": 3.7144536510143436e-05,
|
|
"loss": 1.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312499463558197,
|
|
"step": 301,
|
|
"valid_targets_mean": 16167.8,
|
|
"valid_targets_min": 15391
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.33178138136357144,
|
|
"learning_rate": 3.711385101712544e-05,
|
|
"loss": 1.088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2378830462694168,
|
|
"step": 302,
|
|
"valid_targets_mean": 13015.2,
|
|
"valid_targets_min": 10716
|
|
},
|
|
{
|
|
"epoch": 1.2899786780383795,
|
|
"grad_norm": 0.3052471145988171,
|
|
"learning_rate": 3.708301434311289e-05,
|
|
"loss": 1.0735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3335162103176117,
|
|
"step": 303,
|
|
"valid_targets_mean": 16074.2,
|
|
"valid_targets_min": 15513
|
|
},
|
|
{
|
|
"epoch": 1.2942430703624734,
|
|
"grad_norm": 0.3837352985797052,
|
|
"learning_rate": 3.7052026760511996e-05,
|
|
"loss": 1.0981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33790653944015503,
|
|
"step": 304,
|
|
"valid_targets_mean": 16062.2,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 1.2985074626865671,
|
|
"grad_norm": 0.3321958535826849,
|
|
"learning_rate": 3.7020888543062046e-05,
|
|
"loss": 1.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28074121475219727,
|
|
"step": 305,
|
|
"valid_targets_mean": 15560.0,
|
|
"valid_targets_min": 7641
|
|
},
|
|
{
|
|
"epoch": 1.302771855010661,
|
|
"grad_norm": 0.3501961919464328,
|
|
"learning_rate": 3.6989599965833024e-05,
|
|
"loss": 1.0766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851867377758026,
|
|
"step": 306,
|
|
"valid_targets_mean": 16099.2,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 1.3070362473347548,
|
|
"grad_norm": 0.2955366487957757,
|
|
"learning_rate": 3.695816130522317e-05,
|
|
"loss": 1.0946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22300812602043152,
|
|
"step": 307,
|
|
"valid_targets_mean": 11237.3,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 1.3113006396588487,
|
|
"grad_norm": 0.325132523785184,
|
|
"learning_rate": 3.692657283895651e-05,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2740810215473175,
|
|
"step": 308,
|
|
"valid_targets_mean": 16148.6,
|
|
"valid_targets_min": 15513
|
|
},
|
|
{
|
|
"epoch": 1.3155650319829424,
|
|
"grad_norm": 0.3924334579310958,
|
|
"learning_rate": 3.689483484608048e-05,
|
|
"loss": 1.0616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30613797903060913,
|
|
"step": 309,
|
|
"valid_targets_mean": 16065.3,
|
|
"valid_targets_min": 13956
|
|
},
|
|
{
|
|
"epoch": 1.3198294243070363,
|
|
"grad_norm": 0.3701546337432741,
|
|
"learning_rate": 3.6862947606963364e-05,
|
|
"loss": 1.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709344983100891,
|
|
"step": 310,
|
|
"valid_targets_mean": 8786.3,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 1.32409381663113,
|
|
"grad_norm": 0.3069231801232966,
|
|
"learning_rate": 3.6830911403291885e-05,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26452311873435974,
|
|
"step": 311,
|
|
"valid_targets_mean": 16189.7,
|
|
"valid_targets_min": 15573
|
|
},
|
|
{
|
|
"epoch": 1.328358208955224,
|
|
"grad_norm": 0.37555960792372617,
|
|
"learning_rate": 3.679872651806869e-05,
|
|
"loss": 1.0453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30860525369644165,
|
|
"step": 312,
|
|
"valid_targets_mean": 16043.3,
|
|
"valid_targets_min": 13998
|
|
},
|
|
{
|
|
"epoch": 1.3326226012793176,
|
|
"grad_norm": 0.3570150907119278,
|
|
"learning_rate": 3.676639323560986e-05,
|
|
"loss": 1.0489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21335217356681824,
|
|
"step": 313,
|
|
"valid_targets_mean": 11691.9,
|
|
"valid_targets_min": 8218
|
|
},
|
|
{
|
|
"epoch": 1.3368869936034116,
|
|
"grad_norm": 0.3495179049936286,
|
|
"learning_rate": 3.6733911841542365e-05,
|
|
"loss": 1.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860753536224365,
|
|
"step": 314,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15261
|
|
},
|
|
{
|
|
"epoch": 1.3411513859275053,
|
|
"grad_norm": 0.314152802186107,
|
|
"learning_rate": 3.6701282622801626e-05,
|
|
"loss": 1.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34913378953933716,
|
|
"step": 315,
|
|
"valid_targets_mean": 16031.2,
|
|
"valid_targets_min": 14724
|
|
},
|
|
{
|
|
"epoch": 1.3454157782515992,
|
|
"grad_norm": 0.3525399586735576,
|
|
"learning_rate": 3.666850586762886e-05,
|
|
"loss": 1.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26787278056144714,
|
|
"step": 316,
|
|
"valid_targets_mean": 15082.3,
|
|
"valid_targets_min": 12379
|
|
},
|
|
{
|
|
"epoch": 1.349680170575693,
|
|
"grad_norm": 0.35426346625897037,
|
|
"learning_rate": 3.663558186556863e-05,
|
|
"loss": 1.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138875365257263,
|
|
"step": 317,
|
|
"valid_targets_mean": 16143.6,
|
|
"valid_targets_min": 15422
|
|
},
|
|
{
|
|
"epoch": 1.3539445628997868,
|
|
"grad_norm": 0.329789873292349,
|
|
"learning_rate": 3.660251090746627e-05,
|
|
"loss": 1.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27487125992774963,
|
|
"step": 318,
|
|
"valid_targets_mean": 13230.5,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 1.3582089552238805,
|
|
"grad_norm": 0.3254671506286237,
|
|
"learning_rate": 3.656929328546526e-05,
|
|
"loss": 1.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868189811706543,
|
|
"step": 319,
|
|
"valid_targets_mean": 15947.4,
|
|
"valid_targets_min": 14844
|
|
},
|
|
{
|
|
"epoch": 1.3624733475479744,
|
|
"grad_norm": 0.37585500252720244,
|
|
"learning_rate": 3.653592929300471e-05,
|
|
"loss": 1.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040839731693268,
|
|
"step": 320,
|
|
"valid_targets_mean": 16161.1,
|
|
"valid_targets_min": 15333
|
|
},
|
|
{
|
|
"epoch": 1.3667377398720681,
|
|
"grad_norm": 0.3427210209211445,
|
|
"learning_rate": 3.650241922481675e-05,
|
|
"loss": 1.0733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1861184984445572,
|
|
"step": 321,
|
|
"valid_targets_mean": 9992.6,
|
|
"valid_targets_min": 2412
|
|
},
|
|
{
|
|
"epoch": 1.371002132196162,
|
|
"grad_norm": 0.3384349571500406,
|
|
"learning_rate": 3.6468763376923886e-05,
|
|
"loss": 1.068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832762598991394,
|
|
"step": 322,
|
|
"valid_targets_mean": 16080.6,
|
|
"valid_targets_min": 15212
|
|
},
|
|
{
|
|
"epoch": 1.375266524520256,
|
|
"grad_norm": 0.3681645742819213,
|
|
"learning_rate": 3.6434962046636464e-05,
|
|
"loss": 1.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32412612438201904,
|
|
"step": 323,
|
|
"valid_targets_mean": 16106.3,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 1.3795309168443497,
|
|
"grad_norm": 0.38460123447575495,
|
|
"learning_rate": 3.6401015532549957e-05,
|
|
"loss": 1.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21516337990760803,
|
|
"step": 324,
|
|
"valid_targets_mean": 10360.2,
|
|
"valid_targets_min": 7125
|
|
},
|
|
{
|
|
"epoch": 1.3837953091684434,
|
|
"grad_norm": 0.3220307097756737,
|
|
"learning_rate": 3.6366924134542386e-05,
|
|
"loss": 1.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289733350276947,
|
|
"step": 325,
|
|
"valid_targets_mean": 16060.8,
|
|
"valid_targets_min": 13555
|
|
},
|
|
{
|
|
"epoch": 1.3880597014925373,
|
|
"grad_norm": 0.3669520161660149,
|
|
"learning_rate": 3.633268815377166e-05,
|
|
"loss": 1.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3282416760921478,
|
|
"step": 326,
|
|
"valid_targets_mean": 16054.6,
|
|
"valid_targets_min": 14273
|
|
},
|
|
{
|
|
"epoch": 1.3923240938166312,
|
|
"grad_norm": 0.33768050522683196,
|
|
"learning_rate": 3.6298307892672895e-05,
|
|
"loss": 1.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23629745841026306,
|
|
"step": 327,
|
|
"valid_targets_mean": 14108.6,
|
|
"valid_targets_min": 11744
|
|
},
|
|
{
|
|
"epoch": 1.396588486140725,
|
|
"grad_norm": 0.31959918290802164,
|
|
"learning_rate": 3.626378365495577e-05,
|
|
"loss": 1.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29224511981010437,
|
|
"step": 328,
|
|
"valid_targets_mean": 16144.3,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 1.4008528784648187,
|
|
"grad_norm": 0.3584955638744689,
|
|
"learning_rate": 3.622911574560181e-05,
|
|
"loss": 1.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3278372585773468,
|
|
"step": 329,
|
|
"valid_targets_mean": 16119.1,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 1.4051172707889126,
|
|
"grad_norm": 0.3551164253468689,
|
|
"learning_rate": 3.6194304470861744e-05,
|
|
"loss": 1.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731977701187134,
|
|
"step": 330,
|
|
"valid_targets_mean": 15151.9,
|
|
"valid_targets_min": 13814
|
|
},
|
|
{
|
|
"epoch": 1.4093816631130065,
|
|
"grad_norm": 0.33384857656040146,
|
|
"learning_rate": 3.615935013825272e-05,
|
|
"loss": 1.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30953341722488403,
|
|
"step": 331,
|
|
"valid_targets_mean": 16155.7,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 1.4136460554371002,
|
|
"grad_norm": 0.3465598489468695,
|
|
"learning_rate": 3.612425305655569e-05,
|
|
"loss": 1.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22017063200473785,
|
|
"step": 332,
|
|
"valid_targets_mean": 10614.2,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 1.417910447761194,
|
|
"grad_norm": 0.37042791165061556,
|
|
"learning_rate": 3.6089013535812593e-05,
|
|
"loss": 1.0543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671017348766327,
|
|
"step": 333,
|
|
"valid_targets_mean": 16140.9,
|
|
"valid_targets_min": 15217
|
|
},
|
|
{
|
|
"epoch": 1.4221748400852878,
|
|
"grad_norm": 0.32387094747577067,
|
|
"learning_rate": 3.6053631887323656e-05,
|
|
"loss": 1.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3278307616710663,
|
|
"step": 334,
|
|
"valid_targets_mean": 16096.1,
|
|
"valid_targets_min": 14473
|
|
},
|
|
{
|
|
"epoch": 1.4264392324093818,
|
|
"grad_norm": 0.3699947447682408,
|
|
"learning_rate": 3.601810842364465e-05,
|
|
"loss": 1.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1788579821586609,
|
|
"step": 335,
|
|
"valid_targets_mean": 8554.1,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.4307036247334755,
|
|
"grad_norm": 0.32886822517451314,
|
|
"learning_rate": 3.598244345858412e-05,
|
|
"loss": 1.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2595828175544739,
|
|
"step": 336,
|
|
"valid_targets_mean": 16153.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 1.4349680170575694,
|
|
"grad_norm": 0.3523255078156776,
|
|
"learning_rate": 3.594663730720059e-05,
|
|
"loss": 1.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31492581963539124,
|
|
"step": 337,
|
|
"valid_targets_mean": 16167.1,
|
|
"valid_targets_min": 15032
|
|
},
|
|
{
|
|
"epoch": 1.439232409381663,
|
|
"grad_norm": 0.3705392055125923,
|
|
"learning_rate": 3.591069028579982e-05,
|
|
"loss": 1.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22190961241722107,
|
|
"step": 338,
|
|
"valid_targets_mean": 12233.7,
|
|
"valid_targets_min": 8961
|
|
},
|
|
{
|
|
"epoch": 1.443496801705757,
|
|
"grad_norm": 0.3047525795134772,
|
|
"learning_rate": 3.5874602711931994e-05,
|
|
"loss": 1.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945648431777954,
|
|
"step": 339,
|
|
"valid_targets_mean": 16054.2,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 1.4477611940298507,
|
|
"grad_norm": 0.34833144896778817,
|
|
"learning_rate": 3.5838374904388904e-05,
|
|
"loss": 1.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32256120443344116,
|
|
"step": 340,
|
|
"valid_targets_mean": 16200.9,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 1.4520255863539446,
|
|
"grad_norm": 0.33905394371438524,
|
|
"learning_rate": 3.580200718320115e-05,
|
|
"loss": 1.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25068533420562744,
|
|
"step": 341,
|
|
"valid_targets_mean": 15703.2,
|
|
"valid_targets_min": 12835
|
|
},
|
|
{
|
|
"epoch": 1.4562899786780383,
|
|
"grad_norm": 0.36592609151750577,
|
|
"learning_rate": 3.576549986963531e-05,
|
|
"loss": 1.0774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3194271922111511,
|
|
"step": 342,
|
|
"valid_targets_mean": 16056.5,
|
|
"valid_targets_min": 14789
|
|
},
|
|
{
|
|
"epoch": 1.4605543710021323,
|
|
"grad_norm": 0.35390446845440837,
|
|
"learning_rate": 3.5728853286191075e-05,
|
|
"loss": 1.0643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540942132472992,
|
|
"step": 343,
|
|
"valid_targets_mean": 12879.8,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 1.464818763326226,
|
|
"grad_norm": 0.2755780659749987,
|
|
"learning_rate": 3.5692067756598465e-05,
|
|
"loss": 1.0582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26042819023132324,
|
|
"step": 344,
|
|
"valid_targets_mean": 16118.8,
|
|
"valid_targets_min": 15426
|
|
},
|
|
{
|
|
"epoch": 1.4690831556503199,
|
|
"grad_norm": 0.38020197667496863,
|
|
"learning_rate": 3.5655143605814885e-05,
|
|
"loss": 1.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965239882469177,
|
|
"step": 345,
|
|
"valid_targets_mean": 15897.9,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 1.4733475479744136,
|
|
"grad_norm": 0.2901011570573045,
|
|
"learning_rate": 3.561808116002232e-05,
|
|
"loss": 1.0811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19708998501300812,
|
|
"step": 346,
|
|
"valid_targets_mean": 10358.7,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 1.4776119402985075,
|
|
"grad_norm": 0.275342451326057,
|
|
"learning_rate": 3.5580880746624444e-05,
|
|
"loss": 1.0537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24683061242103577,
|
|
"step": 347,
|
|
"valid_targets_mean": 16129.1,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 1.4818763326226012,
|
|
"grad_norm": 0.312505048766249,
|
|
"learning_rate": 3.5543542694243685e-05,
|
|
"loss": 1.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103181719779968,
|
|
"step": 348,
|
|
"valid_targets_mean": 16167.1,
|
|
"valid_targets_min": 15444
|
|
},
|
|
{
|
|
"epoch": 1.4861407249466951,
|
|
"grad_norm": 0.29880209782535827,
|
|
"learning_rate": 3.5506067332718355e-05,
|
|
"loss": 1.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21556365489959717,
|
|
"step": 349,
|
|
"valid_targets_mean": 10549.6,
|
|
"valid_targets_min": 6923
|
|
},
|
|
{
|
|
"epoch": 1.4904051172707888,
|
|
"grad_norm": 0.3366257575023693,
|
|
"learning_rate": 3.546845499309976e-05,
|
|
"loss": 1.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27540475130081177,
|
|
"step": 350,
|
|
"valid_targets_mean": 16196.4,
|
|
"valid_targets_min": 15446
|
|
},
|
|
{
|
|
"epoch": 1.4946695095948828,
|
|
"grad_norm": 0.2949817898380746,
|
|
"learning_rate": 3.5430706007649225e-05,
|
|
"loss": 1.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3441014289855957,
|
|
"step": 351,
|
|
"valid_targets_mean": 16196.4,
|
|
"valid_targets_min": 15577
|
|
},
|
|
{
|
|
"epoch": 1.4989339019189765,
|
|
"grad_norm": 0.33561416621297063,
|
|
"learning_rate": 3.539282070983518e-05,
|
|
"loss": 1.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24721038341522217,
|
|
"step": 352,
|
|
"valid_targets_mean": 13119.6,
|
|
"valid_targets_min": 10183
|
|
},
|
|
{
|
|
"epoch": 1.5031982942430704,
|
|
"grad_norm": 0.33633932267657934,
|
|
"learning_rate": 3.535479943433023e-05,
|
|
"loss": 1.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3473033607006073,
|
|
"step": 353,
|
|
"valid_targets_mean": 15859.2,
|
|
"valid_targets_min": 14337
|
|
},
|
|
{
|
|
"epoch": 1.5074626865671643,
|
|
"grad_norm": 0.30291988931748776,
|
|
"learning_rate": 3.5316642517008184e-05,
|
|
"loss": 1.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30852532386779785,
|
|
"step": 354,
|
|
"valid_targets_mean": 16225.0,
|
|
"valid_targets_min": 15717
|
|
},
|
|
{
|
|
"epoch": 1.511727078891258,
|
|
"grad_norm": 0.35609152737731914,
|
|
"learning_rate": 3.5278350294941074e-05,
|
|
"loss": 1.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281000018119812,
|
|
"step": 355,
|
|
"valid_targets_mean": 15363.8,
|
|
"valid_targets_min": 13266
|
|
},
|
|
{
|
|
"epoch": 1.5159914712153517,
|
|
"grad_norm": 0.3753461112007819,
|
|
"learning_rate": 3.523992310639622e-05,
|
|
"loss": 1.0561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2891591191291809,
|
|
"step": 356,
|
|
"valid_targets_mean": 16191.6,
|
|
"valid_targets_min": 15466
|
|
},
|
|
{
|
|
"epoch": 1.5202558635394456,
|
|
"grad_norm": 0.3055423589980571,
|
|
"learning_rate": 3.5201361290833165e-05,
|
|
"loss": 1.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2250066101551056,
|
|
"step": 357,
|
|
"valid_targets_mean": 11349.5,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 1.5245202558635396,
|
|
"grad_norm": 0.34286072931150735,
|
|
"learning_rate": 3.516266518890079e-05,
|
|
"loss": 1.066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844963073730469,
|
|
"step": 358,
|
|
"valid_targets_mean": 16115.3,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 1.5287846481876333,
|
|
"grad_norm": 0.3449248650441334,
|
|
"learning_rate": 3.512383514243419e-05,
|
|
"loss": 1.0596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860240340232849,
|
|
"step": 359,
|
|
"valid_targets_mean": 16163.1,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 1.533049040511727,
|
|
"grad_norm": 0.3771111536637937,
|
|
"learning_rate": 3.5084871494451716e-05,
|
|
"loss": 1.0225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1857428401708603,
|
|
"step": 360,
|
|
"valid_targets_mean": 9576.8,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 1.537313432835821,
|
|
"grad_norm": 0.3018932724883368,
|
|
"learning_rate": 3.5045774589151955e-05,
|
|
"loss": 1.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678816318511963,
|
|
"step": 361,
|
|
"valid_targets_mean": 15993.7,
|
|
"valid_targets_min": 10506
|
|
},
|
|
{
|
|
"epoch": 1.5415778251599148,
|
|
"grad_norm": 0.40558509955640737,
|
|
"learning_rate": 3.500654477191064e-05,
|
|
"loss": 1.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34176117181777954,
|
|
"step": 362,
|
|
"valid_targets_mean": 15984.2,
|
|
"valid_targets_min": 14307
|
|
},
|
|
{
|
|
"epoch": 1.5458422174840085,
|
|
"grad_norm": 0.43847114516949215,
|
|
"learning_rate": 3.496718238927764e-05,
|
|
"loss": 1.0905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19546037912368774,
|
|
"step": 363,
|
|
"valid_targets_mean": 10465.0,
|
|
"valid_targets_min": 6740
|
|
},
|
|
{
|
|
"epoch": 1.5501066098081022,
|
|
"grad_norm": 0.355769107924039,
|
|
"learning_rate": 3.492768778897388e-05,
|
|
"loss": 1.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2820717692375183,
|
|
"step": 364,
|
|
"valid_targets_mean": 16167.3,
|
|
"valid_targets_min": 14513
|
|
},
|
|
{
|
|
"epoch": 1.5543710021321961,
|
|
"grad_norm": 0.4284452026174569,
|
|
"learning_rate": 3.4888061319888276e-05,
|
|
"loss": 1.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305700421333313,
|
|
"step": 365,
|
|
"valid_targets_mean": 16124.3,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 1.55863539445629,
|
|
"grad_norm": 0.4382707341244304,
|
|
"learning_rate": 3.484830333207466e-05,
|
|
"loss": 1.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24906116724014282,
|
|
"step": 366,
|
|
"valid_targets_mean": 13535.5,
|
|
"valid_targets_min": 11622
|
|
},
|
|
{
|
|
"epoch": 1.5628997867803838,
|
|
"grad_norm": 0.3455333246727441,
|
|
"learning_rate": 3.4808414176748666e-05,
|
|
"loss": 1.0576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977335751056671,
|
|
"step": 367,
|
|
"valid_targets_mean": 16211.1,
|
|
"valid_targets_min": 15301
|
|
},
|
|
{
|
|
"epoch": 1.5671641791044775,
|
|
"grad_norm": 0.40729051659063775,
|
|
"learning_rate": 3.476839420628466e-05,
|
|
"loss": 1.0815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757585346698761,
|
|
"step": 368,
|
|
"valid_targets_mean": 13357.7,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.4306050253331128,
|
|
"learning_rate": 3.472824377421257e-05,
|
|
"loss": 1.0562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2567337155342102,
|
|
"step": 369,
|
|
"valid_targets_mean": 15113.8,
|
|
"valid_targets_min": 13189
|
|
},
|
|
{
|
|
"epoch": 1.5756929637526653,
|
|
"grad_norm": 0.340921039883713,
|
|
"learning_rate": 3.4687963235214845e-05,
|
|
"loss": 1.0906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3353586792945862,
|
|
"step": 370,
|
|
"valid_targets_mean": 16135.2,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 1.579957356076759,
|
|
"grad_norm": 0.352901062487588,
|
|
"learning_rate": 3.464755294512325e-05,
|
|
"loss": 1.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1842219978570938,
|
|
"step": 371,
|
|
"valid_targets_mean": 8529.3,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 1.5842217484008527,
|
|
"grad_norm": 0.3252571700975914,
|
|
"learning_rate": 3.4607013260915765e-05,
|
|
"loss": 1.0876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786511480808258,
|
|
"step": 372,
|
|
"valid_targets_mean": 16116.1,
|
|
"valid_targets_min": 14467
|
|
},
|
|
{
|
|
"epoch": 1.5884861407249466,
|
|
"grad_norm": 0.32289879581539677,
|
|
"learning_rate": 3.4566344540713404e-05,
|
|
"loss": 1.0283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283419132232666,
|
|
"step": 373,
|
|
"valid_targets_mean": 16224.1,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 1.5927505330490406,
|
|
"grad_norm": 0.2898095739507456,
|
|
"learning_rate": 3.452554714377706e-05,
|
|
"loss": 1.0681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2174898087978363,
|
|
"step": 374,
|
|
"valid_targets_mean": 11231.9,
|
|
"valid_targets_min": 7068
|
|
},
|
|
{
|
|
"epoch": 1.5970149253731343,
|
|
"grad_norm": 0.32701686595227597,
|
|
"learning_rate": 3.448462143050436e-05,
|
|
"loss": 1.0558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28061437606811523,
|
|
"step": 375,
|
|
"valid_targets_mean": 15767.3,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 1.6012793176972282,
|
|
"grad_norm": 0.3273263196787299,
|
|
"learning_rate": 3.4443567762426444e-05,
|
|
"loss": 1.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31637561321258545,
|
|
"step": 376,
|
|
"valid_targets_mean": 16184.6,
|
|
"valid_targets_min": 15606
|
|
},
|
|
{
|
|
"epoch": 1.6055437100213221,
|
|
"grad_norm": 0.28906601817080585,
|
|
"learning_rate": 3.440238650220477e-05,
|
|
"loss": 1.0381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.223393052816391,
|
|
"step": 377,
|
|
"valid_targets_mean": 13476.9,
|
|
"valid_targets_min": 11755
|
|
},
|
|
{
|
|
"epoch": 1.6098081023454158,
|
|
"grad_norm": 0.35312103375959536,
|
|
"learning_rate": 3.4361078013627945e-05,
|
|
"loss": 1.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28802990913391113,
|
|
"step": 378,
|
|
"valid_targets_mean": 16131.5,
|
|
"valid_targets_min": 15301
|
|
},
|
|
{
|
|
"epoch": 1.6140724946695095,
|
|
"grad_norm": 0.3752371927643447,
|
|
"learning_rate": 3.4319642661608474e-05,
|
|
"loss": 1.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004334568977356,
|
|
"step": 379,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 1.6183368869936035,
|
|
"grad_norm": 0.32696956300360913,
|
|
"learning_rate": 3.427808081217957e-05,
|
|
"loss": 1.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2737543284893036,
|
|
"step": 380,
|
|
"valid_targets_mean": 15046.2,
|
|
"valid_targets_min": 10805
|
|
},
|
|
{
|
|
"epoch": 1.6226012793176974,
|
|
"grad_norm": 0.32425215210692304,
|
|
"learning_rate": 3.423639283249189e-05,
|
|
"loss": 1.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964897155761719,
|
|
"step": 381,
|
|
"valid_targets_mean": 16231.2,
|
|
"valid_targets_min": 15531
|
|
},
|
|
{
|
|
"epoch": 1.626865671641791,
|
|
"grad_norm": 0.3425598006408661,
|
|
"learning_rate": 3.419457909081032e-05,
|
|
"loss": 1.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23575259745121002,
|
|
"step": 382,
|
|
"valid_targets_mean": 10985.6,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 1.6311300639658848,
|
|
"grad_norm": 0.34150364485281803,
|
|
"learning_rate": 3.415263995651069e-05,
|
|
"loss": 1.111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28503063321113586,
|
|
"step": 383,
|
|
"valid_targets_mean": 15444.5,
|
|
"valid_targets_min": 13058
|
|
},
|
|
{
|
|
"epoch": 1.6353944562899787,
|
|
"grad_norm": 0.26306448756733036,
|
|
"learning_rate": 3.411057580007653e-05,
|
|
"loss": 1.0587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31446677446365356,
|
|
"step": 384,
|
|
"valid_targets_mean": 16178.7,
|
|
"valid_targets_min": 15572
|
|
},
|
|
{
|
|
"epoch": 1.6396588486140726,
|
|
"grad_norm": 0.3965336853796899,
|
|
"learning_rate": 3.4068386993095806e-05,
|
|
"loss": 1.0459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16267594695091248,
|
|
"step": 385,
|
|
"valid_targets_mean": 8596.7,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 1.6439232409381663,
|
|
"grad_norm": 0.3169819070461087,
|
|
"learning_rate": 3.402607390825762e-05,
|
|
"loss": 1.0559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24213969707489014,
|
|
"step": 386,
|
|
"valid_targets_mean": 16232.5,
|
|
"valid_targets_min": 15539
|
|
},
|
|
{
|
|
"epoch": 1.64818763326226,
|
|
"grad_norm": 0.30538475252532604,
|
|
"learning_rate": 3.398363691934894e-05,
|
|
"loss": 1.0899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33324548602104187,
|
|
"step": 387,
|
|
"valid_targets_mean": 16060.8,
|
|
"valid_targets_min": 15033
|
|
},
|
|
{
|
|
"epoch": 1.652452025586354,
|
|
"grad_norm": 0.3569751677678015,
|
|
"learning_rate": 3.3941076401251244e-05,
|
|
"loss": 1.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1977802813053131,
|
|
"step": 388,
|
|
"valid_targets_mean": 11657.1,
|
|
"valid_targets_min": 9535
|
|
},
|
|
{
|
|
"epoch": 1.6567164179104479,
|
|
"grad_norm": 0.3676226867999698,
|
|
"learning_rate": 3.3898392729937295e-05,
|
|
"loss": 1.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911812961101532,
|
|
"step": 389,
|
|
"valid_targets_mean": 16162.9,
|
|
"valid_targets_min": 15023
|
|
},
|
|
{
|
|
"epoch": 1.6609808102345416,
|
|
"grad_norm": 0.28999735215403477,
|
|
"learning_rate": 3.385558628246774e-05,
|
|
"loss": 1.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31773680448532104,
|
|
"step": 390,
|
|
"valid_targets_mean": 16113.8,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 1.6652452025586353,
|
|
"grad_norm": 0.31093356074677225,
|
|
"learning_rate": 3.381265743698781e-05,
|
|
"loss": 1.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2374432384967804,
|
|
"step": 391,
|
|
"valid_targets_mean": 14455.2,
|
|
"valid_targets_min": 12866
|
|
},
|
|
{
|
|
"epoch": 1.6695095948827292,
|
|
"grad_norm": 0.35211344055285015,
|
|
"learning_rate": 3.3769606572724e-05,
|
|
"loss": 1.0542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905210852622986,
|
|
"step": 392,
|
|
"valid_targets_mean": 16126.9,
|
|
"valid_targets_min": 14699
|
|
},
|
|
{
|
|
"epoch": 1.6737739872068231,
|
|
"grad_norm": 0.33113775807720236,
|
|
"learning_rate": 3.3726434069980686e-05,
|
|
"loss": 1.0381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2336466759443283,
|
|
"step": 393,
|
|
"valid_targets_mean": 12935.7,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 1.6780383795309168,
|
|
"grad_norm": 0.34400941915401295,
|
|
"learning_rate": 3.368314031013678e-05,
|
|
"loss": 1.0792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26921963691711426,
|
|
"step": 394,
|
|
"valid_targets_mean": 16041.7,
|
|
"valid_targets_min": 14053
|
|
},
|
|
{
|
|
"epoch": 1.6823027718550105,
|
|
"grad_norm": 0.35946946376014055,
|
|
"learning_rate": 3.363972567564236e-05,
|
|
"loss": 1.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29747501015663147,
|
|
"step": 395,
|
|
"valid_targets_mean": 16214.8,
|
|
"valid_targets_min": 15371
|
|
},
|
|
{
|
|
"epoch": 1.6865671641791045,
|
|
"grad_norm": 0.3554906035816403,
|
|
"learning_rate": 3.35961905500153e-05,
|
|
"loss": 1.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18330803513526917,
|
|
"step": 396,
|
|
"valid_targets_mean": 8966.2,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.6908315565031984,
|
|
"grad_norm": 0.3216736094709334,
|
|
"learning_rate": 3.3552535317837855e-05,
|
|
"loss": 1.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28136104345321655,
|
|
"step": 397,
|
|
"valid_targets_mean": 15754.8,
|
|
"valid_targets_min": 13899
|
|
},
|
|
{
|
|
"epoch": 1.695095948827292,
|
|
"grad_norm": 0.3400173672246051,
|
|
"learning_rate": 3.35087603647533e-05,
|
|
"loss": 1.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.322651207447052,
|
|
"step": 398,
|
|
"valid_targets_mean": 16187.2,
|
|
"valid_targets_min": 15624
|
|
},
|
|
{
|
|
"epoch": 1.6993603411513858,
|
|
"grad_norm": 0.34927667719012767,
|
|
"learning_rate": 3.346486607746249e-05,
|
|
"loss": 1.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16751596331596375,
|
|
"step": 399,
|
|
"valid_targets_mean": 9466.9,
|
|
"valid_targets_min": 4761
|
|
},
|
|
{
|
|
"epoch": 1.7036247334754797,
|
|
"grad_norm": 0.3022943680117074,
|
|
"learning_rate": 3.342085284372047e-05,
|
|
"loss": 1.0973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3175053596496582,
|
|
"step": 400,
|
|
"valid_targets_mean": 16054.5,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 1.7078891257995736,
|
|
"grad_norm": 0.35250167872229066,
|
|
"learning_rate": 3.337672105233303e-05,
|
|
"loss": 1.0742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29572397470474243,
|
|
"step": 401,
|
|
"valid_targets_mean": 16123.2,
|
|
"valid_targets_min": 14638
|
|
},
|
|
{
|
|
"epoch": 1.7121535181236673,
|
|
"grad_norm": 0.3732899225473128,
|
|
"learning_rate": 3.3332471093153296e-05,
|
|
"loss": 1.1069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22419768571853638,
|
|
"step": 402,
|
|
"valid_targets_mean": 13257.8,
|
|
"valid_targets_min": 10153
|
|
},
|
|
{
|
|
"epoch": 1.716417910447761,
|
|
"grad_norm": 0.32688882568803734,
|
|
"learning_rate": 3.3288103357078244e-05,
|
|
"loss": 1.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30736756324768066,
|
|
"step": 403,
|
|
"valid_targets_mean": 16162.8,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 1.720682302771855,
|
|
"grad_norm": 0.3331310863968516,
|
|
"learning_rate": 3.324361823604529e-05,
|
|
"loss": 1.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33133578300476074,
|
|
"step": 404,
|
|
"valid_targets_mean": 16163.1,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 1.724946695095949,
|
|
"grad_norm": 0.35818787168114613,
|
|
"learning_rate": 3.319901612302881e-05,
|
|
"loss": 1.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26041942834854126,
|
|
"step": 405,
|
|
"valid_targets_mean": 15849.7,
|
|
"valid_targets_min": 14606
|
|
},
|
|
{
|
|
"epoch": 1.7292110874200426,
|
|
"grad_norm": 0.4007074600786429,
|
|
"learning_rate": 3.315429741203666e-05,
|
|
"loss": 1.0603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27965423464775085,
|
|
"step": 406,
|
|
"valid_targets_mean": 16208.5,
|
|
"valid_targets_min": 15536
|
|
},
|
|
{
|
|
"epoch": 1.7334754797441365,
|
|
"grad_norm": 0.3203604311420045,
|
|
"learning_rate": 3.3109462498106705e-05,
|
|
"loss": 1.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20618872344493866,
|
|
"step": 407,
|
|
"valid_targets_mean": 11185.1,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 1.7377398720682304,
|
|
"grad_norm": 0.3333080315095021,
|
|
"learning_rate": 3.306451177730333e-05,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642345428466797,
|
|
"step": 408,
|
|
"valid_targets_mean": 15921.4,
|
|
"valid_targets_min": 15152
|
|
},
|
|
{
|
|
"epoch": 1.7420042643923241,
|
|
"grad_norm": 0.3843812441465554,
|
|
"learning_rate": 3.301944564671394e-05,
|
|
"loss": 1.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3201923966407776,
|
|
"step": 409,
|
|
"valid_targets_mean": 16101.6,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 1.7462686567164178,
|
|
"grad_norm": 0.32315567146113006,
|
|
"learning_rate": 3.297426450444546e-05,
|
|
"loss": 1.0773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18927864730358124,
|
|
"step": 410,
|
|
"valid_targets_mean": 10622.0,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 1.7505330490405118,
|
|
"grad_norm": 0.2968381836926156,
|
|
"learning_rate": 3.292896874962078e-05,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2641192376613617,
|
|
"step": 411,
|
|
"valid_targets_mean": 16163.8,
|
|
"valid_targets_min": 14690
|
|
},
|
|
{
|
|
"epoch": 1.7547974413646057,
|
|
"grad_norm": 0.3352633067258525,
|
|
"learning_rate": 3.2883558782375294e-05,
|
|
"loss": 1.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29602476954460144,
|
|
"step": 412,
|
|
"valid_targets_mean": 16117.5,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 1.7590618336886994,
|
|
"grad_norm": 0.28476969110178263,
|
|
"learning_rate": 3.283803500385332e-05,
|
|
"loss": 1.0934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22423139214515686,
|
|
"step": 413,
|
|
"valid_targets_mean": 12500.9,
|
|
"valid_targets_min": 9491
|
|
},
|
|
{
|
|
"epoch": 1.763326226012793,
|
|
"grad_norm": 0.30474507510851556,
|
|
"learning_rate": 3.2792397816204546e-05,
|
|
"loss": 1.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29216915369033813,
|
|
"step": 414,
|
|
"valid_targets_mean": 16139.3,
|
|
"valid_targets_min": 15077
|
|
},
|
|
{
|
|
"epoch": 1.767590618336887,
|
|
"grad_norm": 0.2888866831703543,
|
|
"learning_rate": 3.2746647622580524e-05,
|
|
"loss": 1.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3108062148094177,
|
|
"step": 415,
|
|
"valid_targets_mean": 16148.9,
|
|
"valid_targets_min": 15485
|
|
},
|
|
{
|
|
"epoch": 1.771855010660981,
|
|
"grad_norm": 0.32915338022522184,
|
|
"learning_rate": 3.270078482713106e-05,
|
|
"loss": 1.0337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23528018593788147,
|
|
"step": 416,
|
|
"valid_targets_mean": 14075.5,
|
|
"valid_targets_min": 11572
|
|
},
|
|
{
|
|
"epoch": 1.7761194029850746,
|
|
"grad_norm": 0.30919496917968614,
|
|
"learning_rate": 3.265480983500069e-05,
|
|
"loss": 1.063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30584001541137695,
|
|
"step": 417,
|
|
"valid_targets_mean": 16051.5,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 1.7803837953091683,
|
|
"grad_norm": 0.3441949263098652,
|
|
"learning_rate": 3.260872305232507e-05,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25926971435546875,
|
|
"step": 418,
|
|
"valid_targets_mean": 13168.9,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 1.7846481876332623,
|
|
"grad_norm": 0.36423291496651444,
|
|
"learning_rate": 3.256252488622738e-05,
|
|
"loss": 1.0468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24720856547355652,
|
|
"step": 419,
|
|
"valid_targets_mean": 16116.3,
|
|
"valid_targets_min": 13473
|
|
},
|
|
{
|
|
"epoch": 1.7889125799573562,
|
|
"grad_norm": 0.4247004875254001,
|
|
"learning_rate": 3.251621574481475e-05,
|
|
"loss": 1.0639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29388993978500366,
|
|
"step": 420,
|
|
"valid_targets_mean": 16112.0,
|
|
"valid_targets_min": 14287
|
|
},
|
|
{
|
|
"epoch": 1.79317697228145,
|
|
"grad_norm": 0.29766378137701294,
|
|
"learning_rate": 3.246979603717467e-05,
|
|
"loss": 1.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20671671628952026,
|
|
"step": 421,
|
|
"valid_targets_mean": 10187.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.7974413646055436,
|
|
"grad_norm": 0.40271557718751394,
|
|
"learning_rate": 3.242326617337133e-05,
|
|
"loss": 1.0478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2497304528951645,
|
|
"step": 422,
|
|
"valid_targets_mean": 16181.6,
|
|
"valid_targets_min": 14875
|
|
},
|
|
{
|
|
"epoch": 1.8017057569296375,
|
|
"grad_norm": 0.3782484850120709,
|
|
"learning_rate": 3.2376626564442016e-05,
|
|
"loss": 1.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31483617424964905,
|
|
"step": 423,
|
|
"valid_targets_mean": 16183.7,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 1.8059701492537314,
|
|
"grad_norm": 0.30649531604027724,
|
|
"learning_rate": 3.2329877622393515e-05,
|
|
"loss": 1.0504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2193414866924286,
|
|
"step": 424,
|
|
"valid_targets_mean": 11132.9,
|
|
"valid_targets_min": 7632
|
|
},
|
|
{
|
|
"epoch": 1.8102345415778252,
|
|
"grad_norm": 0.35448072761495,
|
|
"learning_rate": 3.228301976019841e-05,
|
|
"loss": 1.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291428804397583,
|
|
"step": 425,
|
|
"valid_targets_mean": 16018.5,
|
|
"valid_targets_min": 14821
|
|
},
|
|
{
|
|
"epoch": 1.8144989339019189,
|
|
"grad_norm": 0.3390792473030516,
|
|
"learning_rate": 3.22360533917915e-05,
|
|
"loss": 1.0773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2975163459777832,
|
|
"step": 426,
|
|
"valid_targets_mean": 16201.6,
|
|
"valid_targets_min": 15221
|
|
},
|
|
{
|
|
"epoch": 1.8187633262260128,
|
|
"grad_norm": 0.3147806679424687,
|
|
"learning_rate": 3.218897893206608e-05,
|
|
"loss": 1.0406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2215021848678589,
|
|
"step": 427,
|
|
"valid_targets_mean": 13069.1,
|
|
"valid_targets_min": 8831
|
|
},
|
|
{
|
|
"epoch": 1.8230277185501067,
|
|
"grad_norm": 0.36019990325979584,
|
|
"learning_rate": 3.2141796796870335e-05,
|
|
"loss": 1.0823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969133257865906,
|
|
"step": 428,
|
|
"valid_targets_mean": 16014.8,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 1.8272921108742004,
|
|
"grad_norm": 0.2981772660966227,
|
|
"learning_rate": 3.2094507403003614e-05,
|
|
"loss": 1.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31634145975112915,
|
|
"step": 429,
|
|
"valid_targets_mean": 16206.0,
|
|
"valid_targets_min": 15778
|
|
},
|
|
{
|
|
"epoch": 1.831556503198294,
|
|
"grad_norm": 0.3562159773344419,
|
|
"learning_rate": 3.2047111168212785e-05,
|
|
"loss": 1.1017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830113470554352,
|
|
"step": 430,
|
|
"valid_targets_mean": 15181.8,
|
|
"valid_targets_min": 13169
|
|
},
|
|
{
|
|
"epoch": 1.835820895522388,
|
|
"grad_norm": 0.3462357812701073,
|
|
"learning_rate": 3.1999608511188524e-05,
|
|
"loss": 1.0453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28061801195144653,
|
|
"step": 431,
|
|
"valid_targets_mean": 16113.2,
|
|
"valid_targets_min": 13828
|
|
},
|
|
{
|
|
"epoch": 1.840085287846482,
|
|
"grad_norm": 0.2712904109633026,
|
|
"learning_rate": 3.1951999851561625e-05,
|
|
"loss": 1.1287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2145303189754486,
|
|
"step": 432,
|
|
"valid_targets_mean": 11254.6,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 1.8443496801705757,
|
|
"grad_norm": 0.3372878988948171,
|
|
"learning_rate": 3.190428560989931e-05,
|
|
"loss": 1.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269620805978775,
|
|
"step": 433,
|
|
"valid_targets_mean": 15552.6,
|
|
"valid_targets_min": 14147
|
|
},
|
|
{
|
|
"epoch": 1.8486140724946694,
|
|
"grad_norm": 0.33502781437527734,
|
|
"learning_rate": 3.185646620770146e-05,
|
|
"loss": 1.0554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3428090810775757,
|
|
"step": 434,
|
|
"valid_targets_mean": 16115.7,
|
|
"valid_targets_min": 15331
|
|
},
|
|
{
|
|
"epoch": 1.8528784648187633,
|
|
"grad_norm": 0.32083840655486917,
|
|
"learning_rate": 3.180854206739696e-05,
|
|
"loss": 1.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15823984146118164,
|
|
"step": 435,
|
|
"valid_targets_mean": 7300.2,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.3655942212180277,
|
|
"learning_rate": 3.176051361233991e-05,
|
|
"loss": 1.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27045610547065735,
|
|
"step": 436,
|
|
"valid_targets_mean": 16174.1,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 1.861407249466951,
|
|
"grad_norm": 0.37573240088501814,
|
|
"learning_rate": 3.171238126680594e-05,
|
|
"loss": 1.0503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29048848152160645,
|
|
"step": 437,
|
|
"valid_targets_mean": 16202.2,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 1.8656716417910446,
|
|
"grad_norm": 0.352557840532509,
|
|
"learning_rate": 3.166414545598839e-05,
|
|
"loss": 1.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23219341039657593,
|
|
"step": 438,
|
|
"valid_targets_mean": 12580.4,
|
|
"valid_targets_min": 10074
|
|
},
|
|
{
|
|
"epoch": 1.8699360341151388,
|
|
"grad_norm": 0.40604007958968985,
|
|
"learning_rate": 3.161580660599464e-05,
|
|
"loss": 1.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27523332834243774,
|
|
"step": 439,
|
|
"valid_targets_mean": 15703.7,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 1.8742004264392325,
|
|
"grad_norm": 0.3578916382684771,
|
|
"learning_rate": 3.1567365143842264e-05,
|
|
"loss": 1.0602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3120695650577545,
|
|
"step": 440,
|
|
"valid_targets_mean": 16157.0,
|
|
"valid_targets_min": 15368
|
|
},
|
|
{
|
|
"epoch": 1.8784648187633262,
|
|
"grad_norm": 0.29322622999687875,
|
|
"learning_rate": 3.1518821497455326e-05,
|
|
"loss": 1.0661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22893603146076202,
|
|
"step": 441,
|
|
"valid_targets_mean": 13424.1,
|
|
"valid_targets_min": 11379
|
|
},
|
|
{
|
|
"epoch": 1.88272921108742,
|
|
"grad_norm": 0.36915524627767804,
|
|
"learning_rate": 3.147017609566054e-05,
|
|
"loss": 1.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3331189453601837,
|
|
"step": 442,
|
|
"valid_targets_mean": 16049.3,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 1.886993603411514,
|
|
"grad_norm": 0.25054962444606416,
|
|
"learning_rate": 3.142142936818353e-05,
|
|
"loss": 1.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25321656465530396,
|
|
"step": 443,
|
|
"valid_targets_mean": 13397.8,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.8912579957356077,
|
|
"grad_norm": 0.3509714430545398,
|
|
"learning_rate": 3.137258174564501e-05,
|
|
"loss": 1.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27710413932800293,
|
|
"step": 444,
|
|
"valid_targets_mean": 16007.1,
|
|
"valid_targets_min": 15085
|
|
},
|
|
{
|
|
"epoch": 1.8955223880597014,
|
|
"grad_norm": 0.35482602042188444,
|
|
"learning_rate": 3.1323633659556986e-05,
|
|
"loss": 1.0238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886030673980713,
|
|
"step": 445,
|
|
"valid_targets_mean": 16240.1,
|
|
"valid_targets_min": 15762
|
|
},
|
|
{
|
|
"epoch": 1.8997867803837953,
|
|
"grad_norm": 0.26773049702383594,
|
|
"learning_rate": 3.127458554231894e-05,
|
|
"loss": 1.1115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1944742053747177,
|
|
"step": 446,
|
|
"valid_targets_mean": 9927.9,
|
|
"valid_targets_min": 2329
|
|
},
|
|
{
|
|
"epoch": 1.9040511727078893,
|
|
"grad_norm": 0.3483780481045023,
|
|
"learning_rate": 3.122543782721402e-05,
|
|
"loss": 1.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27462565898895264,
|
|
"step": 447,
|
|
"valid_targets_mean": 16032.5,
|
|
"valid_targets_min": 15102
|
|
},
|
|
{
|
|
"epoch": 1.908315565031983,
|
|
"grad_norm": 0.3368829130798326,
|
|
"learning_rate": 3.1176190948405194e-05,
|
|
"loss": 1.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34951770305633545,
|
|
"step": 448,
|
|
"valid_targets_mean": 16119.2,
|
|
"valid_targets_min": 15712
|
|
},
|
|
{
|
|
"epoch": 1.9125799573560767,
|
|
"grad_norm": 0.30268057780648044,
|
|
"learning_rate": 3.112684534093142e-05,
|
|
"loss": 1.0877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19990648329257965,
|
|
"step": 449,
|
|
"valid_targets_mean": 10913.1,
|
|
"valid_targets_min": 6367
|
|
},
|
|
{
|
|
"epoch": 1.9168443496801706,
|
|
"grad_norm": 0.30358006933130305,
|
|
"learning_rate": 3.107740144070385e-05,
|
|
"loss": 1.0234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2891259789466858,
|
|
"step": 450,
|
|
"valid_targets_mean": 16040.9,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 1.9211087420042645,
|
|
"grad_norm": 0.3628808145609311,
|
|
"learning_rate": 3.102785968450188e-05,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000900149345398,
|
|
"step": 451,
|
|
"valid_targets_mean": 16137.6,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 1.9253731343283582,
|
|
"grad_norm": 0.2764530866126593,
|
|
"learning_rate": 3.09782205099694e-05,
|
|
"loss": 1.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544265389442444,
|
|
"step": 452,
|
|
"valid_targets_mean": 13191.7,
|
|
"valid_targets_min": 10839
|
|
},
|
|
{
|
|
"epoch": 1.929637526652452,
|
|
"grad_norm": 0.28797232262388883,
|
|
"learning_rate": 3.092848435561084e-05,
|
|
"loss": 1.0382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28947630524635315,
|
|
"step": 453,
|
|
"valid_targets_mean": 16126.2,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 1.9339019189765458,
|
|
"grad_norm": 0.31616751921565583,
|
|
"learning_rate": 3.0878651660787376e-05,
|
|
"loss": 1.0735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999494671821594,
|
|
"step": 454,
|
|
"valid_targets_mean": 16165.5,
|
|
"valid_targets_min": 14997
|
|
},
|
|
{
|
|
"epoch": 1.9381663113006398,
|
|
"grad_norm": 0.26746073217550403,
|
|
"learning_rate": 3.082872286571295e-05,
|
|
"loss": 1.061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623918652534485,
|
|
"step": 455,
|
|
"valid_targets_mean": 14387.2,
|
|
"valid_targets_min": 12697
|
|
},
|
|
{
|
|
"epoch": 1.9424307036247335,
|
|
"grad_norm": 0.2840271608173446,
|
|
"learning_rate": 3.077869841145049e-05,
|
|
"loss": 1.0563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30360060930252075,
|
|
"step": 456,
|
|
"valid_targets_mean": 16208.8,
|
|
"valid_targets_min": 15627
|
|
},
|
|
{
|
|
"epoch": 1.9466950959488272,
|
|
"grad_norm": 0.28298849548309235,
|
|
"learning_rate": 3.0728578739907934e-05,
|
|
"loss": 1.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2182055413722992,
|
|
"step": 457,
|
|
"valid_targets_mean": 10413.9,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 1.950959488272921,
|
|
"grad_norm": 0.267402368213611,
|
|
"learning_rate": 3.067836429383437e-05,
|
|
"loss": 1.0376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2391853779554367,
|
|
"step": 458,
|
|
"valid_targets_mean": 16266.1,
|
|
"valid_targets_min": 16019
|
|
},
|
|
{
|
|
"epoch": 1.955223880597015,
|
|
"grad_norm": 0.27519623772632346,
|
|
"learning_rate": 3.062805551681609e-05,
|
|
"loss": 1.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138701915740967,
|
|
"step": 459,
|
|
"valid_targets_mean": 16080.7,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 1.9594882729211087,
|
|
"grad_norm": 0.3412396846851224,
|
|
"learning_rate": 3.057765285327271e-05,
|
|
"loss": 1.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13240568339824677,
|
|
"step": 460,
|
|
"valid_targets_mean": 6411.7,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 1.9637526652452024,
|
|
"grad_norm": 0.27637519468535493,
|
|
"learning_rate": 3.0527156748453214e-05,
|
|
"loss": 1.0246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705056965351105,
|
|
"step": 461,
|
|
"valid_targets_mean": 16094.2,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 1.9680170575692963,
|
|
"grad_norm": 0.2491017774469789,
|
|
"learning_rate": 3.047656764843203e-05,
|
|
"loss": 1.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31651365756988525,
|
|
"step": 462,
|
|
"valid_targets_mean": 16166.5,
|
|
"valid_targets_min": 15218
|
|
},
|
|
{
|
|
"epoch": 1.9722814498933903,
|
|
"grad_norm": 0.2898466387677518,
|
|
"learning_rate": 3.0425886000105094e-05,
|
|
"loss": 1.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2042815089225769,
|
|
"step": 463,
|
|
"valid_targets_mean": 12991.4,
|
|
"valid_targets_min": 11115
|
|
},
|
|
{
|
|
"epoch": 1.976545842217484,
|
|
"grad_norm": 0.24076807322050023,
|
|
"learning_rate": 3.0375112251185892e-05,
|
|
"loss": 1.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28346341848373413,
|
|
"step": 464,
|
|
"valid_targets_mean": 16167.2,
|
|
"valid_targets_min": 15368
|
|
},
|
|
{
|
|
"epoch": 1.9808102345415777,
|
|
"grad_norm": 0.2987602021633277,
|
|
"learning_rate": 3.0324246850201527e-05,
|
|
"loss": 1.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3213338851928711,
|
|
"step": 465,
|
|
"valid_targets_mean": 16105.9,
|
|
"valid_targets_min": 15564
|
|
},
|
|
{
|
|
"epoch": 1.9850746268656716,
|
|
"grad_norm": 0.2728017482563389,
|
|
"learning_rate": 3.0273290246488732e-05,
|
|
"loss": 1.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25564104318618774,
|
|
"step": 466,
|
|
"valid_targets_mean": 15038.5,
|
|
"valid_targets_min": 13132
|
|
},
|
|
{
|
|
"epoch": 1.9893390191897655,
|
|
"grad_norm": 0.27178003591024674,
|
|
"learning_rate": 3.0222242890189904e-05,
|
|
"loss": 1.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3196239471435547,
|
|
"step": 467,
|
|
"valid_targets_mean": 16099.2,
|
|
"valid_targets_min": 15195
|
|
},
|
|
{
|
|
"epoch": 1.9936034115138592,
|
|
"grad_norm": 0.2960811264621479,
|
|
"learning_rate": 3.017110523224914e-05,
|
|
"loss": 1.0748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.243838369846344,
|
|
"step": 468,
|
|
"valid_targets_mean": 13108.7,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 1.997867803837953,
|
|
"grad_norm": 0.2900638231868673,
|
|
"learning_rate": 3.011987772440825e-05,
|
|
"loss": 1.1061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269381582736969,
|
|
"step": 469,
|
|
"valid_targets_mean": 15314.6,
|
|
"valid_targets_min": 13039
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.368934520127521,
|
|
"learning_rate": 3.006856081920277e-05,
|
|
"loss": 1.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4675736427307129,
|
|
"step": 470,
|
|
"valid_targets_mean": 10737.9,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 2.0042643923240937,
|
|
"grad_norm": 0.3552345026600377,
|
|
"learning_rate": 3.001715496995793e-05,
|
|
"loss": 1.0287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579972743988037,
|
|
"step": 471,
|
|
"valid_targets_mean": 16126.5,
|
|
"valid_targets_min": 15300
|
|
},
|
|
{
|
|
"epoch": 2.008528784648188,
|
|
"grad_norm": 0.39123834746131897,
|
|
"learning_rate": 2.9965660630784715e-05,
|
|
"loss": 1.0687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31611669063568115,
|
|
"step": 472,
|
|
"valid_targets_mean": 16121.2,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 2.0127931769722816,
|
|
"grad_norm": 0.41374695875844714,
|
|
"learning_rate": 2.9914078256575782e-05,
|
|
"loss": 1.0199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19053825736045837,
|
|
"step": 473,
|
|
"valid_targets_mean": 11766.6,
|
|
"valid_targets_min": 7044
|
|
},
|
|
{
|
|
"epoch": 2.0170575692963753,
|
|
"grad_norm": 0.40073365553126444,
|
|
"learning_rate": 2.9862408303001492e-05,
|
|
"loss": 1.0289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865005135536194,
|
|
"step": 474,
|
|
"valid_targets_mean": 16154.8,
|
|
"valid_targets_min": 15606
|
|
},
|
|
{
|
|
"epoch": 2.021321961620469,
|
|
"grad_norm": 0.46584459230727177,
|
|
"learning_rate": 2.9810651226505875e-05,
|
|
"loss": 1.0107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29241612553596497,
|
|
"step": 475,
|
|
"valid_targets_mean": 16143.0,
|
|
"valid_targets_min": 14917
|
|
},
|
|
{
|
|
"epoch": 2.025586353944563,
|
|
"grad_norm": 0.40495999614766015,
|
|
"learning_rate": 2.9758807484302566e-05,
|
|
"loss": 1.0117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22781220078468323,
|
|
"step": 476,
|
|
"valid_targets_mean": 14726.9,
|
|
"valid_targets_min": 13072
|
|
},
|
|
{
|
|
"epoch": 2.029850746268657,
|
|
"grad_norm": 0.34368125435399516,
|
|
"learning_rate": 2.9706877534370822e-05,
|
|
"loss": 1.0104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2631028890609741,
|
|
"step": 477,
|
|
"valid_targets_mean": 16200.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 2.0341151385927505,
|
|
"grad_norm": 0.4136824421856256,
|
|
"learning_rate": 2.965486183545142e-05,
|
|
"loss": 1.0937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24957922101020813,
|
|
"step": 478,
|
|
"valid_targets_mean": 13077.4,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 2.038379530916844,
|
|
"grad_norm": 0.3581098227344428,
|
|
"learning_rate": 2.9602760847042645e-05,
|
|
"loss": 1.0235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24706491827964783,
|
|
"step": 479,
|
|
"valid_targets_mean": 15628.0,
|
|
"valid_targets_min": 14094
|
|
},
|
|
{
|
|
"epoch": 2.0426439232409384,
|
|
"grad_norm": 0.3287300126568842,
|
|
"learning_rate": 2.955057502939621e-05,
|
|
"loss": 1.0477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831410765647888,
|
|
"step": 480,
|
|
"valid_targets_mean": 16204.5,
|
|
"valid_targets_min": 15520
|
|
},
|
|
{
|
|
"epoch": 2.046908315565032,
|
|
"grad_norm": 0.36544509127089886,
|
|
"learning_rate": 2.9498304843513193e-05,
|
|
"loss": 1.0476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16354572772979736,
|
|
"step": 481,
|
|
"valid_targets_mean": 8899.1,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 2.0511727078891258,
|
|
"grad_norm": 0.29290892374778665,
|
|
"learning_rate": 2.9445950751139957e-05,
|
|
"loss": 0.9926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25088614225387573,
|
|
"step": 482,
|
|
"valid_targets_mean": 16143.0,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 2.0554371002132195,
|
|
"grad_norm": 0.3413415806747987,
|
|
"learning_rate": 2.939351321476412e-05,
|
|
"loss": 1.0442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098033368587494,
|
|
"step": 483,
|
|
"valid_targets_mean": 16180.8,
|
|
"valid_targets_min": 15538
|
|
},
|
|
{
|
|
"epoch": 2.0597014925373136,
|
|
"grad_norm": 0.3477262822317496,
|
|
"learning_rate": 2.9340992697610393e-05,
|
|
"loss": 1.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23189440369606018,
|
|
"step": 484,
|
|
"valid_targets_mean": 12631.6,
|
|
"valid_targets_min": 7725
|
|
},
|
|
{
|
|
"epoch": 2.0639658848614073,
|
|
"grad_norm": 0.25626551753147225,
|
|
"learning_rate": 2.9288389663636537e-05,
|
|
"loss": 1.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773468494415283,
|
|
"step": 485,
|
|
"valid_targets_mean": 16057.7,
|
|
"valid_targets_min": 14555
|
|
},
|
|
{
|
|
"epoch": 2.068230277185501,
|
|
"grad_norm": 0.2975738669309161,
|
|
"learning_rate": 2.923570457752925e-05,
|
|
"loss": 0.9844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952193021774292,
|
|
"step": 486,
|
|
"valid_targets_mean": 16159.5,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 2.0724946695095947,
|
|
"grad_norm": 0.3185341360493201,
|
|
"learning_rate": 2.9182937904700078e-05,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23297083377838135,
|
|
"step": 487,
|
|
"valid_targets_mean": 13743.6,
|
|
"valid_targets_min": 11030
|
|
},
|
|
{
|
|
"epoch": 2.076759061833689,
|
|
"grad_norm": 0.33249335218498505,
|
|
"learning_rate": 2.9130090111281278e-05,
|
|
"loss": 1.0131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27144575119018555,
|
|
"step": 488,
|
|
"valid_targets_mean": 16219.4,
|
|
"valid_targets_min": 15554
|
|
},
|
|
{
|
|
"epoch": 2.0810234541577826,
|
|
"grad_norm": 0.2996105345731296,
|
|
"learning_rate": 2.9077161664121722e-05,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3078475892543793,
|
|
"step": 489,
|
|
"valid_targets_mean": 16029.9,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 2.0852878464818763,
|
|
"grad_norm": 0.368268651082118,
|
|
"learning_rate": 2.902415303078275e-05,
|
|
"loss": 1.0256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23868870735168457,
|
|
"step": 490,
|
|
"valid_targets_mean": 15594.2,
|
|
"valid_targets_min": 13709
|
|
},
|
|
{
|
|
"epoch": 2.08955223880597,
|
|
"grad_norm": 0.3316811583389926,
|
|
"learning_rate": 2.8971064679534072e-05,
|
|
"loss": 1.0153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731947600841522,
|
|
"step": 491,
|
|
"valid_targets_mean": 16149.3,
|
|
"valid_targets_min": 15057
|
|
},
|
|
{
|
|
"epoch": 2.093816631130064,
|
|
"grad_norm": 0.3417240842034942,
|
|
"learning_rate": 2.8917897079349604e-05,
|
|
"loss": 1.0379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2092648297548294,
|
|
"step": 492,
|
|
"valid_targets_mean": 10589.6,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 2.098081023454158,
|
|
"grad_norm": 0.40376931007632894,
|
|
"learning_rate": 2.8864650699903336e-05,
|
|
"loss": 1.0248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569881081581116,
|
|
"step": 493,
|
|
"valid_targets_mean": 15575.6,
|
|
"valid_targets_min": 12875
|
|
},
|
|
{
|
|
"epoch": 2.1023454157782515,
|
|
"grad_norm": 0.30769674885232867,
|
|
"learning_rate": 2.881132601156518e-05,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3187497854232788,
|
|
"step": 494,
|
|
"valid_targets_mean": 16201.0,
|
|
"valid_targets_min": 15588
|
|
},
|
|
{
|
|
"epoch": 2.106609808102345,
|
|
"grad_norm": 0.33552517299223356,
|
|
"learning_rate": 2.8757923485396805e-05,
|
|
"loss": 1.0356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15743032097816467,
|
|
"step": 495,
|
|
"valid_targets_mean": 8183.4,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 2.1108742004264394,
|
|
"grad_norm": 0.3424571034637037,
|
|
"learning_rate": 2.8704443593147517e-05,
|
|
"loss": 1.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27219459414482117,
|
|
"step": 496,
|
|
"valid_targets_mean": 16106.9,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 2.115138592750533,
|
|
"grad_norm": 0.2808950772476962,
|
|
"learning_rate": 2.8650886807250024e-05,
|
|
"loss": 1.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3192838728427887,
|
|
"step": 497,
|
|
"valid_targets_mean": 16090.3,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 2.1194029850746268,
|
|
"grad_norm": 0.37199865580909014,
|
|
"learning_rate": 2.8597253600816332e-05,
|
|
"loss": 1.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2070493996143341,
|
|
"step": 498,
|
|
"valid_targets_mean": 11938.5,
|
|
"valid_targets_min": 10146
|
|
},
|
|
{
|
|
"epoch": 2.1236673773987205,
|
|
"grad_norm": 0.3091647855511728,
|
|
"learning_rate": 2.8543544447633517e-05,
|
|
"loss": 1.0565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848047614097595,
|
|
"step": 499,
|
|
"valid_targets_mean": 16117.8,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 2.1279317697228146,
|
|
"grad_norm": 0.4360073570883157,
|
|
"learning_rate": 2.8489759822159558e-05,
|
|
"loss": 1.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3340121805667877,
|
|
"step": 500,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15565
|
|
},
|
|
{
|
|
"epoch": 2.1321961620469083,
|
|
"grad_norm": 0.35047972464084715,
|
|
"learning_rate": 2.843590019951914e-05,
|
|
"loss": 1.0502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22108137607574463,
|
|
"step": 501,
|
|
"valid_targets_mean": 13181.2,
|
|
"valid_targets_min": 11593
|
|
},
|
|
{
|
|
"epoch": 2.136460554371002,
|
|
"grad_norm": 0.40037840006291353,
|
|
"learning_rate": 2.838196605549948e-05,
|
|
"loss": 1.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31084370613098145,
|
|
"step": 502,
|
|
"valid_targets_mean": 16180.7,
|
|
"valid_targets_min": 15667
|
|
},
|
|
{
|
|
"epoch": 2.140724946695096,
|
|
"grad_norm": 0.3146422639655215,
|
|
"learning_rate": 2.8327957866546082e-05,
|
|
"loss": 1.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25872519612312317,
|
|
"step": 503,
|
|
"valid_targets_mean": 12895.1,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 2.14498933901919,
|
|
"grad_norm": 0.36862933115455493,
|
|
"learning_rate": 2.8273876109758568e-05,
|
|
"loss": 1.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26297274231910706,
|
|
"step": 504,
|
|
"valid_targets_mean": 15436.5,
|
|
"valid_targets_min": 14157
|
|
},
|
|
{
|
|
"epoch": 2.1492537313432836,
|
|
"grad_norm": 0.361459762761618,
|
|
"learning_rate": 2.8219721262886427e-05,
|
|
"loss": 1.0455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31806501746177673,
|
|
"step": 505,
|
|
"valid_targets_mean": 16037.3,
|
|
"valid_targets_min": 13211
|
|
},
|
|
{
|
|
"epoch": 2.1535181236673773,
|
|
"grad_norm": 0.2864709994302262,
|
|
"learning_rate": 2.816549380432483e-05,
|
|
"loss": 1.0265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18135976791381836,
|
|
"step": 506,
|
|
"valid_targets_mean": 9544.8,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 2.1577825159914714,
|
|
"grad_norm": 0.3350310583584809,
|
|
"learning_rate": 2.8111194213110386e-05,
|
|
"loss": 1.0286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634314298629761,
|
|
"step": 507,
|
|
"valid_targets_mean": 15868.5,
|
|
"valid_targets_min": 9799
|
|
},
|
|
{
|
|
"epoch": 2.162046908315565,
|
|
"grad_norm": 0.3255526769969389,
|
|
"learning_rate": 2.805682296891691e-05,
|
|
"loss": 1.074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3196669816970825,
|
|
"step": 508,
|
|
"valid_targets_mean": 16133.4,
|
|
"valid_targets_min": 15537
|
|
},
|
|
{
|
|
"epoch": 2.166311300639659,
|
|
"grad_norm": 0.32487336416940715,
|
|
"learning_rate": 2.8002380552051186e-05,
|
|
"loss": 1.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16130326688289642,
|
|
"step": 509,
|
|
"valid_targets_mean": 9578.0,
|
|
"valid_targets_min": 5674
|
|
},
|
|
{
|
|
"epoch": 2.1705756929637525,
|
|
"grad_norm": 0.32268877543461205,
|
|
"learning_rate": 2.7947867443448728e-05,
|
|
"loss": 1.0267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731488049030304,
|
|
"step": 510,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 14444
|
|
},
|
|
{
|
|
"epoch": 2.1748400852878467,
|
|
"grad_norm": 0.320456670152423,
|
|
"learning_rate": 2.789328412466953e-05,
|
|
"loss": 1.0742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29730743169784546,
|
|
"step": 511,
|
|
"valid_targets_mean": 16163.5,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 2.1791044776119404,
|
|
"grad_norm": 0.3321709887681174,
|
|
"learning_rate": 2.7838631077893813e-05,
|
|
"loss": 1.0546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21399202942848206,
|
|
"step": 512,
|
|
"valid_targets_mean": 12320.2,
|
|
"valid_targets_min": 10472
|
|
},
|
|
{
|
|
"epoch": 2.183368869936034,
|
|
"grad_norm": 0.39115337491407043,
|
|
"learning_rate": 2.7783908785917753e-05,
|
|
"loss": 1.0746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30008408427238464,
|
|
"step": 513,
|
|
"valid_targets_mean": 16060.1,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 2.1876332622601278,
|
|
"grad_norm": 0.3092000821820992,
|
|
"learning_rate": 2.7729117732149244e-05,
|
|
"loss": 1.0625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982850670814514,
|
|
"step": 514,
|
|
"valid_targets_mean": 15956.0,
|
|
"valid_targets_min": 12151
|
|
},
|
|
{
|
|
"epoch": 2.191897654584222,
|
|
"grad_norm": 0.39058023646491175,
|
|
"learning_rate": 2.7674258400603587e-05,
|
|
"loss": 1.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451504021883011,
|
|
"step": 515,
|
|
"valid_targets_mean": 14654.4,
|
|
"valid_targets_min": 12701
|
|
},
|
|
{
|
|
"epoch": 2.1961620469083156,
|
|
"grad_norm": 0.37207685669025975,
|
|
"learning_rate": 2.761933127589927e-05,
|
|
"loss": 1.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868039608001709,
|
|
"step": 516,
|
|
"valid_targets_mean": 16187.1,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 2.2004264392324093,
|
|
"grad_norm": 0.3478055090012624,
|
|
"learning_rate": 2.7564336843253633e-05,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20308353006839752,
|
|
"step": 517,
|
|
"valid_targets_mean": 11016.9,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 2.204690831556503,
|
|
"grad_norm": 0.2834140615062104,
|
|
"learning_rate": 2.7509275588478606e-05,
|
|
"loss": 1.0648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778732180595398,
|
|
"step": 518,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 15565
|
|
},
|
|
{
|
|
"epoch": 2.208955223880597,
|
|
"grad_norm": 0.35932831920478553,
|
|
"learning_rate": 2.7454147997976404e-05,
|
|
"loss": 1.0134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866620421409607,
|
|
"step": 519,
|
|
"valid_targets_mean": 16204.6,
|
|
"valid_targets_min": 15596
|
|
},
|
|
{
|
|
"epoch": 2.213219616204691,
|
|
"grad_norm": 0.2839057580653145,
|
|
"learning_rate": 2.7398954558735272e-05,
|
|
"loss": 1.0505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1549297571182251,
|
|
"step": 520,
|
|
"valid_targets_mean": 8753.9,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 2.2174840085287846,
|
|
"grad_norm": 0.37158603632443654,
|
|
"learning_rate": 2.7343695758325125e-05,
|
|
"loss": 1.0292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592717111110687,
|
|
"step": 521,
|
|
"valid_targets_mean": 16047.2,
|
|
"valid_targets_min": 15012
|
|
},
|
|
{
|
|
"epoch": 2.2217484008528783,
|
|
"grad_norm": 0.3434129611890377,
|
|
"learning_rate": 2.7288372084893282e-05,
|
|
"loss": 1.0267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2959210276603699,
|
|
"step": 522,
|
|
"valid_targets_mean": 16112.9,
|
|
"valid_targets_min": 14609
|
|
},
|
|
{
|
|
"epoch": 2.2260127931769724,
|
|
"grad_norm": 0.28134739630779465,
|
|
"learning_rate": 2.7232984027160126e-05,
|
|
"loss": 1.037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22468724846839905,
|
|
"step": 523,
|
|
"valid_targets_mean": 12362.7,
|
|
"valid_targets_min": 10008
|
|
},
|
|
{
|
|
"epoch": 2.230277185501066,
|
|
"grad_norm": 0.3786459000817747,
|
|
"learning_rate": 2.7177532074414822e-05,
|
|
"loss": 1.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29006755352020264,
|
|
"step": 524,
|
|
"valid_targets_mean": 16111.7,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 2.23454157782516,
|
|
"grad_norm": 0.31526672316574644,
|
|
"learning_rate": 2.712201671651094e-05,
|
|
"loss": 1.0374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3149791359901428,
|
|
"step": 525,
|
|
"valid_targets_mean": 16096.5,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 2.2388059701492535,
|
|
"grad_norm": 0.3251796622007667,
|
|
"learning_rate": 2.7066438443862205e-05,
|
|
"loss": 1.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26117759943008423,
|
|
"step": 526,
|
|
"valid_targets_mean": 13684.5,
|
|
"valid_targets_min": 11557
|
|
},
|
|
{
|
|
"epoch": 2.2430703624733477,
|
|
"grad_norm": 0.3571788387924515,
|
|
"learning_rate": 2.701079774743808e-05,
|
|
"loss": 1.0497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31474095582962036,
|
|
"step": 527,
|
|
"valid_targets_mean": 16176.9,
|
|
"valid_targets_min": 15185
|
|
},
|
|
{
|
|
"epoch": 2.2473347547974414,
|
|
"grad_norm": 0.31126045337404046,
|
|
"learning_rate": 2.6955095118759496e-05,
|
|
"loss": 1.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2477262169122696,
|
|
"step": 528,
|
|
"valid_targets_mean": 13108.9,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 2.251599147121535,
|
|
"grad_norm": 0.3704432931191125,
|
|
"learning_rate": 2.689933104989447e-05,
|
|
"loss": 1.0356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543630599975586,
|
|
"step": 529,
|
|
"valid_targets_mean": 15346.7,
|
|
"valid_targets_min": 14049
|
|
},
|
|
{
|
|
"epoch": 2.2558635394456292,
|
|
"grad_norm": 0.2890683538315524,
|
|
"learning_rate": 2.6843506033453777e-05,
|
|
"loss": 1.0919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212983012199402,
|
|
"step": 530,
|
|
"valid_targets_mean": 15995.2,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 2.260127931769723,
|
|
"grad_norm": 0.3612846459506985,
|
|
"learning_rate": 2.6787620562586587e-05,
|
|
"loss": 1.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.172796368598938,
|
|
"step": 531,
|
|
"valid_targets_mean": 9370.6,
|
|
"valid_targets_min": 3012
|
|
},
|
|
{
|
|
"epoch": 2.2643923240938166,
|
|
"grad_norm": 0.2753181517426568,
|
|
"learning_rate": 2.673167513097613e-05,
|
|
"loss": 0.9943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23673027753829956,
|
|
"step": 532,
|
|
"valid_targets_mean": 16198.4,
|
|
"valid_targets_min": 15665
|
|
},
|
|
{
|
|
"epoch": 2.2686567164179103,
|
|
"grad_norm": 0.3283853477750083,
|
|
"learning_rate": 2.6675670232835297e-05,
|
|
"loss": 1.0375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29759296774864197,
|
|
"step": 533,
|
|
"valid_targets_mean": 16011.2,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 2.272921108742004,
|
|
"grad_norm": 0.31579501674451577,
|
|
"learning_rate": 2.661960636290231e-05,
|
|
"loss": 1.052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18113857507705688,
|
|
"step": 534,
|
|
"valid_targets_mean": 10492.3,
|
|
"valid_targets_min": 5775
|
|
},
|
|
{
|
|
"epoch": 2.277185501066098,
|
|
"grad_norm": 0.32463975391375466,
|
|
"learning_rate": 2.6563484016436346e-05,
|
|
"loss": 1.056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27056264877319336,
|
|
"step": 535,
|
|
"valid_targets_mean": 16125.0,
|
|
"valid_targets_min": 15091
|
|
},
|
|
{
|
|
"epoch": 2.281449893390192,
|
|
"grad_norm": 0.30961628810230585,
|
|
"learning_rate": 2.6507303689213143e-05,
|
|
"loss": 1.0926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29040077328681946,
|
|
"step": 536,
|
|
"valid_targets_mean": 16207.7,
|
|
"valid_targets_min": 15562
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.30310963051771506,
|
|
"learning_rate": 2.6451065877520634e-05,
|
|
"loss": 1.0289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2182932049036026,
|
|
"step": 537,
|
|
"valid_targets_mean": 13124.1,
|
|
"valid_targets_min": 10613
|
|
},
|
|
{
|
|
"epoch": 2.2899786780383797,
|
|
"grad_norm": 0.2783552318907119,
|
|
"learning_rate": 2.639477107815455e-05,
|
|
"loss": 1.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29935818910598755,
|
|
"step": 538,
|
|
"valid_targets_mean": 16085.9,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 2.2942430703624734,
|
|
"grad_norm": 0.3031273792078499,
|
|
"learning_rate": 2.633841978841406e-05,
|
|
"loss": 1.0427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005887567996979,
|
|
"step": 539,
|
|
"valid_targets_mean": 16127.8,
|
|
"valid_targets_min": 14446
|
|
},
|
|
{
|
|
"epoch": 2.298507462686567,
|
|
"grad_norm": 0.26682988538100505,
|
|
"learning_rate": 2.6282012506097347e-05,
|
|
"loss": 0.9944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2365766316652298,
|
|
"step": 540,
|
|
"valid_targets_mean": 14421.1,
|
|
"valid_targets_min": 12694
|
|
},
|
|
{
|
|
"epoch": 2.302771855010661,
|
|
"grad_norm": 0.2989830647187291,
|
|
"learning_rate": 2.622554972949724e-05,
|
|
"loss": 1.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2973644733428955,
|
|
"step": 541,
|
|
"valid_targets_mean": 16063.4,
|
|
"valid_targets_min": 14259
|
|
},
|
|
{
|
|
"epoch": 2.307036247334755,
|
|
"grad_norm": 0.3320158283938759,
|
|
"learning_rate": 2.6169031957396778e-05,
|
|
"loss": 1.0506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21213622391223907,
|
|
"step": 542,
|
|
"valid_targets_mean": 11104.2,
|
|
"valid_targets_min": 2564
|
|
},
|
|
{
|
|
"epoch": 2.3113006396588487,
|
|
"grad_norm": 0.24704517072991675,
|
|
"learning_rate": 2.611245968906482e-05,
|
|
"loss": 1.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28201478719711304,
|
|
"step": 543,
|
|
"valid_targets_mean": 15902.0,
|
|
"valid_targets_min": 13609
|
|
},
|
|
{
|
|
"epoch": 2.3155650319829424,
|
|
"grad_norm": 0.29276197236011303,
|
|
"learning_rate": 2.605583342425165e-05,
|
|
"loss": 0.9868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27429062128067017,
|
|
"step": 544,
|
|
"valid_targets_mean": 16183.0,
|
|
"valid_targets_min": 15463
|
|
},
|
|
{
|
|
"epoch": 2.319829424307036,
|
|
"grad_norm": 0.2729037664782278,
|
|
"learning_rate": 2.5999153663184546e-05,
|
|
"loss": 1.0622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17813602089881897,
|
|
"step": 545,
|
|
"valid_targets_mean": 9009.8,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 2.3240938166311302,
|
|
"grad_norm": 0.2798009780695239,
|
|
"learning_rate": 2.594242090656335e-05,
|
|
"loss": 1.0542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451293170452118,
|
|
"step": 546,
|
|
"valid_targets_mean": 16194.9,
|
|
"valid_targets_min": 14875
|
|
},
|
|
{
|
|
"epoch": 2.328358208955224,
|
|
"grad_norm": 0.3141975194679562,
|
|
"learning_rate": 2.5885635655556075e-05,
|
|
"loss": 1.0407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28895652294158936,
|
|
"step": 547,
|
|
"valid_targets_mean": 16138.1,
|
|
"valid_targets_min": 14550
|
|
},
|
|
{
|
|
"epoch": 2.3326226012793176,
|
|
"grad_norm": 0.26163605590717465,
|
|
"learning_rate": 2.5828798411794443e-05,
|
|
"loss": 1.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22583462297916412,
|
|
"step": 548,
|
|
"valid_targets_mean": 12317.6,
|
|
"valid_targets_min": 8720
|
|
},
|
|
{
|
|
"epoch": 2.3368869936034113,
|
|
"grad_norm": 0.31763720417289987,
|
|
"learning_rate": 2.5771909677369484e-05,
|
|
"loss": 1.0127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829974889755249,
|
|
"step": 549,
|
|
"valid_targets_mean": 16203.8,
|
|
"valid_targets_min": 15058
|
|
},
|
|
{
|
|
"epoch": 2.3411513859275055,
|
|
"grad_norm": 0.3056235028373722,
|
|
"learning_rate": 2.571496995482709e-05,
|
|
"loss": 1.0358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2864728569984436,
|
|
"step": 550,
|
|
"valid_targets_mean": 16102.8,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 2.345415778251599,
|
|
"grad_norm": 0.2863509643615744,
|
|
"learning_rate": 2.565797974716357e-05,
|
|
"loss": 0.9894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2198735773563385,
|
|
"step": 551,
|
|
"valid_targets_mean": 13737.8,
|
|
"valid_targets_min": 11566
|
|
},
|
|
{
|
|
"epoch": 2.349680170575693,
|
|
"grad_norm": 0.3540898427883017,
|
|
"learning_rate": 2.5600939557821205e-05,
|
|
"loss": 1.0639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31675875186920166,
|
|
"step": 552,
|
|
"valid_targets_mean": 16058.5,
|
|
"valid_targets_min": 14649
|
|
},
|
|
{
|
|
"epoch": 2.3539445628997866,
|
|
"grad_norm": 0.30587327840171247,
|
|
"learning_rate": 2.5543849890683813e-05,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259478896856308,
|
|
"step": 553,
|
|
"valid_targets_mean": 13189.4,
|
|
"valid_targets_min": 3578
|
|
},
|
|
{
|
|
"epoch": 2.3582089552238807,
|
|
"grad_norm": 0.26927479797465537,
|
|
"learning_rate": 2.548671125007229e-05,
|
|
"loss": 1.0295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24672149121761322,
|
|
"step": 554,
|
|
"valid_targets_mean": 15325.0,
|
|
"valid_targets_min": 13465
|
|
},
|
|
{
|
|
"epoch": 2.3624733475479744,
|
|
"grad_norm": 0.35359364585088576,
|
|
"learning_rate": 2.5429524140740155e-05,
|
|
"loss": 1.0446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29348552227020264,
|
|
"step": 555,
|
|
"valid_targets_mean": 16192.3,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 2.366737739872068,
|
|
"grad_norm": 0.2755949486919777,
|
|
"learning_rate": 2.537228906786908e-05,
|
|
"loss": 1.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16546456515789032,
|
|
"step": 556,
|
|
"valid_targets_mean": 8583.2,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 2.3710021321961623,
|
|
"grad_norm": 0.3417479182549748,
|
|
"learning_rate": 2.5315006537064473e-05,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27253687381744385,
|
|
"step": 557,
|
|
"valid_targets_mean": 16119.8,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 2.375266524520256,
|
|
"grad_norm": 0.26979098561187287,
|
|
"learning_rate": 2.5257677054350927e-05,
|
|
"loss": 1.0451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3248399794101715,
|
|
"step": 558,
|
|
"valid_targets_mean": 16112.5,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 2.3795309168443497,
|
|
"grad_norm": 0.2908440350974731,
|
|
"learning_rate": 2.5200301126167857e-05,
|
|
"loss": 1.0183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952691376209259,
|
|
"step": 559,
|
|
"valid_targets_mean": 11318.0,
|
|
"valid_targets_min": 7978
|
|
},
|
|
{
|
|
"epoch": 2.3837953091684434,
|
|
"grad_norm": 0.3078098175009307,
|
|
"learning_rate": 2.514287925936492e-05,
|
|
"loss": 1.0268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2714043855667114,
|
|
"step": 560,
|
|
"valid_targets_mean": 16124.8,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 2.388059701492537,
|
|
"grad_norm": 0.28078401099422134,
|
|
"learning_rate": 2.5085411961197626e-05,
|
|
"loss": 1.0382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055068850517273,
|
|
"step": 561,
|
|
"valid_targets_mean": 16119.8,
|
|
"valid_targets_min": 15507
|
|
},
|
|
{
|
|
"epoch": 2.3923240938166312,
|
|
"grad_norm": 0.29354290379337106,
|
|
"learning_rate": 2.502789973932278e-05,
|
|
"loss": 1.0541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23758560419082642,
|
|
"step": 562,
|
|
"valid_targets_mean": 13217.6,
|
|
"valid_targets_min": 9927
|
|
},
|
|
{
|
|
"epoch": 2.396588486140725,
|
|
"grad_norm": 0.3126625252447049,
|
|
"learning_rate": 2.4970343101794073e-05,
|
|
"loss": 1.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29068368673324585,
|
|
"step": 563,
|
|
"valid_targets_mean": 16146.4,
|
|
"valid_targets_min": 15545
|
|
},
|
|
{
|
|
"epoch": 2.4008528784648187,
|
|
"grad_norm": 0.3034641047561609,
|
|
"learning_rate": 2.4912742557057538e-05,
|
|
"loss": 1.0254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866404056549072,
|
|
"step": 564,
|
|
"valid_targets_mean": 16232.5,
|
|
"valid_targets_min": 15938
|
|
},
|
|
{
|
|
"epoch": 2.405117270788913,
|
|
"grad_norm": 0.3849857823998909,
|
|
"learning_rate": 2.485509861394708e-05,
|
|
"loss": 1.0408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25188344717025757,
|
|
"step": 565,
|
|
"valid_targets_mean": 15403.8,
|
|
"valid_targets_min": 13098
|
|
},
|
|
{
|
|
"epoch": 2.4093816631130065,
|
|
"grad_norm": 0.2917300569778498,
|
|
"learning_rate": 2.4797411781679975e-05,
|
|
"loss": 1.0334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29883241653442383,
|
|
"step": 566,
|
|
"valid_targets_mean": 16083.7,
|
|
"valid_targets_min": 13883
|
|
},
|
|
{
|
|
"epoch": 2.4136460554371,
|
|
"grad_norm": 0.3346796905024958,
|
|
"learning_rate": 2.473968256985238e-05,
|
|
"loss": 1.0992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2282615453004837,
|
|
"step": 567,
|
|
"valid_targets_mean": 11219.6,
|
|
"valid_targets_min": 2899
|
|
},
|
|
{
|
|
"epoch": 2.417910447761194,
|
|
"grad_norm": 0.26051979425264027,
|
|
"learning_rate": 2.4681911488434825e-05,
|
|
"loss": 0.9763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23321135342121124,
|
|
"step": 568,
|
|
"valid_targets_mean": 16104.4,
|
|
"valid_targets_min": 14771
|
|
},
|
|
{
|
|
"epoch": 2.4221748400852876,
|
|
"grad_norm": 0.38129667322859684,
|
|
"learning_rate": 2.4624099047767702e-05,
|
|
"loss": 1.0098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30090954899787903,
|
|
"step": 569,
|
|
"valid_targets_mean": 16132.6,
|
|
"valid_targets_min": 13886
|
|
},
|
|
{
|
|
"epoch": 2.4264392324093818,
|
|
"grad_norm": 0.2604040577862216,
|
|
"learning_rate": 2.4566245758556787e-05,
|
|
"loss": 1.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18206843733787537,
|
|
"step": 570,
|
|
"valid_targets_mean": 9096.2,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 2.4307036247334755,
|
|
"grad_norm": 0.32823939336311225,
|
|
"learning_rate": 2.4508352131868664e-05,
|
|
"loss": 1.0308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2432955950498581,
|
|
"step": 571,
|
|
"valid_targets_mean": 16176.6,
|
|
"valid_targets_min": 14769
|
|
},
|
|
{
|
|
"epoch": 2.434968017057569,
|
|
"grad_norm": 0.2935285323392409,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 1.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31023919582366943,
|
|
"step": 572,
|
|
"valid_targets_mean": 16152.8,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 2.4392324093816633,
|
|
"grad_norm": 0.29705761968405225,
|
|
"learning_rate": 2.439244591210443e-05,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22531543672084808,
|
|
"step": 573,
|
|
"valid_targets_mean": 12206.7,
|
|
"valid_targets_min": 8908
|
|
},
|
|
{
|
|
"epoch": 2.443496801705757,
|
|
"grad_norm": 0.33119219679210354,
|
|
"learning_rate": 2.4334434342925133e-05,
|
|
"loss": 1.0412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790338099002838,
|
|
"step": 574,
|
|
"valid_targets_mean": 16198.4,
|
|
"valid_targets_min": 15622
|
|
},
|
|
{
|
|
"epoch": 2.4477611940298507,
|
|
"grad_norm": 0.2628526445996163,
|
|
"learning_rate": 2.4276384484053227e-05,
|
|
"loss": 1.103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30810728669166565,
|
|
"step": 575,
|
|
"valid_targets_mean": 16086.4,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 2.4520255863539444,
|
|
"grad_norm": 0.3636611140247231,
|
|
"learning_rate": 2.4218296848291795e-05,
|
|
"loss": 1.0571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677125036716461,
|
|
"step": 576,
|
|
"valid_targets_mean": 14360.7,
|
|
"valid_targets_min": 12313
|
|
},
|
|
{
|
|
"epoch": 2.4562899786780386,
|
|
"grad_norm": 0.27541640733521017,
|
|
"learning_rate": 2.4160171948777603e-05,
|
|
"loss": 1.0158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28583091497421265,
|
|
"step": 577,
|
|
"valid_targets_mean": 16193.1,
|
|
"valid_targets_min": 15374
|
|
},
|
|
{
|
|
"epoch": 2.4605543710021323,
|
|
"grad_norm": 0.28867613165733325,
|
|
"learning_rate": 2.410201029897665e-05,
|
|
"loss": 1.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2489250898361206,
|
|
"step": 578,
|
|
"valid_targets_mean": 12682.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 2.464818763326226,
|
|
"grad_norm": 0.2920196335006851,
|
|
"learning_rate": 2.4043812412679532e-05,
|
|
"loss": 1.0545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607208490371704,
|
|
"step": 579,
|
|
"valid_targets_mean": 14894.9,
|
|
"valid_targets_min": 13767
|
|
},
|
|
{
|
|
"epoch": 2.4690831556503197,
|
|
"grad_norm": 0.2521489707227015,
|
|
"learning_rate": 2.3985578803996985e-05,
|
|
"loss": 1.0264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972157299518585,
|
|
"step": 580,
|
|
"valid_targets_mean": 16157.9,
|
|
"valid_targets_min": 14275
|
|
},
|
|
{
|
|
"epoch": 2.473347547974414,
|
|
"grad_norm": 0.299043517373016,
|
|
"learning_rate": 2.392730998735529e-05,
|
|
"loss": 1.0499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17381398379802704,
|
|
"step": 581,
|
|
"valid_targets_mean": 10229.2,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 2.4776119402985075,
|
|
"grad_norm": 0.27413297037766,
|
|
"learning_rate": 2.3869006477491755e-05,
|
|
"loss": 1.0934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281957745552063,
|
|
"step": 582,
|
|
"valid_targets_mean": 15959.8,
|
|
"valid_targets_min": 13835
|
|
},
|
|
{
|
|
"epoch": 2.481876332622601,
|
|
"grad_norm": 0.27270978121053446,
|
|
"learning_rate": 2.381066878945017e-05,
|
|
"loss": 1.0005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29876717925071716,
|
|
"step": 583,
|
|
"valid_targets_mean": 16156.6,
|
|
"valid_targets_min": 15054
|
|
},
|
|
{
|
|
"epoch": 2.486140724946695,
|
|
"grad_norm": 0.2651931261771279,
|
|
"learning_rate": 2.3752297438576257e-05,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18752694129943848,
|
|
"step": 584,
|
|
"valid_targets_mean": 10441.4,
|
|
"valid_targets_min": 7074
|
|
},
|
|
{
|
|
"epoch": 2.490405117270789,
|
|
"grad_norm": 0.23564039562850964,
|
|
"learning_rate": 2.3693892940513074e-05,
|
|
"loss": 1.0593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778427004814148,
|
|
"step": 585,
|
|
"valid_targets_mean": 16191.9,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 2.4946695095948828,
|
|
"grad_norm": 0.2414011698937567,
|
|
"learning_rate": 2.3635455811196536e-05,
|
|
"loss": 1.0473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3203165531158447,
|
|
"step": 586,
|
|
"valid_targets_mean": 16169.7,
|
|
"valid_targets_min": 15678
|
|
},
|
|
{
|
|
"epoch": 2.4989339019189765,
|
|
"grad_norm": 0.2670451356339994,
|
|
"learning_rate": 2.3576986566850796e-05,
|
|
"loss": 1.058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2431665062904358,
|
|
"step": 587,
|
|
"valid_targets_mean": 13982.8,
|
|
"valid_targets_min": 11677
|
|
},
|
|
{
|
|
"epoch": 2.50319829424307,
|
|
"grad_norm": 0.2555070879578905,
|
|
"learning_rate": 2.351848572398371e-05,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670406103134155,
|
|
"step": 588,
|
|
"valid_targets_mean": 16228.9,
|
|
"valid_targets_min": 15674
|
|
},
|
|
{
|
|
"epoch": 2.5074626865671643,
|
|
"grad_norm": 0.272956121938524,
|
|
"learning_rate": 2.3459953799382276e-05,
|
|
"loss": 1.0058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28342461585998535,
|
|
"step": 589,
|
|
"valid_targets_mean": 16192.3,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 2.511727078891258,
|
|
"grad_norm": 0.2778901562741278,
|
|
"learning_rate": 2.3401391310108054e-05,
|
|
"loss": 1.0117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25146615505218506,
|
|
"step": 590,
|
|
"valid_targets_mean": 15235.8,
|
|
"valid_targets_min": 10805
|
|
},
|
|
{
|
|
"epoch": 2.5159914712153517,
|
|
"grad_norm": 0.28858163538064635,
|
|
"learning_rate": 2.3342798773492602e-05,
|
|
"loss": 1.0093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27801817655563354,
|
|
"step": 591,
|
|
"valid_targets_mean": 16150.8,
|
|
"valid_targets_min": 14997
|
|
},
|
|
{
|
|
"epoch": 2.520255863539446,
|
|
"grad_norm": 0.29292018521548724,
|
|
"learning_rate": 2.328417670713294e-05,
|
|
"loss": 1.0332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2068709135055542,
|
|
"step": 592,
|
|
"valid_targets_mean": 10595.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 2.5245202558635396,
|
|
"grad_norm": 0.2578187511591653,
|
|
"learning_rate": 2.3225525628886918e-05,
|
|
"loss": 1.0435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25130176544189453,
|
|
"step": 593,
|
|
"valid_targets_mean": 16091.4,
|
|
"valid_targets_min": 15167
|
|
},
|
|
{
|
|
"epoch": 2.5287846481876333,
|
|
"grad_norm": 0.27303874490141483,
|
|
"learning_rate": 2.3166846056868687e-05,
|
|
"loss": 1.0582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3334190249443054,
|
|
"step": 594,
|
|
"valid_targets_mean": 15568.4,
|
|
"valid_targets_min": 2605
|
|
},
|
|
{
|
|
"epoch": 2.533049040511727,
|
|
"grad_norm": 0.25870877325527797,
|
|
"learning_rate": 2.31081385094441e-05,
|
|
"loss": 1.0387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15699400007724762,
|
|
"step": 595,
|
|
"valid_targets_mean": 8344.5,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 2.5373134328358207,
|
|
"grad_norm": 0.24477176242002976,
|
|
"learning_rate": 2.304940350522615e-05,
|
|
"loss": 1.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562256157398224,
|
|
"step": 596,
|
|
"valid_targets_mean": 16130.5,
|
|
"valid_targets_min": 14384
|
|
},
|
|
{
|
|
"epoch": 2.541577825159915,
|
|
"grad_norm": 0.2692001623707129,
|
|
"learning_rate": 2.299064156307037e-05,
|
|
"loss": 1.0373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30675721168518066,
|
|
"step": 597,
|
|
"valid_targets_mean": 16067.1,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 2.5458422174840085,
|
|
"grad_norm": 0.2500888984021968,
|
|
"learning_rate": 2.2931853202070275e-05,
|
|
"loss": 1.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22056426107883453,
|
|
"step": 598,
|
|
"valid_targets_mean": 13112.9,
|
|
"valid_targets_min": 9219
|
|
},
|
|
{
|
|
"epoch": 2.550106609808102,
|
|
"grad_norm": 0.25852515386306235,
|
|
"learning_rate": 2.2873038941552724e-05,
|
|
"loss": 0.9908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784058153629303,
|
|
"step": 599,
|
|
"valid_targets_mean": 16142.3,
|
|
"valid_targets_min": 13573
|
|
},
|
|
{
|
|
"epoch": 2.5543710021321964,
|
|
"grad_norm": 0.3256240388119314,
|
|
"learning_rate": 2.2814199301073412e-05,
|
|
"loss": 1.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061351776123047,
|
|
"step": 600,
|
|
"valid_targets_mean": 16099.6,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 2.55863539445629,
|
|
"grad_norm": 0.23178965477142577,
|
|
"learning_rate": 2.27553348004122e-05,
|
|
"loss": 1.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541932761669159,
|
|
"step": 601,
|
|
"valid_targets_mean": 14181.0,
|
|
"valid_targets_min": 11734
|
|
},
|
|
{
|
|
"epoch": 2.5628997867803838,
|
|
"grad_norm": 0.29686777150066457,
|
|
"learning_rate": 2.2696445959568577e-05,
|
|
"loss": 1.0465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028012216091156,
|
|
"step": 602,
|
|
"valid_targets_mean": 16122.7,
|
|
"valid_targets_min": 15319
|
|
},
|
|
{
|
|
"epoch": 2.5671641791044775,
|
|
"grad_norm": 0.23765180354060697,
|
|
"learning_rate": 2.2637533298757064e-05,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24337582290172577,
|
|
"step": 603,
|
|
"valid_targets_mean": 13220.1,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.2782471466241931,
|
|
"learning_rate": 2.2578597338402567e-05,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604105472564697,
|
|
"step": 604,
|
|
"valid_targets_mean": 16067.2,
|
|
"valid_targets_min": 14862
|
|
},
|
|
{
|
|
"epoch": 2.5756929637526653,
|
|
"grad_norm": 0.23032037185955748,
|
|
"learning_rate": 2.2519638599135844e-05,
|
|
"loss": 1.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28923940658569336,
|
|
"step": 605,
|
|
"valid_targets_mean": 16087.8,
|
|
"valid_targets_min": 14331
|
|
},
|
|
{
|
|
"epoch": 2.579957356076759,
|
|
"grad_norm": 0.2684594483983001,
|
|
"learning_rate": 2.2460657601788875e-05,
|
|
"loss": 1.0366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18617041409015656,
|
|
"step": 606,
|
|
"valid_targets_mean": 9125.1,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 2.5842217484008527,
|
|
"grad_norm": 0.232317126831534,
|
|
"learning_rate": 2.2401654867390256e-05,
|
|
"loss": 1.032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23461005091667175,
|
|
"step": 607,
|
|
"valid_targets_mean": 16230.6,
|
|
"valid_targets_min": 15761
|
|
},
|
|
{
|
|
"epoch": 2.588486140724947,
|
|
"grad_norm": 0.2813001202797154,
|
|
"learning_rate": 2.2342630917160605e-05,
|
|
"loss": 1.0525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31943753361701965,
|
|
"step": 608,
|
|
"valid_targets_mean": 16008.2,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 2.5927505330490406,
|
|
"grad_norm": 0.2602911096217211,
|
|
"learning_rate": 2.2283586272507975e-05,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20762485265731812,
|
|
"step": 609,
|
|
"valid_targets_mean": 12059.9,
|
|
"valid_targets_min": 7841
|
|
},
|
|
{
|
|
"epoch": 2.5970149253731343,
|
|
"grad_norm": 0.2631319658641279,
|
|
"learning_rate": 2.2224521455023193e-05,
|
|
"loss": 1.0351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27428776025772095,
|
|
"step": 610,
|
|
"valid_targets_mean": 16181.1,
|
|
"valid_targets_min": 15563
|
|
},
|
|
{
|
|
"epoch": 2.6012793176972284,
|
|
"grad_norm": 0.3101522318161875,
|
|
"learning_rate": 2.216543698647534e-05,
|
|
"loss": 1.0351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3216131031513214,
|
|
"step": 611,
|
|
"valid_targets_mean": 16136.7,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 2.605543710021322,
|
|
"grad_norm": 0.23841926854456202,
|
|
"learning_rate": 2.210633338880704e-05,
|
|
"loss": 1.0672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23634910583496094,
|
|
"step": 612,
|
|
"valid_targets_mean": 13411.0,
|
|
"valid_targets_min": 11613
|
|
},
|
|
{
|
|
"epoch": 2.609808102345416,
|
|
"grad_norm": 0.3422990747125992,
|
|
"learning_rate": 2.204721118412994e-05,
|
|
"loss": 1.063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3301277160644531,
|
|
"step": 613,
|
|
"valid_targets_mean": 16006.3,
|
|
"valid_targets_min": 15028
|
|
},
|
|
{
|
|
"epoch": 2.6140724946695095,
|
|
"grad_norm": 0.2443735209147375,
|
|
"learning_rate": 2.1988070894720037e-05,
|
|
"loss": 1.0509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30868402123451233,
|
|
"step": 614,
|
|
"valid_targets_mean": 16166.3,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 2.6183368869936032,
|
|
"grad_norm": 0.2769791836864251,
|
|
"learning_rate": 2.192891304301309e-05,
|
|
"loss": 1.071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24009822309017181,
|
|
"step": 615,
|
|
"valid_targets_mean": 14385.6,
|
|
"valid_targets_min": 12637
|
|
},
|
|
{
|
|
"epoch": 2.6226012793176974,
|
|
"grad_norm": 0.2463937911425666,
|
|
"learning_rate": 2.18697381516e-05,
|
|
"loss": 1.0466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29944130778312683,
|
|
"step": 616,
|
|
"valid_targets_mean": 16184.0,
|
|
"valid_targets_min": 15614
|
|
},
|
|
{
|
|
"epoch": 2.626865671641791,
|
|
"grad_norm": 0.2625493187358278,
|
|
"learning_rate": 2.181054674322221e-05,
|
|
"loss": 1.0306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19397038221359253,
|
|
"step": 617,
|
|
"valid_targets_mean": 10109.2,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 2.631130063965885,
|
|
"grad_norm": 0.26131036300015253,
|
|
"learning_rate": 2.1751339340767043e-05,
|
|
"loss": 0.9939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26601845026016235,
|
|
"step": 618,
|
|
"valid_targets_mean": 16138.7,
|
|
"valid_targets_min": 14734
|
|
},
|
|
{
|
|
"epoch": 2.635394456289979,
|
|
"grad_norm": 0.27251382003765656,
|
|
"learning_rate": 2.169211646726313e-05,
|
|
"loss": 1.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3152587115764618,
|
|
"step": 619,
|
|
"valid_targets_mean": 16145.5,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 2.6396588486140726,
|
|
"grad_norm": 0.28166028840154184,
|
|
"learning_rate": 2.163287864587576e-05,
|
|
"loss": 1.0437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15223997831344604,
|
|
"step": 620,
|
|
"valid_targets_mean": 8127.2,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 2.6439232409381663,
|
|
"grad_norm": 0.2702402182533795,
|
|
"learning_rate": 2.157362639990229e-05,
|
|
"loss": 0.9963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25352510809898376,
|
|
"step": 621,
|
|
"valid_targets_mean": 16050.8,
|
|
"valid_targets_min": 14159
|
|
},
|
|
{
|
|
"epoch": 2.64818763326226,
|
|
"grad_norm": 0.2770114802483943,
|
|
"learning_rate": 2.151436025276747e-05,
|
|
"loss": 1.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3116888403892517,
|
|
"step": 622,
|
|
"valid_targets_mean": 16133.9,
|
|
"valid_targets_min": 13934
|
|
},
|
|
{
|
|
"epoch": 2.6524520255863537,
|
|
"grad_norm": 0.2764848889031583,
|
|
"learning_rate": 2.145508072801888e-05,
|
|
"loss": 1.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531043589115143,
|
|
"step": 623,
|
|
"valid_targets_mean": 13166.8,
|
|
"valid_targets_min": 9709
|
|
},
|
|
{
|
|
"epoch": 2.656716417910448,
|
|
"grad_norm": 0.2516592344298666,
|
|
"learning_rate": 2.1395788349322256e-05,
|
|
"loss": 1.0671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27907490730285645,
|
|
"step": 624,
|
|
"valid_targets_mean": 16132.4,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 2.6609808102345416,
|
|
"grad_norm": 0.31791207712989383,
|
|
"learning_rate": 2.133648364045689e-05,
|
|
"loss": 1.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172053098678589,
|
|
"step": 625,
|
|
"valid_targets_mean": 16104.8,
|
|
"valid_targets_min": 14688
|
|
},
|
|
{
|
|
"epoch": 2.6652452025586353,
|
|
"grad_norm": 0.23359854517165743,
|
|
"learning_rate": 2.1277167125310996e-05,
|
|
"loss": 1.0454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2476862370967865,
|
|
"step": 626,
|
|
"valid_targets_mean": 15302.1,
|
|
"valid_targets_min": 13615
|
|
},
|
|
{
|
|
"epoch": 2.6695095948827294,
|
|
"grad_norm": 0.2967447305403079,
|
|
"learning_rate": 2.1217839327877098e-05,
|
|
"loss": 1.0464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2926100194454193,
|
|
"step": 627,
|
|
"valid_targets_mean": 16166.6,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 2.673773987206823,
|
|
"grad_norm": 0.24300960387242868,
|
|
"learning_rate": 2.1158500772247352e-05,
|
|
"loss": 1.0539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26239803433418274,
|
|
"step": 628,
|
|
"valid_targets_mean": 13016.6,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 2.678038379530917,
|
|
"grad_norm": 0.2794531726229126,
|
|
"learning_rate": 2.1099151982608985e-05,
|
|
"loss": 1.0742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824642062187195,
|
|
"step": 629,
|
|
"valid_targets_mean": 15976.4,
|
|
"valid_targets_min": 14810
|
|
},
|
|
{
|
|
"epoch": 2.6823027718550105,
|
|
"grad_norm": 0.2583131390220038,
|
|
"learning_rate": 2.1039793483239607e-05,
|
|
"loss": 1.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015969395637512,
|
|
"step": 630,
|
|
"valid_targets_mean": 16166.0,
|
|
"valid_targets_min": 15282
|
|
},
|
|
{
|
|
"epoch": 2.6865671641791042,
|
|
"grad_norm": 0.2406239146234153,
|
|
"learning_rate": 2.0980425798502616e-05,
|
|
"loss": 1.021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1628062129020691,
|
|
"step": 631,
|
|
"valid_targets_mean": 8038.8,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 2.6908315565031984,
|
|
"grad_norm": 0.2634618094704433,
|
|
"learning_rate": 2.092104945284255e-05,
|
|
"loss": 1.0157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24490898847579956,
|
|
"step": 632,
|
|
"valid_targets_mean": 15944.4,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 2.695095948827292,
|
|
"grad_norm": 0.2513490052749398,
|
|
"learning_rate": 2.0861664970780434e-05,
|
|
"loss": 1.0376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3211136758327484,
|
|
"step": 633,
|
|
"valid_targets_mean": 16005.0,
|
|
"valid_targets_min": 12680
|
|
},
|
|
{
|
|
"epoch": 2.699360341151386,
|
|
"grad_norm": 0.23777199420967957,
|
|
"learning_rate": 2.08022728769092e-05,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19891712069511414,
|
|
"step": 634,
|
|
"valid_targets_mean": 10561.3,
|
|
"valid_targets_min": 6389
|
|
},
|
|
{
|
|
"epoch": 2.70362473347548,
|
|
"grad_norm": 0.22784442937559377,
|
|
"learning_rate": 2.0742873695889005e-05,
|
|
"loss": 1.0733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2798650860786438,
|
|
"step": 635,
|
|
"valid_targets_mean": 16137.4,
|
|
"valid_targets_min": 15187
|
|
},
|
|
{
|
|
"epoch": 2.7078891257995736,
|
|
"grad_norm": 0.2578640629204441,
|
|
"learning_rate": 2.0683467952442626e-05,
|
|
"loss": 1.0577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054782748222351,
|
|
"step": 636,
|
|
"valid_targets_mean": 16199.2,
|
|
"valid_targets_min": 15333
|
|
},
|
|
{
|
|
"epoch": 2.7121535181236673,
|
|
"grad_norm": 0.23590469762852542,
|
|
"learning_rate": 2.0624056171350785e-05,
|
|
"loss": 1.0577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311520129442215,
|
|
"step": 637,
|
|
"valid_targets_mean": 13013.5,
|
|
"valid_targets_min": 10913
|
|
},
|
|
{
|
|
"epoch": 2.716417910447761,
|
|
"grad_norm": 0.24606557103992302,
|
|
"learning_rate": 2.0564638877447566e-05,
|
|
"loss": 0.9992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815939784049988,
|
|
"step": 638,
|
|
"valid_targets_mean": 16089.1,
|
|
"valid_targets_min": 15225
|
|
},
|
|
{
|
|
"epoch": 2.7206823027718547,
|
|
"grad_norm": 0.2648957939437123,
|
|
"learning_rate": 2.0505216595615742e-05,
|
|
"loss": 1.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32964062690734863,
|
|
"step": 639,
|
|
"valid_targets_mean": 16108.9,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 2.724946695095949,
|
|
"grad_norm": 0.21696403842331907,
|
|
"learning_rate": 2.044578985078215e-05,
|
|
"loss": 1.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2445964217185974,
|
|
"step": 640,
|
|
"valid_targets_mean": 15094.3,
|
|
"valid_targets_min": 13211
|
|
},
|
|
{
|
|
"epoch": 2.7292110874200426,
|
|
"grad_norm": 0.2686175393069833,
|
|
"learning_rate": 2.0386359167913046e-05,
|
|
"loss": 1.0648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3203163743019104,
|
|
"step": 641,
|
|
"valid_targets_mean": 16148.0,
|
|
"valid_targets_min": 15288
|
|
},
|
|
{
|
|
"epoch": 2.7334754797441363,
|
|
"grad_norm": 0.23381083889228563,
|
|
"learning_rate": 2.0326925072009485e-05,
|
|
"loss": 1.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18711988627910614,
|
|
"step": 642,
|
|
"valid_targets_mean": 10481.8,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.7377398720682304,
|
|
"grad_norm": 0.24796603748809626,
|
|
"learning_rate": 2.0267488088102657e-05,
|
|
"loss": 1.0849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25230154395103455,
|
|
"step": 643,
|
|
"valid_targets_mean": 15958.4,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 2.742004264392324,
|
|
"grad_norm": 0.25484572893188645,
|
|
"learning_rate": 2.0208048741249288e-05,
|
|
"loss": 0.9915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29769226908683777,
|
|
"step": 644,
|
|
"valid_targets_mean": 16206.8,
|
|
"valid_targets_min": 15398
|
|
},
|
|
{
|
|
"epoch": 2.746268656716418,
|
|
"grad_norm": 0.21276746285567366,
|
|
"learning_rate": 2.014860755652695e-05,
|
|
"loss": 1.0248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1710774004459381,
|
|
"step": 645,
|
|
"valid_targets_mean": 9416.3,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 2.750533049040512,
|
|
"grad_norm": 0.2333199165638852,
|
|
"learning_rate": 2.0089165059029477e-05,
|
|
"loss": 1.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24201640486717224,
|
|
"step": 646,
|
|
"valid_targets_mean": 16177.7,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 2.7547974413646057,
|
|
"grad_norm": 0.2135880074160673,
|
|
"learning_rate": 2.0029721773862277e-05,
|
|
"loss": 1.0362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30199944972991943,
|
|
"step": 647,
|
|
"valid_targets_mean": 15917.4,
|
|
"valid_targets_min": 9299
|
|
},
|
|
{
|
|
"epoch": 2.7590618336886994,
|
|
"grad_norm": 0.2362137019123995,
|
|
"learning_rate": 1.997027822613773e-05,
|
|
"loss": 1.0104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2068403959274292,
|
|
"step": 648,
|
|
"valid_targets_mean": 12600.8,
|
|
"valid_targets_min": 9289
|
|
},
|
|
{
|
|
"epoch": 2.763326226012793,
|
|
"grad_norm": 0.21479351229165758,
|
|
"learning_rate": 1.9910834940970533e-05,
|
|
"loss": 1.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30875372886657715,
|
|
"step": 649,
|
|
"valid_targets_mean": 16163.2,
|
|
"valid_targets_min": 15488
|
|
},
|
|
{
|
|
"epoch": 2.767590618336887,
|
|
"grad_norm": 0.2549516060520758,
|
|
"learning_rate": 1.985139244347305e-05,
|
|
"loss": 1.0175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28148266673088074,
|
|
"step": 650,
|
|
"valid_targets_mean": 16152.5,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 2.771855010660981,
|
|
"grad_norm": 0.2291803277727466,
|
|
"learning_rate": 1.979195125875072e-05,
|
|
"loss": 1.0532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24805381894111633,
|
|
"step": 651,
|
|
"valid_targets_mean": 14425.4,
|
|
"valid_targets_min": 12081
|
|
},
|
|
{
|
|
"epoch": 2.7761194029850746,
|
|
"grad_norm": 0.2784673614966153,
|
|
"learning_rate": 1.9732511911897353e-05,
|
|
"loss": 1.0534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29927659034729004,
|
|
"step": 652,
|
|
"valid_targets_mean": 16109.8,
|
|
"valid_targets_min": 14956
|
|
},
|
|
{
|
|
"epoch": 2.7803837953091683,
|
|
"grad_norm": 0.2665693367529166,
|
|
"learning_rate": 1.9673074927990525e-05,
|
|
"loss": 1.054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553492784500122,
|
|
"step": 653,
|
|
"valid_targets_mean": 12686.2,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 2.7846481876332625,
|
|
"grad_norm": 0.258366490540349,
|
|
"learning_rate": 1.9613640832086957e-05,
|
|
"loss": 1.0174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649986743927002,
|
|
"step": 654,
|
|
"valid_targets_mean": 16176.4,
|
|
"valid_targets_min": 15720
|
|
},
|
|
{
|
|
"epoch": 2.788912579957356,
|
|
"grad_norm": 0.27182690784206887,
|
|
"learning_rate": 1.9554210149217855e-05,
|
|
"loss": 1.0149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867087125778198,
|
|
"step": 655,
|
|
"valid_targets_mean": 16022.2,
|
|
"valid_targets_min": 13473
|
|
},
|
|
{
|
|
"epoch": 2.79317697228145,
|
|
"grad_norm": 0.26722859528629267,
|
|
"learning_rate": 1.9494783404384265e-05,
|
|
"loss": 1.0539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20252707600593567,
|
|
"step": 656,
|
|
"valid_targets_mean": 9653.7,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 2.7974413646055436,
|
|
"grad_norm": 0.2316912477336916,
|
|
"learning_rate": 1.9435361122552437e-05,
|
|
"loss": 0.9846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24132490158081055,
|
|
"step": 657,
|
|
"valid_targets_mean": 16161.2,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 2.8017057569296373,
|
|
"grad_norm": 0.25748752302717803,
|
|
"learning_rate": 1.9375943828649215e-05,
|
|
"loss": 1.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29179710149765015,
|
|
"step": 658,
|
|
"valid_targets_mean": 16125.6,
|
|
"valid_targets_min": 14877
|
|
},
|
|
{
|
|
"epoch": 2.8059701492537314,
|
|
"grad_norm": 0.25746042952829756,
|
|
"learning_rate": 1.9316532047557378e-05,
|
|
"loss": 1.0235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19404509663581848,
|
|
"step": 659,
|
|
"valid_targets_mean": 11347.2,
|
|
"valid_targets_min": 7388
|
|
},
|
|
{
|
|
"epoch": 2.810234541577825,
|
|
"grad_norm": 0.2302939992167745,
|
|
"learning_rate": 1.9257126304110998e-05,
|
|
"loss": 1.031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27259528636932373,
|
|
"step": 660,
|
|
"valid_targets_mean": 16103.5,
|
|
"valid_targets_min": 15242
|
|
},
|
|
{
|
|
"epoch": 2.814498933901919,
|
|
"grad_norm": 0.25198259548480473,
|
|
"learning_rate": 1.919772712309081e-05,
|
|
"loss": 1.0209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935293912887573,
|
|
"step": 661,
|
|
"valid_targets_mean": 16163.3,
|
|
"valid_targets_min": 15198
|
|
},
|
|
{
|
|
"epoch": 2.818763326226013,
|
|
"grad_norm": 0.2200286922289353,
|
|
"learning_rate": 1.9138335029219572e-05,
|
|
"loss": 1.0536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23220394551753998,
|
|
"step": 662,
|
|
"valid_targets_mean": 13117.1,
|
|
"valid_targets_min": 11573
|
|
},
|
|
{
|
|
"epoch": 2.8230277185501067,
|
|
"grad_norm": 0.23440034064219098,
|
|
"learning_rate": 1.9078950547157458e-05,
|
|
"loss": 1.0368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831166684627533,
|
|
"step": 663,
|
|
"valid_targets_mean": 16066.2,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 2.8272921108742004,
|
|
"grad_norm": 0.2183113289524723,
|
|
"learning_rate": 1.9019574201497387e-05,
|
|
"loss": 1.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33316051959991455,
|
|
"step": 664,
|
|
"valid_targets_mean": 16098.2,
|
|
"valid_targets_min": 14646
|
|
},
|
|
{
|
|
"epoch": 2.831556503198294,
|
|
"grad_norm": 0.22078950062464048,
|
|
"learning_rate": 1.8960206516760396e-05,
|
|
"loss": 1.0492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25347208976745605,
|
|
"step": 665,
|
|
"valid_targets_mean": 15457.3,
|
|
"valid_targets_min": 14161
|
|
},
|
|
{
|
|
"epoch": 2.835820895522388,
|
|
"grad_norm": 0.2134148999740181,
|
|
"learning_rate": 1.890084801739102e-05,
|
|
"loss": 0.9944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27177226543426514,
|
|
"step": 666,
|
|
"valid_targets_mean": 16156.0,
|
|
"valid_targets_min": 15128
|
|
},
|
|
{
|
|
"epoch": 2.840085287846482,
|
|
"grad_norm": 0.22257594579115064,
|
|
"learning_rate": 1.884149922775265e-05,
|
|
"loss": 1.0345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21412542462348938,
|
|
"step": 667,
|
|
"valid_targets_mean": 11165.4,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 2.8443496801705757,
|
|
"grad_norm": 0.21178683195544276,
|
|
"learning_rate": 1.878216067212291e-05,
|
|
"loss": 1.0247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25402650237083435,
|
|
"step": 668,
|
|
"valid_targets_mean": 15901.6,
|
|
"valid_targets_min": 14526
|
|
},
|
|
{
|
|
"epoch": 2.8486140724946694,
|
|
"grad_norm": 0.25074264951872627,
|
|
"learning_rate": 1.8722832874689007e-05,
|
|
"loss": 1.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28927081823349,
|
|
"step": 669,
|
|
"valid_targets_mean": 16190.7,
|
|
"valid_targets_min": 15714
|
|
},
|
|
{
|
|
"epoch": 2.8528784648187635,
|
|
"grad_norm": 0.23287240870568066,
|
|
"learning_rate": 1.8663516359543123e-05,
|
|
"loss": 1.0172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14716637134552002,
|
|
"step": 670,
|
|
"valid_targets_mean": 8172.2,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.23950131807780825,
|
|
"learning_rate": 1.860421165067775e-05,
|
|
"loss": 1.0239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2460516393184662,
|
|
"step": 671,
|
|
"valid_targets_mean": 16181.2,
|
|
"valid_targets_min": 15380
|
|
},
|
|
{
|
|
"epoch": 2.861407249466951,
|
|
"grad_norm": 0.23157562595418954,
|
|
"learning_rate": 1.8544919271981125e-05,
|
|
"loss": 1.0617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31192123889923096,
|
|
"step": 672,
|
|
"valid_targets_mean": 16093.1,
|
|
"valid_targets_min": 14929
|
|
},
|
|
{
|
|
"epoch": 2.8656716417910446,
|
|
"grad_norm": 0.22833207374850398,
|
|
"learning_rate": 1.8485639747232535e-05,
|
|
"loss": 1.035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22791972756385803,
|
|
"step": 673,
|
|
"valid_targets_mean": 12507.3,
|
|
"valid_targets_min": 10158
|
|
},
|
|
{
|
|
"epoch": 2.8699360341151388,
|
|
"grad_norm": 0.24941293281250398,
|
|
"learning_rate": 1.8426373600097723e-05,
|
|
"loss": 1.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764943838119507,
|
|
"step": 674,
|
|
"valid_targets_mean": 16078.8,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 2.8742004264392325,
|
|
"grad_norm": 0.22245905457862583,
|
|
"learning_rate": 1.836712135412424e-05,
|
|
"loss": 1.0315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29476335644721985,
|
|
"step": 675,
|
|
"valid_targets_mean": 16158.5,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 2.878464818763326,
|
|
"grad_norm": 0.2527922980176215,
|
|
"learning_rate": 1.8307883532736878e-05,
|
|
"loss": 1.0229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23116479814052582,
|
|
"step": 676,
|
|
"valid_targets_mean": 14858.4,
|
|
"valid_targets_min": 12939
|
|
},
|
|
{
|
|
"epoch": 2.88272921108742,
|
|
"grad_norm": 0.22475933770657133,
|
|
"learning_rate": 1.8248660659232964e-05,
|
|
"loss": 1.0149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29866713285446167,
|
|
"step": 677,
|
|
"valid_targets_mean": 16104.2,
|
|
"valid_targets_min": 15187
|
|
},
|
|
{
|
|
"epoch": 2.886993603411514,
|
|
"grad_norm": 0.231273821884454,
|
|
"learning_rate": 1.8189453256777798e-05,
|
|
"loss": 1.0114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25470566749572754,
|
|
"step": 678,
|
|
"valid_targets_mean": 13330.7,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 2.8912579957356077,
|
|
"grad_norm": 0.23910386595968475,
|
|
"learning_rate": 1.8130261848399996e-05,
|
|
"loss": 1.0467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502293288707733,
|
|
"step": 679,
|
|
"valid_targets_mean": 15772.3,
|
|
"valid_targets_min": 14001
|
|
},
|
|
{
|
|
"epoch": 2.8955223880597014,
|
|
"grad_norm": 0.22121884183094487,
|
|
"learning_rate": 1.8071086956986916e-05,
|
|
"loss": 1.0073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3032669723033905,
|
|
"step": 680,
|
|
"valid_targets_mean": 16085.4,
|
|
"valid_targets_min": 14479
|
|
},
|
|
{
|
|
"epoch": 2.8997867803837956,
|
|
"grad_norm": 0.24253258088598406,
|
|
"learning_rate": 1.8011929105279967e-05,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15033088624477386,
|
|
"step": 681,
|
|
"valid_targets_mean": 8402.1,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 2.9040511727078893,
|
|
"grad_norm": 0.20793563860463674,
|
|
"learning_rate": 1.795278881587007e-05,
|
|
"loss": 1.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602580785751343,
|
|
"step": 682,
|
|
"valid_targets_mean": 16153.5,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 2.908315565031983,
|
|
"grad_norm": 0.21533915056637562,
|
|
"learning_rate": 1.7893666611192962e-05,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978869080543518,
|
|
"step": 683,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15505
|
|
},
|
|
{
|
|
"epoch": 2.9125799573560767,
|
|
"grad_norm": 0.20511026387766632,
|
|
"learning_rate": 1.783456301352467e-05,
|
|
"loss": 1.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18348674476146698,
|
|
"step": 684,
|
|
"valid_targets_mean": 10106.0,
|
|
"valid_targets_min": 4326
|
|
},
|
|
{
|
|
"epoch": 2.9168443496801704,
|
|
"grad_norm": 0.21034257392870168,
|
|
"learning_rate": 1.7775478544976813e-05,
|
|
"loss": 1.0677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854713201522827,
|
|
"step": 685,
|
|
"valid_targets_mean": 16062.8,
|
|
"valid_targets_min": 14365
|
|
},
|
|
{
|
|
"epoch": 2.9211087420042645,
|
|
"grad_norm": 0.21266707523484377,
|
|
"learning_rate": 1.7716413727492035e-05,
|
|
"loss": 1.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32130110263824463,
|
|
"step": 686,
|
|
"valid_targets_mean": 16094.8,
|
|
"valid_targets_min": 14920
|
|
},
|
|
{
|
|
"epoch": 2.925373134328358,
|
|
"grad_norm": 0.21812191169716444,
|
|
"learning_rate": 1.7657369082839392e-05,
|
|
"loss": 1.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23229658603668213,
|
|
"step": 687,
|
|
"valid_targets_mean": 13003.5,
|
|
"valid_targets_min": 10625
|
|
},
|
|
{
|
|
"epoch": 2.929637526652452,
|
|
"grad_norm": 0.2162686524716888,
|
|
"learning_rate": 1.7598345132609747e-05,
|
|
"loss": 1.0324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28563833236694336,
|
|
"step": 688,
|
|
"valid_targets_mean": 16220.6,
|
|
"valid_targets_min": 15582
|
|
},
|
|
{
|
|
"epoch": 2.933901918976546,
|
|
"grad_norm": 0.22879288653799318,
|
|
"learning_rate": 1.7539342398211132e-05,
|
|
"loss": 1.0593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32399559020996094,
|
|
"step": 689,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 15595
|
|
},
|
|
{
|
|
"epoch": 2.9381663113006398,
|
|
"grad_norm": 0.2302506984015126,
|
|
"learning_rate": 1.748036140086416e-05,
|
|
"loss": 1.0494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.260400652885437,
|
|
"step": 690,
|
|
"valid_targets_mean": 15390.4,
|
|
"valid_targets_min": 14171
|
|
},
|
|
{
|
|
"epoch": 2.9424307036247335,
|
|
"grad_norm": 0.21850318614421996,
|
|
"learning_rate": 1.742140266159744e-05,
|
|
"loss": 1.0286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908626198768616,
|
|
"step": 691,
|
|
"valid_targets_mean": 16097.5,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 2.946695095948827,
|
|
"grad_norm": 0.2298053216214348,
|
|
"learning_rate": 1.7362466701242943e-05,
|
|
"loss": 1.0351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2131451517343521,
|
|
"step": 692,
|
|
"valid_targets_mean": 11418.3,
|
|
"valid_targets_min": 2707
|
|
},
|
|
{
|
|
"epoch": 2.950959488272921,
|
|
"grad_norm": 0.2133637637780579,
|
|
"learning_rate": 1.7303554040431426e-05,
|
|
"loss": 1.0485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517058253288269,
|
|
"step": 693,
|
|
"valid_targets_mean": 16096.1,
|
|
"valid_targets_min": 13929
|
|
},
|
|
{
|
|
"epoch": 2.955223880597015,
|
|
"grad_norm": 0.2485989853614221,
|
|
"learning_rate": 1.7244665199587812e-05,
|
|
"loss": 1.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812129557132721,
|
|
"step": 694,
|
|
"valid_targets_mean": 16215.6,
|
|
"valid_targets_min": 15674
|
|
},
|
|
{
|
|
"epoch": 2.9594882729211087,
|
|
"grad_norm": 0.21752076241663548,
|
|
"learning_rate": 1.7185800698926594e-05,
|
|
"loss": 1.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16977828741073608,
|
|
"step": 695,
|
|
"valid_targets_mean": 9167.3,
|
|
"valid_targets_min": 2370
|
|
},
|
|
{
|
|
"epoch": 2.9637526652452024,
|
|
"grad_norm": 0.20427771557408353,
|
|
"learning_rate": 1.7126961058447276e-05,
|
|
"loss": 1.0792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27761727571487427,
|
|
"step": 696,
|
|
"valid_targets_mean": 16013.3,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 2.9680170575692966,
|
|
"grad_norm": 0.23787771224554388,
|
|
"learning_rate": 1.706814679792973e-05,
|
|
"loss": 1.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27882838249206543,
|
|
"step": 697,
|
|
"valid_targets_mean": 16093.7,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 2.9722814498933903,
|
|
"grad_norm": 0.2233058393397929,
|
|
"learning_rate": 1.7009358436929632e-05,
|
|
"loss": 1.0598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2286585420370102,
|
|
"step": 698,
|
|
"valid_targets_mean": 12394.0,
|
|
"valid_targets_min": 9653
|
|
},
|
|
{
|
|
"epoch": 2.976545842217484,
|
|
"grad_norm": 0.23102733923169372,
|
|
"learning_rate": 1.6950596494773855e-05,
|
|
"loss": 1.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30752837657928467,
|
|
"step": 699,
|
|
"valid_targets_mean": 16081.6,
|
|
"valid_targets_min": 14632
|
|
},
|
|
{
|
|
"epoch": 2.9808102345415777,
|
|
"grad_norm": 0.21524273504458513,
|
|
"learning_rate": 1.6891861490555906e-05,
|
|
"loss": 0.9983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2923296093940735,
|
|
"step": 700,
|
|
"valid_targets_mean": 16114.5,
|
|
"valid_targets_min": 15221
|
|
},
|
|
{
|
|
"epoch": 2.9850746268656714,
|
|
"grad_norm": 0.21635269462461057,
|
|
"learning_rate": 1.683315394313132e-05,
|
|
"loss": 1.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24893790483474731,
|
|
"step": 701,
|
|
"valid_targets_mean": 14577.7,
|
|
"valid_targets_min": 12869
|
|
},
|
|
{
|
|
"epoch": 2.9893390191897655,
|
|
"grad_norm": 0.23777660105141393,
|
|
"learning_rate": 1.677447437111309e-05,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949479818344116,
|
|
"step": 702,
|
|
"valid_targets_mean": 16146.8,
|
|
"valid_targets_min": 15427
|
|
},
|
|
{
|
|
"epoch": 2.9936034115138592,
|
|
"grad_norm": 0.2313537698655914,
|
|
"learning_rate": 1.671582329286707e-05,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23207740485668182,
|
|
"step": 703,
|
|
"valid_targets_mean": 12671.8,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 2.997867803837953,
|
|
"grad_norm": 0.2370830165882117,
|
|
"learning_rate": 1.66572012265074e-05,
|
|
"loss": 1.0403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588110566139221,
|
|
"step": 704,
|
|
"valid_targets_mean": 15686.2,
|
|
"valid_targets_min": 14947
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.2916023608705293,
|
|
"learning_rate": 1.6598608689891953e-05,
|
|
"loss": 1.003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47874170541763306,
|
|
"step": 705,
|
|
"valid_targets_mean": 11341.6,
|
|
"valid_targets_min": 3066
|
|
},
|
|
{
|
|
"epoch": 3.0042643923240937,
|
|
"grad_norm": 0.22546620007126184,
|
|
"learning_rate": 1.654004620061773e-05,
|
|
"loss": 1.0221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2473573088645935,
|
|
"step": 706,
|
|
"valid_targets_mean": 16072.4,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 3.008528784648188,
|
|
"grad_norm": 0.24435078478549777,
|
|
"learning_rate": 1.6481514276016297e-05,
|
|
"loss": 1.0196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26537612080574036,
|
|
"step": 707,
|
|
"valid_targets_mean": 16044.9,
|
|
"valid_targets_min": 11563
|
|
},
|
|
{
|
|
"epoch": 3.0127931769722816,
|
|
"grad_norm": 0.21816044935934423,
|
|
"learning_rate": 1.6423013433149207e-05,
|
|
"loss": 1.0045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21564412117004395,
|
|
"step": 708,
|
|
"valid_targets_mean": 13379.7,
|
|
"valid_targets_min": 10328
|
|
},
|
|
{
|
|
"epoch": 3.0170575692963753,
|
|
"grad_norm": 0.23232216206932588,
|
|
"learning_rate": 1.636454418880347e-05,
|
|
"loss": 1.0164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604576647281647,
|
|
"step": 709,
|
|
"valid_targets_mean": 16162.3,
|
|
"valid_targets_min": 15122
|
|
},
|
|
{
|
|
"epoch": 3.021321961620469,
|
|
"grad_norm": 0.2340541867037144,
|
|
"learning_rate": 1.630610705948693e-05,
|
|
"loss": 1.0107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890893816947937,
|
|
"step": 710,
|
|
"valid_targets_mean": 16157.8,
|
|
"valid_targets_min": 15578
|
|
},
|
|
{
|
|
"epoch": 3.025586353944563,
|
|
"grad_norm": 0.22984553170284047,
|
|
"learning_rate": 1.6247702561423753e-05,
|
|
"loss": 1.0195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2258562445640564,
|
|
"step": 711,
|
|
"valid_targets_mean": 13078.1,
|
|
"valid_targets_min": 11469
|
|
},
|
|
{
|
|
"epoch": 3.029850746268657,
|
|
"grad_norm": 0.22706069554415406,
|
|
"learning_rate": 1.6189331210549828e-05,
|
|
"loss": 1.0563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3094644248485565,
|
|
"step": 712,
|
|
"valid_targets_mean": 16113.1,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 3.0341151385927505,
|
|
"grad_norm": 0.2517273448178296,
|
|
"learning_rate": 1.613099352250825e-05,
|
|
"loss": 1.0505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539500594139099,
|
|
"step": 713,
|
|
"valid_targets_mean": 13016.7,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 3.038379530916844,
|
|
"grad_norm": 0.24724247646042918,
|
|
"learning_rate": 1.6072690012644717e-05,
|
|
"loss": 1.0232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24730217456817627,
|
|
"step": 714,
|
|
"valid_targets_mean": 16104.4,
|
|
"valid_targets_min": 14551
|
|
},
|
|
{
|
|
"epoch": 3.0426439232409384,
|
|
"grad_norm": 0.22341197798882587,
|
|
"learning_rate": 1.6014421196003022e-05,
|
|
"loss": 0.9959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27211296558380127,
|
|
"step": 715,
|
|
"valid_targets_mean": 16228.6,
|
|
"valid_targets_min": 15711
|
|
},
|
|
{
|
|
"epoch": 3.046908315565032,
|
|
"grad_norm": 0.2372792533087285,
|
|
"learning_rate": 1.5956187587320468e-05,
|
|
"loss": 1.0163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803392767906189,
|
|
"step": 716,
|
|
"valid_targets_mean": 10309.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 3.0511727078891258,
|
|
"grad_norm": 0.2246449853128541,
|
|
"learning_rate": 1.5897989701023355e-05,
|
|
"loss": 1.0308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26668739318847656,
|
|
"step": 717,
|
|
"valid_targets_mean": 16109.7,
|
|
"valid_targets_min": 14769
|
|
},
|
|
{
|
|
"epoch": 3.0554371002132195,
|
|
"grad_norm": 0.22388036653405088,
|
|
"learning_rate": 1.58398280512224e-05,
|
|
"loss": 1.0403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29415494203567505,
|
|
"step": 718,
|
|
"valid_targets_mean": 15781.9,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 3.0597014925373136,
|
|
"grad_norm": 0.2166445618932895,
|
|
"learning_rate": 1.5781703151708215e-05,
|
|
"loss": 1.0193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1880883276462555,
|
|
"step": 719,
|
|
"valid_targets_mean": 10639.8,
|
|
"valid_targets_min": 4800
|
|
},
|
|
{
|
|
"epoch": 3.0639658848614073,
|
|
"grad_norm": 0.2341552816551429,
|
|
"learning_rate": 1.5723615515946773e-05,
|
|
"loss": 1.0522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27435302734375,
|
|
"step": 720,
|
|
"valid_targets_mean": 16126.2,
|
|
"valid_targets_min": 15410
|
|
},
|
|
{
|
|
"epoch": 3.068230277185501,
|
|
"grad_norm": 0.22436101078864107,
|
|
"learning_rate": 1.5665565657074874e-05,
|
|
"loss": 1.0562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31322503089904785,
|
|
"step": 721,
|
|
"valid_targets_mean": 16086.5,
|
|
"valid_targets_min": 14307
|
|
},
|
|
{
|
|
"epoch": 3.0724946695095947,
|
|
"grad_norm": 0.2327080346340859,
|
|
"learning_rate": 1.560755408789558e-05,
|
|
"loss": 1.0112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22447748482227325,
|
|
"step": 722,
|
|
"valid_targets_mean": 14448.2,
|
|
"valid_targets_min": 12279
|
|
},
|
|
{
|
|
"epoch": 3.076759061833689,
|
|
"grad_norm": 0.21443885865740964,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 1.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837938070297241,
|
|
"step": 723,
|
|
"valid_targets_mean": 16100.5,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 3.0810234541577826,
|
|
"grad_norm": 0.22515220337257996,
|
|
"learning_rate": 1.5491647868131343e-05,
|
|
"loss": 1.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28435924649238586,
|
|
"step": 724,
|
|
"valid_targets_mean": 16215.7,
|
|
"valid_targets_min": 15372
|
|
},
|
|
{
|
|
"epoch": 3.0852878464818763,
|
|
"grad_norm": 0.217034946809235,
|
|
"learning_rate": 1.5433754241443223e-05,
|
|
"loss": 1.0092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23863840103149414,
|
|
"step": 725,
|
|
"valid_targets_mean": 15597.2,
|
|
"valid_targets_min": 13274
|
|
},
|
|
{
|
|
"epoch": 3.08955223880597,
|
|
"grad_norm": 0.23567098500537823,
|
|
"learning_rate": 1.53759009522323e-05,
|
|
"loss": 1.0144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680036425590515,
|
|
"step": 726,
|
|
"valid_targets_mean": 16214.7,
|
|
"valid_targets_min": 15574
|
|
},
|
|
{
|
|
"epoch": 3.093816631130064,
|
|
"grad_norm": 0.23150148218193378,
|
|
"learning_rate": 1.5318088511565185e-05,
|
|
"loss": 0.9811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20448531210422516,
|
|
"step": 727,
|
|
"valid_targets_mean": 11884.8,
|
|
"valid_targets_min": 1898
|
|
},
|
|
{
|
|
"epoch": 3.098081023454158,
|
|
"grad_norm": 0.24963689320764465,
|
|
"learning_rate": 1.5260317430147627e-05,
|
|
"loss": 0.9949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23797962069511414,
|
|
"step": 728,
|
|
"valid_targets_mean": 15739.9,
|
|
"valid_targets_min": 13414
|
|
},
|
|
{
|
|
"epoch": 3.1023454157782515,
|
|
"grad_norm": 0.23523132452561607,
|
|
"learning_rate": 1.5202588218320024e-05,
|
|
"loss": 1.0148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28550830483436584,
|
|
"step": 729,
|
|
"valid_targets_mean": 16185.2,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 3.106609808102345,
|
|
"grad_norm": 0.2300225672707401,
|
|
"learning_rate": 1.5144901386052924e-05,
|
|
"loss": 0.985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14239609241485596,
|
|
"step": 730,
|
|
"valid_targets_mean": 8392.7,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.1108742004264394,
|
|
"grad_norm": 0.22194726120343725,
|
|
"learning_rate": 1.5087257442942467e-05,
|
|
"loss": 1.0368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581402063369751,
|
|
"step": 731,
|
|
"valid_targets_mean": 16164.5,
|
|
"valid_targets_min": 15163
|
|
},
|
|
{
|
|
"epoch": 3.115138592750533,
|
|
"grad_norm": 0.21659593022650356,
|
|
"learning_rate": 1.502965689820593e-05,
|
|
"loss": 1.0126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931985855102539,
|
|
"step": 732,
|
|
"valid_targets_mean": 16121.2,
|
|
"valid_targets_min": 15579
|
|
},
|
|
{
|
|
"epoch": 3.1194029850746268,
|
|
"grad_norm": 0.1979214228996395,
|
|
"learning_rate": 1.4972100260677222e-05,
|
|
"loss": 1.0678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.203348308801651,
|
|
"step": 733,
|
|
"valid_targets_mean": 12018.5,
|
|
"valid_targets_min": 6953
|
|
},
|
|
{
|
|
"epoch": 3.1236673773987205,
|
|
"grad_norm": 0.2141897771266866,
|
|
"learning_rate": 1.4914588038802383e-05,
|
|
"loss": 1.0256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2846953868865967,
|
|
"step": 734,
|
|
"valid_targets_mean": 16149.8,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 3.1279317697228146,
|
|
"grad_norm": 0.22134730992124976,
|
|
"learning_rate": 1.4857120740635084e-05,
|
|
"loss": 1.0357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.308135986328125,
|
|
"step": 735,
|
|
"valid_targets_mean": 16092.7,
|
|
"valid_targets_min": 14058
|
|
},
|
|
{
|
|
"epoch": 3.1321961620469083,
|
|
"grad_norm": 0.21199663364784577,
|
|
"learning_rate": 1.4799698873832153e-05,
|
|
"loss": 1.0532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25040268898010254,
|
|
"step": 736,
|
|
"valid_targets_mean": 14434.2,
|
|
"valid_targets_min": 12077
|
|
},
|
|
{
|
|
"epoch": 3.136460554371002,
|
|
"grad_norm": 0.22408687687739112,
|
|
"learning_rate": 1.4742322945649073e-05,
|
|
"loss": 0.9979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28990089893341064,
|
|
"step": 737,
|
|
"valid_targets_mean": 16217.7,
|
|
"valid_targets_min": 15808
|
|
},
|
|
{
|
|
"epoch": 3.140724946695096,
|
|
"grad_norm": 0.22090528353836641,
|
|
"learning_rate": 1.4684993462935532e-05,
|
|
"loss": 0.9881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285807728767395,
|
|
"step": 738,
|
|
"valid_targets_mean": 13375.1,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 3.14498933901919,
|
|
"grad_norm": 0.21227037281282393,
|
|
"learning_rate": 1.462771093213092e-05,
|
|
"loss": 1.0113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540406584739685,
|
|
"step": 739,
|
|
"valid_targets_mean": 15798.1,
|
|
"valid_targets_min": 13840
|
|
},
|
|
{
|
|
"epoch": 3.1492537313432836,
|
|
"grad_norm": 0.22645562879511819,
|
|
"learning_rate": 1.4570475859259856e-05,
|
|
"loss": 0.9978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28150904178619385,
|
|
"step": 740,
|
|
"valid_targets_mean": 16178.0,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 3.1535181236673773,
|
|
"grad_norm": 0.2553953055802415,
|
|
"learning_rate": 1.4513288749927714e-05,
|
|
"loss": 0.9978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1643623411655426,
|
|
"step": 741,
|
|
"valid_targets_mean": 9426.2,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 3.1577825159914714,
|
|
"grad_norm": 0.21553654472222183,
|
|
"learning_rate": 1.4456150109316192e-05,
|
|
"loss": 1.024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26102060079574585,
|
|
"step": 742,
|
|
"valid_targets_mean": 16006.3,
|
|
"valid_targets_min": 14473
|
|
},
|
|
{
|
|
"epoch": 3.162046908315565,
|
|
"grad_norm": 0.24417735632997145,
|
|
"learning_rate": 1.4399060442178798e-05,
|
|
"loss": 0.947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28816813230514526,
|
|
"step": 743,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15476
|
|
},
|
|
{
|
|
"epoch": 3.166311300639659,
|
|
"grad_norm": 0.22991537644479015,
|
|
"learning_rate": 1.4342020252836437e-05,
|
|
"loss": 1.0389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19212383031845093,
|
|
"step": 744,
|
|
"valid_targets_mean": 10538.3,
|
|
"valid_targets_min": 6177
|
|
},
|
|
{
|
|
"epoch": 3.1705756929637525,
|
|
"grad_norm": 0.22349717871874022,
|
|
"learning_rate": 1.4285030045172913e-05,
|
|
"loss": 1.0197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26045793294906616,
|
|
"step": 745,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 14124
|
|
},
|
|
{
|
|
"epoch": 3.1748400852878467,
|
|
"grad_norm": 0.21543348222421577,
|
|
"learning_rate": 1.422809032263052e-05,
|
|
"loss": 1.0468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28180545568466187,
|
|
"step": 746,
|
|
"valid_targets_mean": 16165.9,
|
|
"valid_targets_min": 15366
|
|
},
|
|
{
|
|
"epoch": 3.1791044776119404,
|
|
"grad_norm": 0.20075036785144845,
|
|
"learning_rate": 1.4171201588205566e-05,
|
|
"loss": 1.0547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.244021475315094,
|
|
"step": 747,
|
|
"valid_targets_mean": 13604.9,
|
|
"valid_targets_min": 10519
|
|
},
|
|
{
|
|
"epoch": 3.183368869936034,
|
|
"grad_norm": 0.20359529763623488,
|
|
"learning_rate": 1.4114364344443935e-05,
|
|
"loss": 0.9755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26434558629989624,
|
|
"step": 748,
|
|
"valid_targets_mean": 16148.0,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 3.1876332622601278,
|
|
"grad_norm": 0.206317596580328,
|
|
"learning_rate": 1.4057579093436653e-05,
|
|
"loss": 1.019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964480519294739,
|
|
"step": 749,
|
|
"valid_targets_mean": 16185.6,
|
|
"valid_targets_min": 15616
|
|
},
|
|
{
|
|
"epoch": 3.191897654584222,
|
|
"grad_norm": 0.19965426313624246,
|
|
"learning_rate": 1.400084633681546e-05,
|
|
"loss": 1.0369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25970005989074707,
|
|
"step": 750,
|
|
"valid_targets_mean": 15437.9,
|
|
"valid_targets_min": 13886
|
|
},
|
|
{
|
|
"epoch": 3.1961620469083156,
|
|
"grad_norm": 0.1931550450078641,
|
|
"learning_rate": 1.3944166575748355e-05,
|
|
"loss": 0.9745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27987536787986755,
|
|
"step": 751,
|
|
"valid_targets_mean": 16108.0,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 3.2004264392324093,
|
|
"grad_norm": 0.2285498671414749,
|
|
"learning_rate": 1.3887540310935187e-05,
|
|
"loss": 1.0267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19410404562950134,
|
|
"step": 752,
|
|
"valid_targets_mean": 10666.6,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 3.204690831556503,
|
|
"grad_norm": 0.1950207609526711,
|
|
"learning_rate": 1.3830968042603226e-05,
|
|
"loss": 1.0083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649402618408203,
|
|
"step": 753,
|
|
"valid_targets_mean": 15740.0,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 3.208955223880597,
|
|
"grad_norm": 0.23761770896996695,
|
|
"learning_rate": 1.3774450270502762e-05,
|
|
"loss": 0.9949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31608280539512634,
|
|
"step": 754,
|
|
"valid_targets_mean": 16115.1,
|
|
"valid_targets_min": 14966
|
|
},
|
|
{
|
|
"epoch": 3.213219616204691,
|
|
"grad_norm": 0.22083848523624852,
|
|
"learning_rate": 1.3717987493902656e-05,
|
|
"loss": 1.0205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14785869419574738,
|
|
"step": 755,
|
|
"valid_targets_mean": 8256.4,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 3.2174840085287846,
|
|
"grad_norm": 0.2271481305402101,
|
|
"learning_rate": 1.3661580211585947e-05,
|
|
"loss": 1.0203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2505563497543335,
|
|
"step": 756,
|
|
"valid_targets_mean": 16184.0,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 3.2217484008528783,
|
|
"grad_norm": 0.23538681792469754,
|
|
"learning_rate": 1.3605228921845457e-05,
|
|
"loss": 1.0205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30578216910362244,
|
|
"step": 757,
|
|
"valid_targets_mean": 16165.3,
|
|
"valid_targets_min": 15239
|
|
},
|
|
{
|
|
"epoch": 3.2260127931769724,
|
|
"grad_norm": 0.23014469470104712,
|
|
"learning_rate": 1.3548934122479373e-05,
|
|
"loss": 1.0558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20352551341056824,
|
|
"step": 758,
|
|
"valid_targets_mean": 11775.9,
|
|
"valid_targets_min": 8991
|
|
},
|
|
{
|
|
"epoch": 3.230277185501066,
|
|
"grad_norm": 0.23398942351829313,
|
|
"learning_rate": 1.349269631078686e-05,
|
|
"loss": 1.0173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27417778968811035,
|
|
"step": 759,
|
|
"valid_targets_mean": 16214.9,
|
|
"valid_targets_min": 15357
|
|
},
|
|
{
|
|
"epoch": 3.23454157782516,
|
|
"grad_norm": 0.23487663215223234,
|
|
"learning_rate": 1.3436515983563659e-05,
|
|
"loss": 1.0685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3089166283607483,
|
|
"step": 760,
|
|
"valid_targets_mean": 16046.1,
|
|
"valid_targets_min": 15157
|
|
},
|
|
{
|
|
"epoch": 3.2388059701492535,
|
|
"grad_norm": 0.22707354630172036,
|
|
"learning_rate": 1.3380393637097692e-05,
|
|
"loss": 0.9868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24889841675758362,
|
|
"step": 761,
|
|
"valid_targets_mean": 14222.7,
|
|
"valid_targets_min": 12022
|
|
},
|
|
{
|
|
"epoch": 3.2430703624733477,
|
|
"grad_norm": 0.2250444800568611,
|
|
"learning_rate": 1.3324329767164708e-05,
|
|
"loss": 1.0181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2840118408203125,
|
|
"step": 762,
|
|
"valid_targets_mean": 16097.0,
|
|
"valid_targets_min": 13888
|
|
},
|
|
{
|
|
"epoch": 3.2473347547974414,
|
|
"grad_norm": 0.2180264015784957,
|
|
"learning_rate": 1.3268324869023878e-05,
|
|
"loss": 0.9988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2439872920513153,
|
|
"step": 763,
|
|
"valid_targets_mean": 13022.2,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 3.251599147121535,
|
|
"grad_norm": 0.20409803688058786,
|
|
"learning_rate": 1.3212379437413421e-05,
|
|
"loss": 1.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266012966632843,
|
|
"step": 764,
|
|
"valid_targets_mean": 15528.1,
|
|
"valid_targets_min": 14452
|
|
},
|
|
{
|
|
"epoch": 3.2558635394456292,
|
|
"grad_norm": 0.2228089775488321,
|
|
"learning_rate": 1.3156493966546236e-05,
|
|
"loss": 1.0167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28855788707733154,
|
|
"step": 765,
|
|
"valid_targets_mean": 16141.7,
|
|
"valid_targets_min": 14987
|
|
},
|
|
{
|
|
"epoch": 3.260127931769723,
|
|
"grad_norm": 0.23085883706791083,
|
|
"learning_rate": 1.3100668950105534e-05,
|
|
"loss": 1.0809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19959942996501923,
|
|
"step": 766,
|
|
"valid_targets_mean": 10493.4,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 3.2643923240938166,
|
|
"grad_norm": 0.19998322552016268,
|
|
"learning_rate": 1.3044904881240507e-05,
|
|
"loss": 1.0014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537648677825928,
|
|
"step": 767,
|
|
"valid_targets_mean": 16092.9,
|
|
"valid_targets_min": 15480
|
|
},
|
|
{
|
|
"epoch": 3.2686567164179103,
|
|
"grad_norm": 0.2288404706980349,
|
|
"learning_rate": 1.2989202252561926e-05,
|
|
"loss": 1.0208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3139684796333313,
|
|
"step": 768,
|
|
"valid_targets_mean": 16164.0,
|
|
"valid_targets_min": 15618
|
|
},
|
|
{
|
|
"epoch": 3.272921108742004,
|
|
"grad_norm": 0.23154431096127515,
|
|
"learning_rate": 1.2933561556137806e-05,
|
|
"loss": 0.9937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20685717463493347,
|
|
"step": 769,
|
|
"valid_targets_mean": 11801.2,
|
|
"valid_targets_min": 9058
|
|
},
|
|
{
|
|
"epoch": 3.277185501066098,
|
|
"grad_norm": 0.238643564108478,
|
|
"learning_rate": 1.2877983283489062e-05,
|
|
"loss": 1.0106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26336807012557983,
|
|
"step": 770,
|
|
"valid_targets_mean": 16097.4,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 3.281449893390192,
|
|
"grad_norm": 0.210463166838876,
|
|
"learning_rate": 1.2822467925585186e-05,
|
|
"loss": 1.0258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28999316692352295,
|
|
"step": 771,
|
|
"valid_targets_mean": 16153.3,
|
|
"valid_targets_min": 15217
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.21515324831546198,
|
|
"learning_rate": 1.2767015972839879e-05,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21576184034347534,
|
|
"step": 772,
|
|
"valid_targets_mean": 13491.8,
|
|
"valid_targets_min": 11383
|
|
},
|
|
{
|
|
"epoch": 3.2899786780383797,
|
|
"grad_norm": 0.19718742065687034,
|
|
"learning_rate": 1.2711627915106728e-05,
|
|
"loss": 1.0051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28501254320144653,
|
|
"step": 773,
|
|
"valid_targets_mean": 16005.6,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 3.2942430703624734,
|
|
"grad_norm": 0.20931056590488556,
|
|
"learning_rate": 1.2656304241674877e-05,
|
|
"loss": 0.9973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3030652105808258,
|
|
"step": 774,
|
|
"valid_targets_mean": 16154.3,
|
|
"valid_targets_min": 15623
|
|
},
|
|
{
|
|
"epoch": 3.298507462686567,
|
|
"grad_norm": 0.1864644163173028,
|
|
"learning_rate": 1.2601045441264734e-05,
|
|
"loss": 0.9799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24769797921180725,
|
|
"step": 775,
|
|
"valid_targets_mean": 15966.1,
|
|
"valid_targets_min": 14649
|
|
},
|
|
{
|
|
"epoch": 3.302771855010661,
|
|
"grad_norm": 0.21175345768619538,
|
|
"learning_rate": 1.2545852002023599e-05,
|
|
"loss": 0.9942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30079352855682373,
|
|
"step": 776,
|
|
"valid_targets_mean": 16077.0,
|
|
"valid_targets_min": 14795
|
|
},
|
|
{
|
|
"epoch": 3.307036247334755,
|
|
"grad_norm": 0.23951421924454977,
|
|
"learning_rate": 1.2490724411521406e-05,
|
|
"loss": 1.0185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20463250577449799,
|
|
"step": 777,
|
|
"valid_targets_mean": 11349.3,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 3.3113006396588487,
|
|
"grad_norm": 0.2266010059067405,
|
|
"learning_rate": 1.243566315674637e-05,
|
|
"loss": 1.0384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26843178272247314,
|
|
"step": 778,
|
|
"valid_targets_mean": 15895.2,
|
|
"valid_targets_min": 14853
|
|
},
|
|
{
|
|
"epoch": 3.3155650319829424,
|
|
"grad_norm": 0.21876189679359032,
|
|
"learning_rate": 1.238066872410073e-05,
|
|
"loss": 0.9662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956579923629761,
|
|
"step": 779,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 3.319829424307036,
|
|
"grad_norm": 0.224276681573692,
|
|
"learning_rate": 1.2325741599396418e-05,
|
|
"loss": 0.9855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13216564059257507,
|
|
"step": 780,
|
|
"valid_targets_mean": 7354.0,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 3.3240938166311302,
|
|
"grad_norm": 0.19662079347142808,
|
|
"learning_rate": 1.2270882267850765e-05,
|
|
"loss": 1.0673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683402895927429,
|
|
"step": 781,
|
|
"valid_targets_mean": 16123.9,
|
|
"valid_targets_min": 15331
|
|
},
|
|
{
|
|
"epoch": 3.328358208955224,
|
|
"grad_norm": 0.20498062466960942,
|
|
"learning_rate": 1.2216091214082248e-05,
|
|
"loss": 1.0287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28701385855674744,
|
|
"step": 782,
|
|
"valid_targets_mean": 16087.6,
|
|
"valid_targets_min": 13835
|
|
},
|
|
{
|
|
"epoch": 3.3326226012793176,
|
|
"grad_norm": 0.22236633186009508,
|
|
"learning_rate": 1.2161368922106192e-05,
|
|
"loss": 1.0277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1911384016275406,
|
|
"step": 783,
|
|
"valid_targets_mean": 11460.7,
|
|
"valid_targets_min": 8049
|
|
},
|
|
{
|
|
"epoch": 3.3368869936034113,
|
|
"grad_norm": 0.20791046573398056,
|
|
"learning_rate": 1.2106715875330475e-05,
|
|
"loss": 1.05,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731104791164398,
|
|
"step": 784,
|
|
"valid_targets_mean": 16209.4,
|
|
"valid_targets_min": 15350
|
|
},
|
|
{
|
|
"epoch": 3.3411513859275055,
|
|
"grad_norm": 0.21261925120846067,
|
|
"learning_rate": 1.2052132556551275e-05,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992277145385742,
|
|
"step": 785,
|
|
"valid_targets_mean": 16167.2,
|
|
"valid_targets_min": 15730
|
|
},
|
|
{
|
|
"epoch": 3.345415778251599,
|
|
"grad_norm": 0.216522475968083,
|
|
"learning_rate": 1.1997619447948814e-05,
|
|
"loss": 1.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24031764268875122,
|
|
"step": 786,
|
|
"valid_targets_mean": 13789.7,
|
|
"valid_targets_min": 11708
|
|
},
|
|
{
|
|
"epoch": 3.349680170575693,
|
|
"grad_norm": 0.20962862955449657,
|
|
"learning_rate": 1.1943177031083094e-05,
|
|
"loss": 1.0053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922288179397583,
|
|
"step": 787,
|
|
"valid_targets_mean": 16186.4,
|
|
"valid_targets_min": 14810
|
|
},
|
|
{
|
|
"epoch": 3.3539445628997866,
|
|
"grad_norm": 0.20784049608898483,
|
|
"learning_rate": 1.1888805786889621e-05,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26215359568595886,
|
|
"step": 788,
|
|
"valid_targets_mean": 13417.4,
|
|
"valid_targets_min": 3667
|
|
},
|
|
{
|
|
"epoch": 3.3582089552238807,
|
|
"grad_norm": 0.19438778955082836,
|
|
"learning_rate": 1.183450619567518e-05,
|
|
"loss": 1.0037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25706902146339417,
|
|
"step": 789,
|
|
"valid_targets_mean": 15748.0,
|
|
"valid_targets_min": 13975
|
|
},
|
|
{
|
|
"epoch": 3.3624733475479744,
|
|
"grad_norm": 0.18480593385926,
|
|
"learning_rate": 1.1780278737113581e-05,
|
|
"loss": 1.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852016091346741,
|
|
"step": 790,
|
|
"valid_targets_mean": 15791.8,
|
|
"valid_targets_min": 5994
|
|
},
|
|
{
|
|
"epoch": 3.366737739872068,
|
|
"grad_norm": 0.19045874412467398,
|
|
"learning_rate": 1.1726123890241439e-05,
|
|
"loss": 0.9975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702101230621338,
|
|
"step": 791,
|
|
"valid_targets_mean": 9803.9,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 3.3710021321961623,
|
|
"grad_norm": 0.18142250180118896,
|
|
"learning_rate": 1.1672042133453925e-05,
|
|
"loss": 1.0138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24327364563941956,
|
|
"step": 792,
|
|
"valid_targets_mean": 16135.9,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 3.375266524520256,
|
|
"grad_norm": 0.18976083835973934,
|
|
"learning_rate": 1.1618033944500527e-05,
|
|
"loss": 0.9981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3210751414299011,
|
|
"step": 793,
|
|
"valid_targets_mean": 16139.2,
|
|
"valid_targets_min": 15361
|
|
},
|
|
{
|
|
"epoch": 3.3795309168443497,
|
|
"grad_norm": 0.19818634585506895,
|
|
"learning_rate": 1.1564099800480864e-05,
|
|
"loss": 1.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19776135683059692,
|
|
"step": 794,
|
|
"valid_targets_mean": 11481.5,
|
|
"valid_targets_min": 6068
|
|
},
|
|
{
|
|
"epoch": 3.3837953091684434,
|
|
"grad_norm": 0.19760424713484523,
|
|
"learning_rate": 1.151024017784045e-05,
|
|
"loss": 1.0054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24625568091869354,
|
|
"step": 795,
|
|
"valid_targets_mean": 16202.7,
|
|
"valid_targets_min": 15135
|
|
},
|
|
{
|
|
"epoch": 3.388059701492537,
|
|
"grad_norm": 0.21328717602610783,
|
|
"learning_rate": 1.1456455552366488e-05,
|
|
"loss": 1.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3155757486820221,
|
|
"step": 796,
|
|
"valid_targets_mean": 16132.7,
|
|
"valid_targets_min": 14783
|
|
},
|
|
{
|
|
"epoch": 3.3923240938166312,
|
|
"grad_norm": 0.18428037676198075,
|
|
"learning_rate": 1.1402746399183671e-05,
|
|
"loss": 1.0136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24852502346038818,
|
|
"step": 797,
|
|
"valid_targets_mean": 13810.4,
|
|
"valid_targets_min": 11194
|
|
},
|
|
{
|
|
"epoch": 3.396588486140725,
|
|
"grad_norm": 0.19469315419306438,
|
|
"learning_rate": 1.1349113192749986e-05,
|
|
"loss": 1.0325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27724477648735046,
|
|
"step": 798,
|
|
"valid_targets_mean": 16136.4,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 3.4008528784648187,
|
|
"grad_norm": 0.19335058306425573,
|
|
"learning_rate": 1.1295556406852488e-05,
|
|
"loss": 1.0257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919307351112366,
|
|
"step": 799,
|
|
"valid_targets_mean": 16118.9,
|
|
"valid_targets_min": 15147
|
|
},
|
|
{
|
|
"epoch": 3.405117270788913,
|
|
"grad_norm": 0.17514958145184512,
|
|
"learning_rate": 1.1242076514603201e-05,
|
|
"loss": 1.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25642216205596924,
|
|
"step": 800,
|
|
"valid_targets_mean": 14175.8,
|
|
"valid_targets_min": 12875
|
|
},
|
|
{
|
|
"epoch": 3.4093816631130065,
|
|
"grad_norm": 0.20228613416602578,
|
|
"learning_rate": 1.1188673988434831e-05,
|
|
"loss": 1.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3179895281791687,
|
|
"step": 801,
|
|
"valid_targets_mean": 16137.8,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 3.4136460554371,
|
|
"grad_norm": 0.18851992182694163,
|
|
"learning_rate": 1.1135349300096667e-05,
|
|
"loss": 1.0053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2002144306898117,
|
|
"step": 802,
|
|
"valid_targets_mean": 10829.2,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 3.417910447761194,
|
|
"grad_norm": 0.19124496378036196,
|
|
"learning_rate": 1.1082102920650397e-05,
|
|
"loss": 1.0102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536805272102356,
|
|
"step": 803,
|
|
"valid_targets_mean": 16158.3,
|
|
"valid_targets_min": 15109
|
|
},
|
|
{
|
|
"epoch": 3.4221748400852876,
|
|
"grad_norm": 0.22587700464345187,
|
|
"learning_rate": 1.102893532046593e-05,
|
|
"loss": 1.0389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2835776209831238,
|
|
"step": 804,
|
|
"valid_targets_mean": 16059.1,
|
|
"valid_targets_min": 14384
|
|
},
|
|
{
|
|
"epoch": 3.4264392324093818,
|
|
"grad_norm": 0.19199882848957545,
|
|
"learning_rate": 1.0975846969217258e-05,
|
|
"loss": 1.0246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1586105227470398,
|
|
"step": 805,
|
|
"valid_targets_mean": 8455.3,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.4307036247334755,
|
|
"grad_norm": 0.19052360147803976,
|
|
"learning_rate": 1.092283833587829e-05,
|
|
"loss": 1.0397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25331923365592957,
|
|
"step": 806,
|
|
"valid_targets_mean": 16085.5,
|
|
"valid_targets_min": 14929
|
|
},
|
|
{
|
|
"epoch": 3.434968017057569,
|
|
"grad_norm": 0.2022605935609196,
|
|
"learning_rate": 1.086990988871873e-05,
|
|
"loss": 1.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29131874442100525,
|
|
"step": 807,
|
|
"valid_targets_mean": 16047.4,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 3.4392324093816633,
|
|
"grad_norm": 0.1901835998943412,
|
|
"learning_rate": 1.0817062095299929e-05,
|
|
"loss": 1.0441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18520936369895935,
|
|
"step": 808,
|
|
"valid_targets_mean": 11042.4,
|
|
"valid_targets_min": 7440
|
|
},
|
|
{
|
|
"epoch": 3.443496801705757,
|
|
"grad_norm": 0.20070764647087036,
|
|
"learning_rate": 1.0764295422470755e-05,
|
|
"loss": 1.0013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2895042896270752,
|
|
"step": 809,
|
|
"valid_targets_mean": 16087.7,
|
|
"valid_targets_min": 13211
|
|
},
|
|
{
|
|
"epoch": 3.4477611940298507,
|
|
"grad_norm": 0.1878010136021189,
|
|
"learning_rate": 1.0711610336363477e-05,
|
|
"loss": 1.0257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30274391174316406,
|
|
"step": 810,
|
|
"valid_targets_mean": 16093.6,
|
|
"valid_targets_min": 14556
|
|
},
|
|
{
|
|
"epoch": 3.4520255863539444,
|
|
"grad_norm": 0.17667053615946723,
|
|
"learning_rate": 1.065900730238961e-05,
|
|
"loss": 1.0249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23889347910881042,
|
|
"step": 811,
|
|
"valid_targets_mean": 14490.8,
|
|
"valid_targets_min": 12326
|
|
},
|
|
{
|
|
"epoch": 3.4562899786780386,
|
|
"grad_norm": 0.18797859932961541,
|
|
"learning_rate": 1.0606486785235879e-05,
|
|
"loss": 0.9799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28064554929733276,
|
|
"step": 812,
|
|
"valid_targets_mean": 16143.8,
|
|
"valid_targets_min": 14907
|
|
},
|
|
{
|
|
"epoch": 3.4605543710021323,
|
|
"grad_norm": 0.1885835078230724,
|
|
"learning_rate": 1.0554049248860045e-05,
|
|
"loss": 1.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25304943323135376,
|
|
"step": 813,
|
|
"valid_targets_mean": 12887.4,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 3.464818763326226,
|
|
"grad_norm": 0.19425937698400536,
|
|
"learning_rate": 1.0501695156486819e-05,
|
|
"loss": 1.0567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560194432735443,
|
|
"step": 814,
|
|
"valid_targets_mean": 15759.6,
|
|
"valid_targets_min": 11287
|
|
},
|
|
{
|
|
"epoch": 3.4690831556503197,
|
|
"grad_norm": 0.2009073306626454,
|
|
"learning_rate": 1.0449424970603796e-05,
|
|
"loss": 0.9965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265529990196228,
|
|
"step": 815,
|
|
"valid_targets_mean": 16230.9,
|
|
"valid_targets_min": 15773
|
|
},
|
|
{
|
|
"epoch": 3.473347547974414,
|
|
"grad_norm": 0.19160044921213984,
|
|
"learning_rate": 1.0397239152957356e-05,
|
|
"loss": 1.0171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16818805038928986,
|
|
"step": 816,
|
|
"valid_targets_mean": 9027.7,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 3.4776119402985075,
|
|
"grad_norm": 0.20133406216973854,
|
|
"learning_rate": 1.034513816454858e-05,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25717800855636597,
|
|
"step": 817,
|
|
"valid_targets_mean": 16111.5,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 3.481876332622601,
|
|
"grad_norm": 0.20058057382990999,
|
|
"learning_rate": 1.0293122465629186e-05,
|
|
"loss": 1.0049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28611189126968384,
|
|
"step": 818,
|
|
"valid_targets_mean": 16084.9,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 3.486140724946695,
|
|
"grad_norm": 0.21471526294953153,
|
|
"learning_rate": 1.0241192515697432e-05,
|
|
"loss": 1.011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20527414977550507,
|
|
"step": 819,
|
|
"valid_targets_mean": 12038.9,
|
|
"valid_targets_min": 7752
|
|
},
|
|
{
|
|
"epoch": 3.490405117270789,
|
|
"grad_norm": 0.18698327284105212,
|
|
"learning_rate": 1.0189348773494135e-05,
|
|
"loss": 1.024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27734407782554626,
|
|
"step": 820,
|
|
"valid_targets_mean": 16122.8,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 3.4946695095948828,
|
|
"grad_norm": 0.1936151742736665,
|
|
"learning_rate": 1.0137591696998514e-05,
|
|
"loss": 1.0267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3137998580932617,
|
|
"step": 821,
|
|
"valid_targets_mean": 16039.4,
|
|
"valid_targets_min": 15167
|
|
},
|
|
{
|
|
"epoch": 3.4989339019189765,
|
|
"grad_norm": 0.23242629979557722,
|
|
"learning_rate": 1.0085921743424225e-05,
|
|
"loss": 1.0271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22472453117370605,
|
|
"step": 822,
|
|
"valid_targets_mean": 13571.1,
|
|
"valid_targets_min": 11707
|
|
},
|
|
{
|
|
"epoch": 3.50319829424307,
|
|
"grad_norm": 0.18107341012796876,
|
|
"learning_rate": 1.0034339369215288e-05,
|
|
"loss": 1.0324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28194552659988403,
|
|
"step": 823,
|
|
"valid_targets_mean": 16043.7,
|
|
"valid_targets_min": 13934
|
|
},
|
|
{
|
|
"epoch": 3.5074626865671643,
|
|
"grad_norm": 0.19598860481753452,
|
|
"learning_rate": 9.982845030042068e-06,
|
|
"loss": 1.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33745408058166504,
|
|
"step": 824,
|
|
"valid_targets_mean": 16131.1,
|
|
"valid_targets_min": 15258
|
|
},
|
|
{
|
|
"epoch": 3.511727078891258,
|
|
"grad_norm": 0.19947657391724366,
|
|
"learning_rate": 9.931439180797237e-06,
|
|
"loss": 1.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22845852375030518,
|
|
"step": 825,
|
|
"valid_targets_mean": 14083.7,
|
|
"valid_targets_min": 12058
|
|
},
|
|
{
|
|
"epoch": 3.5159914712153517,
|
|
"grad_norm": 0.19144720106478683,
|
|
"learning_rate": 9.880122275591752e-06,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947565019130707,
|
|
"step": 826,
|
|
"valid_targets_mean": 16235.3,
|
|
"valid_targets_min": 15869
|
|
},
|
|
{
|
|
"epoch": 3.520255863539446,
|
|
"grad_norm": 0.2078277408605803,
|
|
"learning_rate": 9.828894767750865e-06,
|
|
"loss": 1.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20672515034675598,
|
|
"step": 827,
|
|
"valid_targets_mean": 10532.3,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 3.5245202558635396,
|
|
"grad_norm": 0.19751630348586982,
|
|
"learning_rate": 9.777757109810102e-06,
|
|
"loss": 1.0312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553499937057495,
|
|
"step": 828,
|
|
"valid_targets_mean": 16077.7,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 3.5287846481876333,
|
|
"grad_norm": 0.19658509693876208,
|
|
"learning_rate": 9.726709753511275e-06,
|
|
"loss": 1.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29346132278442383,
|
|
"step": 829,
|
|
"valid_targets_mean": 16147.0,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 3.533049040511727,
|
|
"grad_norm": 0.20929693034454866,
|
|
"learning_rate": 9.675753149798474e-06,
|
|
"loss": 1.0262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1624056100845337,
|
|
"step": 830,
|
|
"valid_targets_mean": 9455.6,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 3.5373134328358207,
|
|
"grad_norm": 0.18635446379853654,
|
|
"learning_rate": 9.624887748814118e-06,
|
|
"loss": 1.0308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24756374955177307,
|
|
"step": 831,
|
|
"valid_targets_mean": 16233.1,
|
|
"valid_targets_min": 15409
|
|
},
|
|
{
|
|
"epoch": 3.541577825159915,
|
|
"grad_norm": 0.21157734136180095,
|
|
"learning_rate": 9.574113999894909e-06,
|
|
"loss": 1.0297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274283230304718,
|
|
"step": 832,
|
|
"valid_targets_mean": 16230.2,
|
|
"valid_targets_min": 14917
|
|
},
|
|
{
|
|
"epoch": 3.5458422174840085,
|
|
"grad_norm": 0.20909402716707864,
|
|
"learning_rate": 9.523432351567979e-06,
|
|
"loss": 1.0222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20221446454524994,
|
|
"step": 833,
|
|
"valid_targets_mean": 12193.7,
|
|
"valid_targets_min": 9042
|
|
},
|
|
{
|
|
"epoch": 3.550106609808102,
|
|
"grad_norm": 0.2126134326170871,
|
|
"learning_rate": 9.472843251546792e-06,
|
|
"loss": 1.0422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3187773823738098,
|
|
"step": 834,
|
|
"valid_targets_mean": 16074.3,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 3.5543710021321964,
|
|
"grad_norm": 0.2248429864051284,
|
|
"learning_rate": 9.422347146727294e-06,
|
|
"loss": 1.017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925642430782318,
|
|
"step": 835,
|
|
"valid_targets_mean": 16097.2,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 3.55863539445629,
|
|
"grad_norm": 0.19764809114586646,
|
|
"learning_rate": 9.371944483183912e-06,
|
|
"loss": 1.0199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24568304419517517,
|
|
"step": 836,
|
|
"valid_targets_mean": 13546.5,
|
|
"valid_targets_min": 11766
|
|
},
|
|
{
|
|
"epoch": 3.5628997867803838,
|
|
"grad_norm": 0.19852269613513113,
|
|
"learning_rate": 9.321635706165635e-06,
|
|
"loss": 1.0502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2961001396179199,
|
|
"step": 837,
|
|
"valid_targets_mean": 16135.5,
|
|
"valid_targets_min": 15091
|
|
},
|
|
{
|
|
"epoch": 3.5671641791044775,
|
|
"grad_norm": 0.22539551639713004,
|
|
"learning_rate": 9.271421260092075e-06,
|
|
"loss": 1.0581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696417272090912,
|
|
"step": 838,
|
|
"valid_targets_mean": 12912.3,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.17369169122057854,
|
|
"learning_rate": 9.221301588549519e-06,
|
|
"loss": 1.0186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25950783491134644,
|
|
"step": 839,
|
|
"valid_targets_mean": 15765.0,
|
|
"valid_targets_min": 14646
|
|
},
|
|
{
|
|
"epoch": 3.5756929637526653,
|
|
"grad_norm": 0.19826394949704745,
|
|
"learning_rate": 9.171277134287057e-06,
|
|
"loss": 1.0423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28724998235702515,
|
|
"step": 840,
|
|
"valid_targets_mean": 16137.4,
|
|
"valid_targets_min": 14841
|
|
},
|
|
{
|
|
"epoch": 3.579957356076759,
|
|
"grad_norm": 0.22605116372463568,
|
|
"learning_rate": 9.121348339212634e-06,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19209089875221252,
|
|
"step": 841,
|
|
"valid_targets_mean": 11219.8,
|
|
"valid_targets_min": 3007
|
|
},
|
|
{
|
|
"epoch": 3.5842217484008527,
|
|
"grad_norm": 0.18985231179138834,
|
|
"learning_rate": 9.07151564438916e-06,
|
|
"loss": 1.0271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26109862327575684,
|
|
"step": 842,
|
|
"valid_targets_mean": 15881.8,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 3.588486140724947,
|
|
"grad_norm": 0.19525270465200253,
|
|
"learning_rate": 9.021779490030611e-06,
|
|
"loss": 1.0068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29757362604141235,
|
|
"step": 843,
|
|
"valid_targets_mean": 16168.9,
|
|
"valid_targets_min": 15515
|
|
},
|
|
{
|
|
"epoch": 3.5927505330490406,
|
|
"grad_norm": 0.20273161279088872,
|
|
"learning_rate": 8.972140315498119e-06,
|
|
"loss": 0.9891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1628081500530243,
|
|
"step": 844,
|
|
"valid_targets_mean": 9614.0,
|
|
"valid_targets_min": 3748
|
|
},
|
|
{
|
|
"epoch": 3.5970149253731343,
|
|
"grad_norm": 0.17829706313717747,
|
|
"learning_rate": 8.922598559296154e-06,
|
|
"loss": 0.9995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26611262559890747,
|
|
"step": 845,
|
|
"valid_targets_mean": 16162.2,
|
|
"valid_targets_min": 15506
|
|
},
|
|
{
|
|
"epoch": 3.6012793176972284,
|
|
"grad_norm": 0.21027782800390699,
|
|
"learning_rate": 8.873154659068582e-06,
|
|
"loss": 0.9777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27415943145751953,
|
|
"step": 846,
|
|
"valid_targets_mean": 16113.0,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 3.605543710021322,
|
|
"grad_norm": 0.1933481346322796,
|
|
"learning_rate": 8.823809051594816e-06,
|
|
"loss": 1.0405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2183433175086975,
|
|
"step": 847,
|
|
"valid_targets_mean": 13214.4,
|
|
"valid_targets_min": 11479
|
|
},
|
|
{
|
|
"epoch": 3.609808102345416,
|
|
"grad_norm": 0.1911863955607189,
|
|
"learning_rate": 8.774562172785988e-06,
|
|
"loss": 1.0198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27938979864120483,
|
|
"step": 848,
|
|
"valid_targets_mean": 16196.8,
|
|
"valid_targets_min": 15650
|
|
},
|
|
{
|
|
"epoch": 3.6140724946695095,
|
|
"grad_norm": 0.1996827380897634,
|
|
"learning_rate": 8.725414457681063e-06,
|
|
"loss": 1.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31652724742889404,
|
|
"step": 849,
|
|
"valid_targets_mean": 16141.3,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 3.6183368869936032,
|
|
"grad_norm": 0.18875838410836185,
|
|
"learning_rate": 8.676366340443017e-06,
|
|
"loss": 1.0259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23282839357852936,
|
|
"step": 850,
|
|
"valid_targets_mean": 15714.5,
|
|
"valid_targets_min": 13969
|
|
},
|
|
{
|
|
"epoch": 3.6226012793176974,
|
|
"grad_norm": 0.1922747408101709,
|
|
"learning_rate": 8.627418254355e-06,
|
|
"loss": 0.9953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925228476524353,
|
|
"step": 851,
|
|
"valid_targets_mean": 16100.1,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 3.626865671641791,
|
|
"grad_norm": 0.1871926147470615,
|
|
"learning_rate": 8.578570631816474e-06,
|
|
"loss": 0.9884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1962984800338745,
|
|
"step": 852,
|
|
"valid_targets_mean": 10805.0,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 3.631130063965885,
|
|
"grad_norm": 0.18840256973106104,
|
|
"learning_rate": 8.529823904339472e-06,
|
|
"loss": 1.0003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26471132040023804,
|
|
"step": 853,
|
|
"valid_targets_mean": 16080.2,
|
|
"valid_targets_min": 14810
|
|
},
|
|
{
|
|
"epoch": 3.635394456289979,
|
|
"grad_norm": 0.19166828194360724,
|
|
"learning_rate": 8.481178502544684e-06,
|
|
"loss": 0.98,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843315899372101,
|
|
"step": 854,
|
|
"valid_targets_mean": 16059.2,
|
|
"valid_targets_min": 13226
|
|
},
|
|
{
|
|
"epoch": 3.6396588486140726,
|
|
"grad_norm": 0.19558513955706905,
|
|
"learning_rate": 8.43263485615774e-06,
|
|
"loss": 1.0439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1629284918308258,
|
|
"step": 855,
|
|
"valid_targets_mean": 8378.9,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 3.6439232409381663,
|
|
"grad_norm": 0.18871230748742543,
|
|
"learning_rate": 8.384193394005372e-06,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25322186946868896,
|
|
"step": 856,
|
|
"valid_targets_mean": 16128.1,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 3.64818763326226,
|
|
"grad_norm": 0.19849367481773825,
|
|
"learning_rate": 8.33585454401161e-06,
|
|
"loss": 1.0521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32179251313209534,
|
|
"step": 857,
|
|
"valid_targets_mean": 16055.6,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 3.6524520255863537,
|
|
"grad_norm": 0.18765256209710596,
|
|
"learning_rate": 8.287618733194073e-06,
|
|
"loss": 1.029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21054477989673615,
|
|
"step": 858,
|
|
"valid_targets_mean": 12071.9,
|
|
"valid_targets_min": 9257
|
|
},
|
|
{
|
|
"epoch": 3.656716417910448,
|
|
"grad_norm": 0.193326982284514,
|
|
"learning_rate": 8.239486387660096e-06,
|
|
"loss": 0.9993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29164233803749084,
|
|
"step": 859,
|
|
"valid_targets_mean": 16002.4,
|
|
"valid_targets_min": 14544
|
|
},
|
|
{
|
|
"epoch": 3.6609808102345416,
|
|
"grad_norm": 0.20503274346576894,
|
|
"learning_rate": 8.191457932603052e-06,
|
|
"loss": 0.9978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028004765510559,
|
|
"step": 860,
|
|
"valid_targets_mean": 16125.6,
|
|
"valid_targets_min": 14765
|
|
},
|
|
{
|
|
"epoch": 3.6652452025586353,
|
|
"grad_norm": 0.20103325130745195,
|
|
"learning_rate": 8.143533792298545e-06,
|
|
"loss": 1.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25633686780929565,
|
|
"step": 861,
|
|
"valid_targets_mean": 15072.8,
|
|
"valid_targets_min": 13354
|
|
},
|
|
{
|
|
"epoch": 3.6695095948827294,
|
|
"grad_norm": 0.19253920882400277,
|
|
"learning_rate": 8.095714390100698e-06,
|
|
"loss": 1.0125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25641998648643494,
|
|
"step": 862,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 13888
|
|
},
|
|
{
|
|
"epoch": 3.673773987206823,
|
|
"grad_norm": 0.20352579026745357,
|
|
"learning_rate": 8.048000148438375e-06,
|
|
"loss": 1.0594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2648036777973175,
|
|
"step": 863,
|
|
"valid_targets_mean": 13058.5,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 3.678038379530917,
|
|
"grad_norm": 0.19639948915191724,
|
|
"learning_rate": 8.000391488811485e-06,
|
|
"loss": 1.015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25266677141189575,
|
|
"step": 864,
|
|
"valid_targets_mean": 15782.9,
|
|
"valid_targets_min": 15070
|
|
},
|
|
{
|
|
"epoch": 3.6823027718550105,
|
|
"grad_norm": 0.19386053477890763,
|
|
"learning_rate": 7.952888831787215e-06,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928309738636017,
|
|
"step": 865,
|
|
"valid_targets_mean": 16152.8,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 3.6865671641791042,
|
|
"grad_norm": 0.19745476342088736,
|
|
"learning_rate": 7.905492596996391e-06,
|
|
"loss": 0.9991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1756155639886856,
|
|
"step": 866,
|
|
"valid_targets_mean": 9315.6,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 3.6908315565031984,
|
|
"grad_norm": 0.20138153016069346,
|
|
"learning_rate": 7.858203203129668e-06,
|
|
"loss": 1.0574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26242029666900635,
|
|
"step": 867,
|
|
"valid_targets_mean": 16006.4,
|
|
"valid_targets_min": 14911
|
|
},
|
|
{
|
|
"epoch": 3.695095948827292,
|
|
"grad_norm": 0.19412757800945293,
|
|
"learning_rate": 7.811021067933919e-06,
|
|
"loss": 1.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751907706260681,
|
|
"step": 868,
|
|
"valid_targets_mean": 16138.8,
|
|
"valid_targets_min": 14951
|
|
},
|
|
{
|
|
"epoch": 3.699360341151386,
|
|
"grad_norm": 0.21012497339785624,
|
|
"learning_rate": 7.763946608208504e-06,
|
|
"loss": 1.0114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20527902245521545,
|
|
"step": 869,
|
|
"valid_targets_mean": 11099.1,
|
|
"valid_targets_min": 7003
|
|
},
|
|
{
|
|
"epoch": 3.70362473347548,
|
|
"grad_norm": 0.19117367793417456,
|
|
"learning_rate": 7.716980239801588e-06,
|
|
"loss": 1.0112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28386640548706055,
|
|
"step": 870,
|
|
"valid_targets_mean": 16065.7,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 3.7078891257995736,
|
|
"grad_norm": 0.19692586454539995,
|
|
"learning_rate": 7.670122377606495e-06,
|
|
"loss": 1.0628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3160354495048523,
|
|
"step": 871,
|
|
"valid_targets_mean": 16118.9,
|
|
"valid_targets_min": 15290
|
|
},
|
|
{
|
|
"epoch": 3.7121535181236673,
|
|
"grad_norm": 0.1909097750601415,
|
|
"learning_rate": 7.623373435557988e-06,
|
|
"loss": 1.0436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24374912679195404,
|
|
"step": 872,
|
|
"valid_targets_mean": 13757.4,
|
|
"valid_targets_min": 11218
|
|
},
|
|
{
|
|
"epoch": 3.716417910447761,
|
|
"grad_norm": 0.20030508106272105,
|
|
"learning_rate": 7.5767338266286775e-06,
|
|
"loss": 1.0006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28267043828964233,
|
|
"step": 873,
|
|
"valid_targets_mean": 16168.4,
|
|
"valid_targets_min": 14830
|
|
},
|
|
{
|
|
"epoch": 3.7206823027718547,
|
|
"grad_norm": 0.20812341049975572,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 1.0316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30265045166015625,
|
|
"step": 874,
|
|
"valid_targets_mean": 16155.6,
|
|
"valid_targets_min": 15464
|
|
},
|
|
{
|
|
"epoch": 3.724946695095949,
|
|
"grad_norm": 0.18360113210996984,
|
|
"learning_rate": 7.483784255185249e-06,
|
|
"loss": 1.0431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23530153930187225,
|
|
"step": 875,
|
|
"valid_targets_mean": 14721.8,
|
|
"valid_targets_min": 12209
|
|
},
|
|
{
|
|
"epoch": 3.7292110874200426,
|
|
"grad_norm": 0.20221222455542814,
|
|
"learning_rate": 7.437475113772632e-06,
|
|
"loss": 1.0017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918761372566223,
|
|
"step": 876,
|
|
"valid_targets_mean": 16160.6,
|
|
"valid_targets_min": 15234
|
|
},
|
|
{
|
|
"epoch": 3.7334754797441363,
|
|
"grad_norm": 0.18530134539495166,
|
|
"learning_rate": 7.391276947674932e-06,
|
|
"loss": 1.0165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21132834255695343,
|
|
"step": 877,
|
|
"valid_targets_mean": 11079.2,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 3.7377398720682304,
|
|
"grad_norm": 0.21377599326496985,
|
|
"learning_rate": 7.345190164999307e-06,
|
|
"loss": 1.007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24113556742668152,
|
|
"step": 878,
|
|
"valid_targets_mean": 15988.7,
|
|
"valid_targets_min": 14889
|
|
},
|
|
{
|
|
"epoch": 3.742004264392324,
|
|
"grad_norm": 0.18807427523413936,
|
|
"learning_rate": 7.299215172868947e-06,
|
|
"loss": 1.0029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3014865517616272,
|
|
"step": 879,
|
|
"valid_targets_mean": 16100.1,
|
|
"valid_targets_min": 15198
|
|
},
|
|
{
|
|
"epoch": 3.746268656716418,
|
|
"grad_norm": 0.17169001499638226,
|
|
"learning_rate": 7.2533523774194865e-06,
|
|
"loss": 0.9928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15240547060966492,
|
|
"step": 880,
|
|
"valid_targets_mean": 8749.8,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 3.750533049040512,
|
|
"grad_norm": 0.1679285384728096,
|
|
"learning_rate": 7.2076021837954616e-06,
|
|
"loss": 0.9577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24019278585910797,
|
|
"step": 881,
|
|
"valid_targets_mean": 16235.8,
|
|
"valid_targets_min": 15737
|
|
},
|
|
{
|
|
"epoch": 3.7547974413646057,
|
|
"grad_norm": 0.19434300770296428,
|
|
"learning_rate": 7.161964996146689e-06,
|
|
"loss": 0.9948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28557753562927246,
|
|
"step": 882,
|
|
"valid_targets_mean": 16172.3,
|
|
"valid_targets_min": 15386
|
|
},
|
|
{
|
|
"epoch": 3.7590618336886994,
|
|
"grad_norm": 0.17407388837457433,
|
|
"learning_rate": 7.116441217624708e-06,
|
|
"loss": 1.0277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20178121328353882,
|
|
"step": 883,
|
|
"valid_targets_mean": 11656.7,
|
|
"valid_targets_min": 7621
|
|
},
|
|
{
|
|
"epoch": 3.763326226012793,
|
|
"grad_norm": 0.18193248717461616,
|
|
"learning_rate": 7.071031250379228e-06,
|
|
"loss": 0.998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768580913543701,
|
|
"step": 884,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 3.767590618336887,
|
|
"grad_norm": 0.18181519483778066,
|
|
"learning_rate": 7.0257354955545466e-06,
|
|
"loss": 1.0266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2880393862724304,
|
|
"step": 885,
|
|
"valid_targets_mean": 16203.8,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 3.771855010660981,
|
|
"grad_norm": 0.17358900056666338,
|
|
"learning_rate": 6.980554353286066e-06,
|
|
"loss": 0.9932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24835087358951569,
|
|
"step": 886,
|
|
"valid_targets_mean": 14070.1,
|
|
"valid_targets_min": 12079
|
|
},
|
|
{
|
|
"epoch": 3.7761194029850746,
|
|
"grad_norm": 0.1969409285627566,
|
|
"learning_rate": 6.935488222696676e-06,
|
|
"loss": 1.0337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294018030166626,
|
|
"step": 887,
|
|
"valid_targets_mean": 16149.6,
|
|
"valid_targets_min": 15401
|
|
},
|
|
{
|
|
"epoch": 3.7803837953091683,
|
|
"grad_norm": 0.1790611891295005,
|
|
"learning_rate": 6.890537501893302e-06,
|
|
"loss": 1.0152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23835018277168274,
|
|
"step": 888,
|
|
"valid_targets_mean": 12633.7,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 3.7846481876332625,
|
|
"grad_norm": 0.17338948935102383,
|
|
"learning_rate": 6.845702587963352e-06,
|
|
"loss": 1.047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25005704164505005,
|
|
"step": 889,
|
|
"valid_targets_mean": 15490.9,
|
|
"valid_targets_min": 13898
|
|
},
|
|
{
|
|
"epoch": 3.788912579957356,
|
|
"grad_norm": 0.17636192922700825,
|
|
"learning_rate": 6.800983876971192e-06,
|
|
"loss": 0.967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.277923047542572,
|
|
"step": 890,
|
|
"valid_targets_mean": 16048.4,
|
|
"valid_targets_min": 13573
|
|
},
|
|
{
|
|
"epoch": 3.79317697228145,
|
|
"grad_norm": 0.19529875362281152,
|
|
"learning_rate": 6.756381763954718e-06,
|
|
"loss": 1.0392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1701369434595108,
|
|
"step": 891,
|
|
"valid_targets_mean": 9984.5,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 3.7974413646055436,
|
|
"grad_norm": 0.1719459899710793,
|
|
"learning_rate": 6.7118966429217645e-06,
|
|
"loss": 1.0305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25571155548095703,
|
|
"step": 892,
|
|
"valid_targets_mean": 15897.7,
|
|
"valid_targets_min": 15188
|
|
},
|
|
{
|
|
"epoch": 3.8017057569296373,
|
|
"grad_norm": 0.19200006643367323,
|
|
"learning_rate": 6.667528906846714e-06,
|
|
"loss": 1.0152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30839601159095764,
|
|
"step": 893,
|
|
"valid_targets_mean": 16174.7,
|
|
"valid_targets_min": 15665
|
|
},
|
|
{
|
|
"epoch": 3.8059701492537314,
|
|
"grad_norm": 0.17799336332146048,
|
|
"learning_rate": 6.623278947666974e-06,
|
|
"loss": 1.0596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19059264659881592,
|
|
"step": 894,
|
|
"valid_targets_mean": 10780.8,
|
|
"valid_targets_min": 6697
|
|
},
|
|
{
|
|
"epoch": 3.810234541577825,
|
|
"grad_norm": 0.18003393409064003,
|
|
"learning_rate": 6.579147156279538e-06,
|
|
"loss": 1.017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24897435307502747,
|
|
"step": 895,
|
|
"valid_targets_mean": 16181.2,
|
|
"valid_targets_min": 14287
|
|
},
|
|
{
|
|
"epoch": 3.814498933901919,
|
|
"grad_norm": 0.18280328337121599,
|
|
"learning_rate": 6.535133922537513e-06,
|
|
"loss": 1.0184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27476099133491516,
|
|
"step": 896,
|
|
"valid_targets_mean": 16171.2,
|
|
"valid_targets_min": 15437
|
|
},
|
|
{
|
|
"epoch": 3.818763326226013,
|
|
"grad_norm": 0.18875212290983107,
|
|
"learning_rate": 6.491239635246709e-06,
|
|
"loss": 1.029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2043631672859192,
|
|
"step": 897,
|
|
"valid_targets_mean": 13337.2,
|
|
"valid_targets_min": 10600
|
|
},
|
|
{
|
|
"epoch": 3.8230277185501067,
|
|
"grad_norm": 0.1804494494953742,
|
|
"learning_rate": 6.447464682162143e-06,
|
|
"loss": 1.0278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28921037912368774,
|
|
"step": 898,
|
|
"valid_targets_mean": 16177.3,
|
|
"valid_targets_min": 15404
|
|
},
|
|
{
|
|
"epoch": 3.8272921108742004,
|
|
"grad_norm": 0.19211175028697716,
|
|
"learning_rate": 6.403809449984704e-06,
|
|
"loss": 1.0163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918737530708313,
|
|
"step": 899,
|
|
"valid_targets_mean": 16179.9,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 3.831556503198294,
|
|
"grad_norm": 0.1765273759141979,
|
|
"learning_rate": 6.3602743243576405e-06,
|
|
"loss": 1.0557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659085690975189,
|
|
"step": 900,
|
|
"valid_targets_mean": 15050.4,
|
|
"valid_targets_min": 12744
|
|
},
|
|
{
|
|
"epoch": 3.835820895522388,
|
|
"grad_norm": 0.17748639822638573,
|
|
"learning_rate": 6.316859689863222e-06,
|
|
"loss": 1.0028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765544056892395,
|
|
"step": 901,
|
|
"valid_targets_mean": 16103.3,
|
|
"valid_targets_min": 14365
|
|
},
|
|
{
|
|
"epoch": 3.840085287846482,
|
|
"grad_norm": 0.17420461378371643,
|
|
"learning_rate": 6.273565930019316e-06,
|
|
"loss": 1.0172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19079825282096863,
|
|
"step": 902,
|
|
"valid_targets_mean": 9851.0,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 3.8443496801705757,
|
|
"grad_norm": 0.16560599770246665,
|
|
"learning_rate": 6.230393427276e-06,
|
|
"loss": 0.9984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510971128940582,
|
|
"step": 903,
|
|
"valid_targets_mean": 16164.2,
|
|
"valid_targets_min": 15371
|
|
},
|
|
{
|
|
"epoch": 3.8486140724946694,
|
|
"grad_norm": 0.1792965626692126,
|
|
"learning_rate": 6.187342563012198e-06,
|
|
"loss": 0.9932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955405116081238,
|
|
"step": 904,
|
|
"valid_targets_mean": 16153.7,
|
|
"valid_targets_min": 15258
|
|
},
|
|
{
|
|
"epoch": 3.8528784648187635,
|
|
"grad_norm": 0.17574840664926,
|
|
"learning_rate": 6.144413717532269e-06,
|
|
"loss": 0.985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1359398365020752,
|
|
"step": 905,
|
|
"valid_targets_mean": 7781.8,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.16345377393674543,
|
|
"learning_rate": 6.1016072700627106e-06,
|
|
"loss": 1.0492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529211640357971,
|
|
"step": 906,
|
|
"valid_targets_mean": 16108.3,
|
|
"valid_targets_min": 14821
|
|
},
|
|
{
|
|
"epoch": 3.861407249466951,
|
|
"grad_norm": 0.16703781468036705,
|
|
"learning_rate": 6.058923598748756e-06,
|
|
"loss": 1.0049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27441149950027466,
|
|
"step": 907,
|
|
"valid_targets_mean": 16135.9,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 3.8656716417910446,
|
|
"grad_norm": 0.18499862087725924,
|
|
"learning_rate": 6.016363080651066e-06,
|
|
"loss": 1.0129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22376945614814758,
|
|
"step": 908,
|
|
"valid_targets_mean": 12611.5,
|
|
"valid_targets_min": 9636
|
|
},
|
|
{
|
|
"epoch": 3.8699360341151388,
|
|
"grad_norm": 0.18549970597241158,
|
|
"learning_rate": 5.973926091742386e-06,
|
|
"loss": 1.0333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838277816772461,
|
|
"step": 909,
|
|
"valid_targets_mean": 15943.4,
|
|
"valid_targets_min": 11336
|
|
},
|
|
{
|
|
"epoch": 3.8742004264392325,
|
|
"grad_norm": 0.18280157811663325,
|
|
"learning_rate": 5.931613006904196e-06,
|
|
"loss": 0.9666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27379751205444336,
|
|
"step": 910,
|
|
"valid_targets_mean": 16215.7,
|
|
"valid_targets_min": 15773
|
|
},
|
|
{
|
|
"epoch": 3.878464818763326,
|
|
"grad_norm": 0.1670060683446909,
|
|
"learning_rate": 5.889424199923473e-06,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533782124519348,
|
|
"step": 911,
|
|
"valid_targets_mean": 15084.9,
|
|
"valid_targets_min": 13687
|
|
},
|
|
{
|
|
"epoch": 3.88272921108742,
|
|
"grad_norm": 0.17872960329716236,
|
|
"learning_rate": 5.847360043489318e-06,
|
|
"loss": 1.0414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27177613973617554,
|
|
"step": 912,
|
|
"valid_targets_mean": 16055.0,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 3.886993603411514,
|
|
"grad_norm": 0.18737262682718556,
|
|
"learning_rate": 5.805420909189683e-06,
|
|
"loss": 0.9968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23658359050750732,
|
|
"step": 913,
|
|
"valid_targets_mean": 12997.0,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 3.8912579957356077,
|
|
"grad_norm": 0.19688812946072382,
|
|
"learning_rate": 5.7636071675081076e-06,
|
|
"loss": 1.0155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2453310787677765,
|
|
"step": 914,
|
|
"valid_targets_mean": 15195.4,
|
|
"valid_targets_min": 13288
|
|
},
|
|
{
|
|
"epoch": 3.8955223880597014,
|
|
"grad_norm": 0.1801164773199151,
|
|
"learning_rate": 5.721919187820431e-06,
|
|
"loss": 1.0186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30522605776786804,
|
|
"step": 915,
|
|
"valid_targets_mean": 16162.2,
|
|
"valid_targets_min": 14786
|
|
},
|
|
{
|
|
"epoch": 3.8997867803837956,
|
|
"grad_norm": 0.20184345060283754,
|
|
"learning_rate": 5.6803573383915265e-06,
|
|
"loss": 1.0158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16402074694633484,
|
|
"step": 916,
|
|
"valid_targets_mean": 8992.8,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 3.9040511727078893,
|
|
"grad_norm": 0.19429181652200073,
|
|
"learning_rate": 5.638921986372064e-06,
|
|
"loss": 1.051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25736844539642334,
|
|
"step": 917,
|
|
"valid_targets_mean": 16127.8,
|
|
"valid_targets_min": 15305
|
|
},
|
|
{
|
|
"epoch": 3.908315565031983,
|
|
"grad_norm": 0.17669068878546865,
|
|
"learning_rate": 5.5976134977952315e-06,
|
|
"loss": 1.0119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2880447506904602,
|
|
"step": 918,
|
|
"valid_targets_mean": 16123.2,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 3.9125799573560767,
|
|
"grad_norm": 0.17299784357769227,
|
|
"learning_rate": 5.556432237573564e-06,
|
|
"loss": 1.0279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2095554769039154,
|
|
"step": 919,
|
|
"valid_targets_mean": 11376.9,
|
|
"valid_targets_min": 7337
|
|
},
|
|
{
|
|
"epoch": 3.9168443496801704,
|
|
"grad_norm": 0.16934613260599296,
|
|
"learning_rate": 5.5153785694956416e-06,
|
|
"loss": 0.9933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2432553768157959,
|
|
"step": 920,
|
|
"valid_targets_mean": 16196.0,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 3.9211087420042645,
|
|
"grad_norm": 0.18745002576395378,
|
|
"learning_rate": 5.474452856222942e-06,
|
|
"loss": 1.0192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3223433494567871,
|
|
"step": 921,
|
|
"valid_targets_mean": 15823.3,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 3.925373134328358,
|
|
"grad_norm": 0.16413131624487595,
|
|
"learning_rate": 5.433655459286611e-06,
|
|
"loss": 1.0356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23510609567165375,
|
|
"step": 922,
|
|
"valid_targets_mean": 13433.8,
|
|
"valid_targets_min": 9965
|
|
},
|
|
{
|
|
"epoch": 3.929637526652452,
|
|
"grad_norm": 0.17157667668614518,
|
|
"learning_rate": 5.392986739084238e-06,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28263452649116516,
|
|
"step": 923,
|
|
"valid_targets_mean": 16122.8,
|
|
"valid_targets_min": 15294
|
|
},
|
|
{
|
|
"epoch": 3.933901918976546,
|
|
"grad_norm": 0.18312844983042084,
|
|
"learning_rate": 5.352447054876755e-06,
|
|
"loss": 1.0217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2850250005722046,
|
|
"step": 924,
|
|
"valid_targets_mean": 16221.2,
|
|
"valid_targets_min": 15261
|
|
},
|
|
{
|
|
"epoch": 3.9381663113006398,
|
|
"grad_norm": 0.1730633555170915,
|
|
"learning_rate": 5.31203676478516e-06,
|
|
"loss": 1.0177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2412496656179428,
|
|
"step": 925,
|
|
"valid_targets_mean": 14959.6,
|
|
"valid_targets_min": 12523
|
|
},
|
|
{
|
|
"epoch": 3.9424307036247335,
|
|
"grad_norm": 0.18254919910899342,
|
|
"learning_rate": 5.271756225787434e-06,
|
|
"loss": 1.0219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025828003883362,
|
|
"step": 926,
|
|
"valid_targets_mean": 16134.2,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 3.946695095948827,
|
|
"grad_norm": 0.18547006293160873,
|
|
"learning_rate": 5.231605793715348e-06,
|
|
"loss": 1.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20674429833889008,
|
|
"step": 927,
|
|
"valid_targets_mean": 10423.7,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.950959488272921,
|
|
"grad_norm": 0.17483882087778208,
|
|
"learning_rate": 5.191585823251335e-06,
|
|
"loss": 1.0086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677437663078308,
|
|
"step": 928,
|
|
"valid_targets_mean": 15745.4,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 3.955223880597015,
|
|
"grad_norm": 0.18340491692906996,
|
|
"learning_rate": 5.151696667925348e-06,
|
|
"loss": 1.0196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30543091893196106,
|
|
"step": 929,
|
|
"valid_targets_mean": 15883.8,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 3.9594882729211087,
|
|
"grad_norm": 0.17353634019809588,
|
|
"learning_rate": 5.111938680111732e-06,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15605176985263824,
|
|
"step": 930,
|
|
"valid_targets_mean": 8194.7,
|
|
"valid_targets_min": 2514
|
|
},
|
|
{
|
|
"epoch": 3.9637526652452024,
|
|
"grad_norm": 0.16013301608797142,
|
|
"learning_rate": 5.072312211026125e-06,
|
|
"loss": 0.9481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23745262622833252,
|
|
"step": 931,
|
|
"valid_targets_mean": 16183.0,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 3.9680170575692966,
|
|
"grad_norm": 0.18752852307138054,
|
|
"learning_rate": 5.032817610722369e-06,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28059718012809753,
|
|
"step": 932,
|
|
"valid_targets_mean": 16226.6,
|
|
"valid_targets_min": 15460
|
|
},
|
|
{
|
|
"epoch": 3.9722814498933903,
|
|
"grad_norm": 0.16759127501176574,
|
|
"learning_rate": 4.993455228089366e-06,
|
|
"loss": 0.98,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952660083770752,
|
|
"step": 933,
|
|
"valid_targets_mean": 12267.8,
|
|
"valid_targets_min": 9004
|
|
},
|
|
{
|
|
"epoch": 3.976545842217484,
|
|
"grad_norm": 0.16670340148359164,
|
|
"learning_rate": 4.954225410848048e-06,
|
|
"loss": 1.0271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2915708124637604,
|
|
"step": 934,
|
|
"valid_targets_mean": 16006.9,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 3.9808102345415777,
|
|
"grad_norm": 0.1799964499560203,
|
|
"learning_rate": 4.915128505548284e-06,
|
|
"loss": 1.0526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29275304079055786,
|
|
"step": 935,
|
|
"valid_targets_mean": 16135.8,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 3.9850746268656714,
|
|
"grad_norm": 0.17170353398696742,
|
|
"learning_rate": 4.8761648575658145e-06,
|
|
"loss": 0.9824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22930490970611572,
|
|
"step": 936,
|
|
"valid_targets_mean": 14490.9,
|
|
"valid_targets_min": 12290
|
|
},
|
|
{
|
|
"epoch": 3.9893390191897655,
|
|
"grad_norm": 0.17908275507086058,
|
|
"learning_rate": 4.837334811099217e-06,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3175300657749176,
|
|
"step": 937,
|
|
"valid_targets_mean": 16084.7,
|
|
"valid_targets_min": 14795
|
|
},
|
|
{
|
|
"epoch": 3.9936034115138592,
|
|
"grad_norm": 0.16982624399836438,
|
|
"learning_rate": 4.7986387091668365e-06,
|
|
"loss": 1.0495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.244386225938797,
|
|
"step": 938,
|
|
"valid_targets_mean": 13311.0,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 3.997867803837953,
|
|
"grad_norm": 0.17348971376797556,
|
|
"learning_rate": 4.760076893603791e-06,
|
|
"loss": 0.9879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24180036783218384,
|
|
"step": 939,
|
|
"valid_targets_mean": 16049.2,
|
|
"valid_targets_min": 15191
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.25328745288321697,
|
|
"learning_rate": 4.721649705058926e-06,
|
|
"loss": 0.9824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4427715837955475,
|
|
"step": 940,
|
|
"valid_targets_mean": 11319.7,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.004264392324094,
|
|
"grad_norm": 0.17162594790757177,
|
|
"learning_rate": 4.683357482991819e-06,
|
|
"loss": 0.9659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2455882728099823,
|
|
"step": 941,
|
|
"valid_targets_mean": 16066.7,
|
|
"valid_targets_min": 14483
|
|
},
|
|
{
|
|
"epoch": 4.008528784648187,
|
|
"grad_norm": 0.18660661591121844,
|
|
"learning_rate": 4.645200565669776e-06,
|
|
"loss": 1.0174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3127480745315552,
|
|
"step": 942,
|
|
"valid_targets_mean": 16023.8,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 4.0127931769722816,
|
|
"grad_norm": 0.1797699085533669,
|
|
"learning_rate": 4.607179290164823e-06,
|
|
"loss": 1.0181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21285849809646606,
|
|
"step": 943,
|
|
"valid_targets_mean": 13279.5,
|
|
"valid_targets_min": 10443
|
|
},
|
|
{
|
|
"epoch": 4.017057569296376,
|
|
"grad_norm": 0.1815782872447276,
|
|
"learning_rate": 4.569293992350783e-06,
|
|
"loss": 0.9927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26089972257614136,
|
|
"step": 944,
|
|
"valid_targets_mean": 16190.7,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 4.021321961620469,
|
|
"grad_norm": 0.18902901787261803,
|
|
"learning_rate": 4.531545006900244e-06,
|
|
"loss": 1.0005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29636943340301514,
|
|
"step": 945,
|
|
"valid_targets_mean": 16119.8,
|
|
"valid_targets_min": 15234
|
|
},
|
|
{
|
|
"epoch": 4.025586353944563,
|
|
"grad_norm": 0.16917585233331373,
|
|
"learning_rate": 4.493932667281646e-06,
|
|
"loss": 1.0045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2362777590751648,
|
|
"step": 946,
|
|
"valid_targets_mean": 14883.1,
|
|
"valid_targets_min": 12676
|
|
},
|
|
{
|
|
"epoch": 4.029850746268656,
|
|
"grad_norm": 0.17323089731676028,
|
|
"learning_rate": 4.456457305756321e-06,
|
|
"loss": 1.0169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29187196493148804,
|
|
"step": 947,
|
|
"valid_targets_mean": 16098.3,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 4.0341151385927505,
|
|
"grad_norm": 0.17607985421245612,
|
|
"learning_rate": 4.419119253375557e-06,
|
|
"loss": 1.0203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674245834350586,
|
|
"step": 948,
|
|
"valid_targets_mean": 12999.3,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 4.038379530916845,
|
|
"grad_norm": 0.197620639593689,
|
|
"learning_rate": 4.381918839977675e-06,
|
|
"loss": 1.0165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25065767765045166,
|
|
"step": 949,
|
|
"valid_targets_mean": 15620.4,
|
|
"valid_targets_min": 13538
|
|
},
|
|
{
|
|
"epoch": 4.042643923240938,
|
|
"grad_norm": 0.19309410606119287,
|
|
"learning_rate": 4.344856394185122e-06,
|
|
"loss": 0.9905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3093436360359192,
|
|
"step": 950,
|
|
"valid_targets_mean": 16151.6,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 4.046908315565032,
|
|
"grad_norm": 0.17737637616480462,
|
|
"learning_rate": 4.307932243401538e-06,
|
|
"loss": 1.0561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17596712708473206,
|
|
"step": 951,
|
|
"valid_targets_mean": 9453.4,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 4.051172707889126,
|
|
"grad_norm": 0.15964671170178713,
|
|
"learning_rate": 4.271146713808927e-06,
|
|
"loss": 0.9751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24877549707889557,
|
|
"step": 952,
|
|
"valid_targets_mean": 16083.6,
|
|
"valid_targets_min": 13226
|
|
},
|
|
{
|
|
"epoch": 4.0554371002132195,
|
|
"grad_norm": 0.17690684966733292,
|
|
"learning_rate": 4.234500130364698e-06,
|
|
"loss": 1.0566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31012070178985596,
|
|
"step": 953,
|
|
"valid_targets_mean": 16129.1,
|
|
"valid_targets_min": 15302
|
|
},
|
|
{
|
|
"epoch": 4.059701492537314,
|
|
"grad_norm": 0.16959319370168027,
|
|
"learning_rate": 4.197992816798851e-06,
|
|
"loss": 0.9986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16628019511699677,
|
|
"step": 954,
|
|
"valid_targets_mean": 10084.4,
|
|
"valid_targets_min": 5271
|
|
},
|
|
{
|
|
"epoch": 4.063965884861407,
|
|
"grad_norm": 0.17645660091887394,
|
|
"learning_rate": 4.161625095611101e-06,
|
|
"loss": 1.0102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812536358833313,
|
|
"step": 955,
|
|
"valid_targets_mean": 16028.5,
|
|
"valid_targets_min": 14444
|
|
},
|
|
{
|
|
"epoch": 4.068230277185501,
|
|
"grad_norm": 0.18743764773437324,
|
|
"learning_rate": 4.125397288068007e-06,
|
|
"loss": 1.0075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295288622379303,
|
|
"step": 956,
|
|
"valid_targets_mean": 16037.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 4.072494669509595,
|
|
"grad_norm": 0.16311422806532486,
|
|
"learning_rate": 4.089309714200187e-06,
|
|
"loss": 1.0173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2155115008354187,
|
|
"step": 957,
|
|
"valid_targets_mean": 12814.3,
|
|
"valid_targets_min": 10793
|
|
},
|
|
{
|
|
"epoch": 4.076759061833688,
|
|
"grad_norm": 0.17008518119709745,
|
|
"learning_rate": 4.0533626927994185e-06,
|
|
"loss": 1.0294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760984003543854,
|
|
"step": 958,
|
|
"valid_targets_mean": 16213.8,
|
|
"valid_targets_min": 15779
|
|
},
|
|
{
|
|
"epoch": 4.081023454157783,
|
|
"grad_norm": 0.17203247691720971,
|
|
"learning_rate": 4.017556541415888e-06,
|
|
"loss": 1.0215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968873381614685,
|
|
"step": 959,
|
|
"valid_targets_mean": 16128.7,
|
|
"valid_targets_min": 15225
|
|
},
|
|
{
|
|
"epoch": 4.085287846481877,
|
|
"grad_norm": 0.15759084987503583,
|
|
"learning_rate": 3.981891576355352e-06,
|
|
"loss": 1.033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23213127255439758,
|
|
"step": 960,
|
|
"valid_targets_mean": 14446.7,
|
|
"valid_targets_min": 12362
|
|
},
|
|
{
|
|
"epoch": 4.08955223880597,
|
|
"grad_norm": 0.16464042746741422,
|
|
"learning_rate": 3.946368112676346e-06,
|
|
"loss": 0.9613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920254170894623,
|
|
"step": 961,
|
|
"valid_targets_mean": 16109.6,
|
|
"valid_targets_min": 15314
|
|
},
|
|
{
|
|
"epoch": 4.093816631130064,
|
|
"grad_norm": 0.17425631840044417,
|
|
"learning_rate": 3.9109864641874166e-06,
|
|
"loss": 0.9768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19915470480918884,
|
|
"step": 962,
|
|
"valid_targets_mean": 11302.1,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 4.098081023454157,
|
|
"grad_norm": 0.16209513754356383,
|
|
"learning_rate": 3.875746943444316e-06,
|
|
"loss": 1.0004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26291438937187195,
|
|
"step": 963,
|
|
"valid_targets_mean": 14906.4,
|
|
"valid_targets_min": 13182
|
|
},
|
|
{
|
|
"epoch": 4.1023454157782515,
|
|
"grad_norm": 0.17081354223050138,
|
|
"learning_rate": 3.840649861747278e-06,
|
|
"loss": 1.0001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29953575134277344,
|
|
"step": 964,
|
|
"valid_targets_mean": 16189.0,
|
|
"valid_targets_min": 15541
|
|
},
|
|
{
|
|
"epoch": 4.106609808102346,
|
|
"grad_norm": 0.1677930354720929,
|
|
"learning_rate": 3.8056955291382667e-06,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15104900300502777,
|
|
"step": 965,
|
|
"valid_targets_mean": 8370.9,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 4.110874200426439,
|
|
"grad_norm": 0.16354509231928108,
|
|
"learning_rate": 3.7708842543981928e-06,
|
|
"loss": 1.0102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592680752277374,
|
|
"step": 966,
|
|
"valid_targets_mean": 16096.9,
|
|
"valid_targets_min": 15058
|
|
},
|
|
{
|
|
"epoch": 4.115138592750533,
|
|
"grad_norm": 0.16447018627054752,
|
|
"learning_rate": 3.736216345044237e-06,
|
|
"loss": 0.9855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3046714961528778,
|
|
"step": 967,
|
|
"valid_targets_mean": 16148.6,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 4.119402985074627,
|
|
"grad_norm": 0.17326586136059488,
|
|
"learning_rate": 3.7016921073271084e-06,
|
|
"loss": 1.0408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21079136431217194,
|
|
"step": 968,
|
|
"valid_targets_mean": 12434.7,
|
|
"valid_targets_min": 9731
|
|
},
|
|
{
|
|
"epoch": 4.1236673773987205,
|
|
"grad_norm": 0.15867633625313174,
|
|
"learning_rate": 3.6673118462283453e-06,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25667932629585266,
|
|
"step": 969,
|
|
"valid_targets_mean": 16190.2,
|
|
"valid_targets_min": 15606
|
|
},
|
|
{
|
|
"epoch": 4.127931769722815,
|
|
"grad_norm": 0.17485078203246107,
|
|
"learning_rate": 3.6330758654576227e-06,
|
|
"loss": 0.9784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849656343460083,
|
|
"step": 970,
|
|
"valid_targets_mean": 16164.5,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 4.132196162046908,
|
|
"grad_norm": 0.15447506956117477,
|
|
"learning_rate": 3.598984467450055e-06,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2483430951833725,
|
|
"step": 971,
|
|
"valid_targets_mean": 14369.9,
|
|
"valid_targets_min": 12092
|
|
},
|
|
{
|
|
"epoch": 4.136460554371002,
|
|
"grad_norm": 0.17016881271259185,
|
|
"learning_rate": 3.565037953363546e-06,
|
|
"loss": 1.0143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875226140022278,
|
|
"step": 972,
|
|
"valid_targets_mean": 16104.4,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 4.140724946695096,
|
|
"grad_norm": 0.17959212116223483,
|
|
"learning_rate": 3.5312366230761154e-06,
|
|
"loss": 1.0354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23283183574676514,
|
|
"step": 973,
|
|
"valid_targets_mean": 13172.6,
|
|
"valid_targets_min": 3256
|
|
},
|
|
{
|
|
"epoch": 4.144989339019189,
|
|
"grad_norm": 0.15432162678640157,
|
|
"learning_rate": 3.497580775183258e-06,
|
|
"loss": 0.9921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25189337134361267,
|
|
"step": 974,
|
|
"valid_targets_mean": 15601.2,
|
|
"valid_targets_min": 14121
|
|
},
|
|
{
|
|
"epoch": 4.149253731343284,
|
|
"grad_norm": 0.1634454106505598,
|
|
"learning_rate": 3.464070706995295e-06,
|
|
"loss": 1.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3254646956920624,
|
|
"step": 975,
|
|
"valid_targets_mean": 16042.2,
|
|
"valid_targets_min": 14897
|
|
},
|
|
{
|
|
"epoch": 4.153518123667378,
|
|
"grad_norm": 0.1708252383415284,
|
|
"learning_rate": 3.4307067145347417e-06,
|
|
"loss": 1.038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18007853627204895,
|
|
"step": 976,
|
|
"valid_targets_mean": 8586.8,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 4.157782515991471,
|
|
"grad_norm": 0.1542398896632874,
|
|
"learning_rate": 3.397489092533739e-06,
|
|
"loss": 1.0342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551620900630951,
|
|
"step": 977,
|
|
"valid_targets_mean": 16131.9,
|
|
"valid_targets_min": 15537
|
|
},
|
|
{
|
|
"epoch": 4.162046908315565,
|
|
"grad_norm": 0.18247902007514508,
|
|
"learning_rate": 3.364418134431371e-06,
|
|
"loss": 1.0482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3169439136981964,
|
|
"step": 978,
|
|
"valid_targets_mean": 16057.0,
|
|
"valid_targets_min": 15080
|
|
},
|
|
{
|
|
"epoch": 4.166311300639659,
|
|
"grad_norm": 0.16694802259065614,
|
|
"learning_rate": 3.331494132371149e-06,
|
|
"loss": 0.9846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18681488931179047,
|
|
"step": 979,
|
|
"valid_targets_mean": 10399.8,
|
|
"valid_targets_min": 5144
|
|
},
|
|
{
|
|
"epoch": 4.1705756929637525,
|
|
"grad_norm": 0.16907813619908482,
|
|
"learning_rate": 3.2987173771983816e-06,
|
|
"loss": 1.0193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26041150093078613,
|
|
"step": 980,
|
|
"valid_targets_mean": 16078.6,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 4.174840085287847,
|
|
"grad_norm": 0.1770777712940949,
|
|
"learning_rate": 3.266088158457634e-06,
|
|
"loss": 0.9868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3240050673484802,
|
|
"step": 981,
|
|
"valid_targets_mean": 16150.1,
|
|
"valid_targets_min": 15327
|
|
},
|
|
{
|
|
"epoch": 4.17910447761194,
|
|
"grad_norm": 0.15912496641507942,
|
|
"learning_rate": 3.233606764390147e-06,
|
|
"loss": 1.0079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24741700291633606,
|
|
"step": 982,
|
|
"valid_targets_mean": 14295.7,
|
|
"valid_targets_min": 11896
|
|
},
|
|
{
|
|
"epoch": 4.183368869936034,
|
|
"grad_norm": 0.16265146944586914,
|
|
"learning_rate": 3.2012734819313127e-06,
|
|
"loss": 0.9714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27438461780548096,
|
|
"step": 983,
|
|
"valid_targets_mean": 16193.6,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 4.187633262260128,
|
|
"grad_norm": 0.16083598802712898,
|
|
"learning_rate": 3.1690885967081187e-06,
|
|
"loss": 1.0139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30279818177223206,
|
|
"step": 984,
|
|
"valid_targets_mean": 16187.3,
|
|
"valid_targets_min": 15688
|
|
},
|
|
{
|
|
"epoch": 4.1918976545842215,
|
|
"grad_norm": 0.14979200671170342,
|
|
"learning_rate": 3.1370523930366393e-06,
|
|
"loss": 1.0096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.232590913772583,
|
|
"step": 985,
|
|
"valid_targets_mean": 15108.0,
|
|
"valid_targets_min": 13057
|
|
},
|
|
{
|
|
"epoch": 4.196162046908316,
|
|
"grad_norm": 0.17621147779251822,
|
|
"learning_rate": 3.105165153919525e-06,
|
|
"loss": 1.0543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3233955502510071,
|
|
"step": 986,
|
|
"valid_targets_mean": 16057.4,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 4.20042643923241,
|
|
"grad_norm": 0.1756035801626048,
|
|
"learning_rate": 3.073427161043492e-06,
|
|
"loss": 1.0446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19718195497989655,
|
|
"step": 987,
|
|
"valid_targets_mean": 10671.0,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 4.204690831556503,
|
|
"grad_norm": 0.1658033352673812,
|
|
"learning_rate": 3.0418386947768463e-06,
|
|
"loss": 1.0348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562304735183716,
|
|
"step": 988,
|
|
"valid_targets_mean": 15663.9,
|
|
"valid_targets_min": 13837
|
|
},
|
|
{
|
|
"epoch": 4.208955223880597,
|
|
"grad_norm": 0.1682898687908902,
|
|
"learning_rate": 3.01040003416698e-06,
|
|
"loss": 1.0535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3200420141220093,
|
|
"step": 989,
|
|
"valid_targets_mean": 16049.9,
|
|
"valid_targets_min": 14594
|
|
},
|
|
{
|
|
"epoch": 4.21321961620469,
|
|
"grad_norm": 0.16795589651540707,
|
|
"learning_rate": 2.97911145693796e-06,
|
|
"loss": 0.9836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15380680561065674,
|
|
"step": 990,
|
|
"valid_targets_mean": 8766.8,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 4.217484008528785,
|
|
"grad_norm": 0.15635899916733167,
|
|
"learning_rate": 2.947973239488009e-06,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25666990876197815,
|
|
"step": 991,
|
|
"valid_targets_mean": 16073.6,
|
|
"valid_targets_min": 13538
|
|
},
|
|
{
|
|
"epoch": 4.221748400852879,
|
|
"grad_norm": 0.18479556506671047,
|
|
"learning_rate": 2.91698565688711e-06,
|
|
"loss": 1.0424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063018321990967,
|
|
"step": 992,
|
|
"valid_targets_mean": 16132.2,
|
|
"valid_targets_min": 15545
|
|
},
|
|
{
|
|
"epoch": 4.226012793176972,
|
|
"grad_norm": 0.16647450476412914,
|
|
"learning_rate": 2.886148982874566e-06,
|
|
"loss": 0.9808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856815069913864,
|
|
"step": 993,
|
|
"valid_targets_mean": 12287.9,
|
|
"valid_targets_min": 10321
|
|
},
|
|
{
|
|
"epoch": 4.230277185501066,
|
|
"grad_norm": 0.1650596049711428,
|
|
"learning_rate": 2.8554634898565668e-06,
|
|
"loss": 1.0019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27796709537506104,
|
|
"step": 994,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 4.23454157782516,
|
|
"grad_norm": 0.1809480512413989,
|
|
"learning_rate": 2.824929448903806e-06,
|
|
"loss": 0.9823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28689736127853394,
|
|
"step": 995,
|
|
"valid_targets_mean": 16156.2,
|
|
"valid_targets_min": 14699
|
|
},
|
|
{
|
|
"epoch": 4.2388059701492535,
|
|
"grad_norm": 0.15370745546883308,
|
|
"learning_rate": 2.794547129749059e-06,
|
|
"loss": 0.9896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23388049006462097,
|
|
"step": 996,
|
|
"valid_targets_mean": 14443.9,
|
|
"valid_targets_min": 12790
|
|
},
|
|
{
|
|
"epoch": 4.243070362473348,
|
|
"grad_norm": 0.16203382511484396,
|
|
"learning_rate": 2.7643168007848255e-06,
|
|
"loss": 0.9708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27245384454727173,
|
|
"step": 997,
|
|
"valid_targets_mean": 16089.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 4.247334754797441,
|
|
"grad_norm": 0.16967353030673493,
|
|
"learning_rate": 2.734238729060956e-06,
|
|
"loss": 1.0114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20903266966342926,
|
|
"step": 998,
|
|
"valid_targets_mean": 13288.2,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 4.251599147121535,
|
|
"grad_norm": 0.168705045038412,
|
|
"learning_rate": 2.7043131802822653e-06,
|
|
"loss": 1.0119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257682740688324,
|
|
"step": 999,
|
|
"valid_targets_mean": 15930.2,
|
|
"valid_targets_min": 14795
|
|
},
|
|
{
|
|
"epoch": 4.255863539445629,
|
|
"grad_norm": 0.19231465994332417,
|
|
"learning_rate": 2.674540418806222e-06,
|
|
"loss": 1.0501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3070492744445801,
|
|
"step": 1000,
|
|
"valid_targets_mean": 16080.7,
|
|
"valid_targets_min": 14917
|
|
},
|
|
{
|
|
"epoch": 4.2601279317697225,
|
|
"grad_norm": 0.1710007863233519,
|
|
"learning_rate": 2.6449207076405857e-06,
|
|
"loss": 1.0332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18669047951698303,
|
|
"step": 1001,
|
|
"valid_targets_mean": 9100.4,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.264392324093817,
|
|
"grad_norm": 0.16262781178453378,
|
|
"learning_rate": 2.6154543084411035e-06,
|
|
"loss": 0.9947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23665502667427063,
|
|
"step": 1002,
|
|
"valid_targets_mean": 16176.8,
|
|
"valid_targets_min": 15348
|
|
},
|
|
{
|
|
"epoch": 4.268656716417911,
|
|
"grad_norm": 0.17595307243521127,
|
|
"learning_rate": 2.5861414815091834e-06,
|
|
"loss": 1.0022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29712724685668945,
|
|
"step": 1003,
|
|
"valid_targets_mean": 16140.2,
|
|
"valid_targets_min": 14907
|
|
},
|
|
{
|
|
"epoch": 4.272921108742004,
|
|
"grad_norm": 0.15446894931777008,
|
|
"learning_rate": 2.5569824857895987e-06,
|
|
"loss": 0.9775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15721696615219116,
|
|
"step": 1004,
|
|
"valid_targets_mean": 10518.3,
|
|
"valid_targets_min": 5601
|
|
},
|
|
{
|
|
"epoch": 4.277185501066098,
|
|
"grad_norm": 0.16471914325864995,
|
|
"learning_rate": 2.5279775788682083e-06,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25779882073402405,
|
|
"step": 1005,
|
|
"valid_targets_mean": 16179.2,
|
|
"valid_targets_min": 15505
|
|
},
|
|
{
|
|
"epoch": 4.281449893390192,
|
|
"grad_norm": 0.16593655371423804,
|
|
"learning_rate": 2.499127016969671e-06,
|
|
"loss": 1.0034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3106514811515808,
|
|
"step": 1006,
|
|
"valid_targets_mean": 16071.4,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.15988426257142174,
|
|
"learning_rate": 2.4704310549551934e-06,
|
|
"loss": 1.0443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2495417445898056,
|
|
"step": 1007,
|
|
"valid_targets_mean": 15003.7,
|
|
"valid_targets_min": 12787
|
|
},
|
|
{
|
|
"epoch": 4.28997867803838,
|
|
"grad_norm": 0.1649193232476725,
|
|
"learning_rate": 2.441889946320266e-06,
|
|
"loss": 0.9922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27028751373291016,
|
|
"step": 1008,
|
|
"valid_targets_mean": 16140.7,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 4.294243070362473,
|
|
"grad_norm": 0.16539209263258192,
|
|
"learning_rate": 2.4135039431924233e-06,
|
|
"loss": 1.0159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3285196125507355,
|
|
"step": 1009,
|
|
"valid_targets_mean": 16154.0,
|
|
"valid_targets_min": 15556
|
|
},
|
|
{
|
|
"epoch": 4.298507462686567,
|
|
"grad_norm": 0.15009629164504895,
|
|
"learning_rate": 2.3852732963290426e-06,
|
|
"loss": 0.9767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22692888975143433,
|
|
"step": 1010,
|
|
"valid_targets_mean": 15455.0,
|
|
"valid_targets_min": 13975
|
|
},
|
|
{
|
|
"epoch": 4.302771855010661,
|
|
"grad_norm": 0.17494261785315382,
|
|
"learning_rate": 2.3571982551150853e-06,
|
|
"loss": 1.0086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918718755245209,
|
|
"step": 1011,
|
|
"valid_targets_mean": 16222.7,
|
|
"valid_targets_min": 15806
|
|
},
|
|
{
|
|
"epoch": 4.3070362473347545,
|
|
"grad_norm": 0.16531887286752694,
|
|
"learning_rate": 2.329279067560937e-06,
|
|
"loss": 1.018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19952315092086792,
|
|
"step": 1012,
|
|
"valid_targets_mean": 11263.8,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 4.311300639658849,
|
|
"grad_norm": 0.15243334089024038,
|
|
"learning_rate": 2.301515980300182e-06,
|
|
"loss": 0.9703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24621377885341644,
|
|
"step": 1013,
|
|
"valid_targets_mean": 15904.6,
|
|
"valid_targets_min": 14269
|
|
},
|
|
{
|
|
"epoch": 4.315565031982943,
|
|
"grad_norm": 0.1696156362286878,
|
|
"learning_rate": 2.2739092385874527e-06,
|
|
"loss": 1.0225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31331756711006165,
|
|
"step": 1014,
|
|
"valid_targets_mean": 15890.2,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 4.319829424307036,
|
|
"grad_norm": 0.16944189227686046,
|
|
"learning_rate": 2.2464590862962443e-06,
|
|
"loss": 1.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15785104036331177,
|
|
"step": 1015,
|
|
"valid_targets_mean": 9874.5,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 4.32409381663113,
|
|
"grad_norm": 0.15163721169159744,
|
|
"learning_rate": 2.219165765916769e-06,
|
|
"loss": 0.9763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25100529193878174,
|
|
"step": 1016,
|
|
"valid_targets_mean": 16141.7,
|
|
"valid_targets_min": 15225
|
|
},
|
|
{
|
|
"epoch": 4.3283582089552235,
|
|
"grad_norm": 0.16139062458464656,
|
|
"learning_rate": 2.192029518553798e-06,
|
|
"loss": 1.0176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27301234006881714,
|
|
"step": 1017,
|
|
"valid_targets_mean": 16163.5,
|
|
"valid_targets_min": 15023
|
|
},
|
|
{
|
|
"epoch": 4.332622601279318,
|
|
"grad_norm": 0.16698422619677605,
|
|
"learning_rate": 2.165050583924566e-06,
|
|
"loss": 1.0073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17534929513931274,
|
|
"step": 1018,
|
|
"valid_targets_mean": 10685.0,
|
|
"valid_targets_min": 8089
|
|
},
|
|
{
|
|
"epoch": 4.336886993603412,
|
|
"grad_norm": 0.15697779416050714,
|
|
"learning_rate": 2.1382292003566163e-06,
|
|
"loss": 0.9786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26187610626220703,
|
|
"step": 1019,
|
|
"valid_targets_mean": 16124.3,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 4.341151385927505,
|
|
"grad_norm": 0.16866112206724562,
|
|
"learning_rate": 2.1115656047857213e-06,
|
|
"loss": 1.0342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31126171350479126,
|
|
"step": 1020,
|
|
"valid_targets_mean": 16112.7,
|
|
"valid_targets_min": 14259
|
|
},
|
|
{
|
|
"epoch": 4.345415778251599,
|
|
"grad_norm": 0.15065213807725636,
|
|
"learning_rate": 2.0850600327537806e-06,
|
|
"loss": 1.0104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22538825869560242,
|
|
"step": 1021,
|
|
"valid_targets_mean": 14805.6,
|
|
"valid_targets_min": 12847
|
|
},
|
|
{
|
|
"epoch": 4.349680170575693,
|
|
"grad_norm": 0.16502741819996938,
|
|
"learning_rate": 2.058712718406719e-06,
|
|
"loss": 0.9824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26563185453414917,
|
|
"step": 1022,
|
|
"valid_targets_mean": 16220.5,
|
|
"valid_targets_min": 15492
|
|
},
|
|
{
|
|
"epoch": 4.353944562899787,
|
|
"grad_norm": 0.1573403450572898,
|
|
"learning_rate": 2.032523894492471e-06,
|
|
"loss": 0.9938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2223053276538849,
|
|
"step": 1023,
|
|
"valid_targets_mean": 13809.1,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 4.358208955223881,
|
|
"grad_norm": 0.15014771494971305,
|
|
"learning_rate": 2.0064937923588634e-06,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24883639812469482,
|
|
"step": 1024,
|
|
"valid_targets_mean": 15233.9,
|
|
"valid_targets_min": 13301
|
|
},
|
|
{
|
|
"epoch": 4.362473347547974,
|
|
"grad_norm": 0.17815349436266908,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 1.0345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33343401551246643,
|
|
"step": 1025,
|
|
"valid_targets_mean": 16119.5,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 4.366737739872068,
|
|
"grad_norm": 0.1571520268395417,
|
|
"learning_rate": 1.954910671812298e-06,
|
|
"loss": 0.9604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16843195259571075,
|
|
"step": 1026,
|
|
"valid_targets_mean": 10671.9,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 4.371002132196162,
|
|
"grad_norm": 0.1600877305624945,
|
|
"learning_rate": 1.9293581090762894e-06,
|
|
"loss": 0.9774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25779786705970764,
|
|
"step": 1027,
|
|
"valid_targets_mean": 16111.6,
|
|
"valid_targets_min": 15024
|
|
},
|
|
{
|
|
"epoch": 4.3752665245202556,
|
|
"grad_norm": 0.1640378891646234,
|
|
"learning_rate": 1.9039651794708058e-06,
|
|
"loss": 1.0046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30417150259017944,
|
|
"step": 1028,
|
|
"valid_targets_mean": 16166.4,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 4.37953091684435,
|
|
"grad_norm": 0.1592978210094137,
|
|
"learning_rate": 1.8787321073128817e-06,
|
|
"loss": 1.0003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1675044298171997,
|
|
"step": 1029,
|
|
"valid_targets_mean": 10813.5,
|
|
"valid_targets_min": 7229
|
|
},
|
|
{
|
|
"epoch": 4.383795309168444,
|
|
"grad_norm": 0.15196142418764982,
|
|
"learning_rate": 1.8536591155073958e-06,
|
|
"loss": 1.0349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26272517442703247,
|
|
"step": 1030,
|
|
"valid_targets_mean": 16132.2,
|
|
"valid_targets_min": 13998
|
|
},
|
|
{
|
|
"epoch": 4.388059701492537,
|
|
"grad_norm": 0.16137244722688085,
|
|
"learning_rate": 1.8287464255451181e-06,
|
|
"loss": 1.0366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31955209374427795,
|
|
"step": 1031,
|
|
"valid_targets_mean": 16103.5,
|
|
"valid_targets_min": 15221
|
|
},
|
|
{
|
|
"epoch": 4.392324093816631,
|
|
"grad_norm": 0.1560253877137604,
|
|
"learning_rate": 1.803994257500714e-06,
|
|
"loss": 1.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22638271749019623,
|
|
"step": 1032,
|
|
"valid_targets_mean": 12657.4,
|
|
"valid_targets_min": 10184
|
|
},
|
|
{
|
|
"epoch": 4.396588486140725,
|
|
"grad_norm": 0.1592937648378557,
|
|
"learning_rate": 1.7794028300308474e-06,
|
|
"loss": 0.97,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27584147453308105,
|
|
"step": 1033,
|
|
"valid_targets_mean": 16116.2,
|
|
"valid_targets_min": 14473
|
|
},
|
|
{
|
|
"epoch": 4.400852878464819,
|
|
"grad_norm": 0.15663526905175879,
|
|
"learning_rate": 1.7549723603722003e-06,
|
|
"loss": 0.9841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26768210530281067,
|
|
"step": 1034,
|
|
"valid_targets_mean": 16112.7,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 4.405117270788913,
|
|
"grad_norm": 0.15097542648465498,
|
|
"learning_rate": 1.730703064339605e-06,
|
|
"loss": 1.0184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370276004076004,
|
|
"step": 1035,
|
|
"valid_targets_mean": 15234.6,
|
|
"valid_targets_min": 13039
|
|
},
|
|
{
|
|
"epoch": 4.409381663113006,
|
|
"grad_norm": 0.16244715692164918,
|
|
"learning_rate": 1.7065951563241022e-06,
|
|
"loss": 1.013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689659297466278,
|
|
"step": 1036,
|
|
"valid_targets_mean": 16181.4,
|
|
"valid_targets_min": 15559
|
|
},
|
|
{
|
|
"epoch": 4.4136460554371,
|
|
"grad_norm": 0.15813153445282555,
|
|
"learning_rate": 1.682648849291051e-06,
|
|
"loss": 1.0179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20376157760620117,
|
|
"step": 1037,
|
|
"valid_targets_mean": 11859.0,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 4.417910447761194,
|
|
"grad_norm": 0.1510387947396715,
|
|
"learning_rate": 1.6588643547782579e-06,
|
|
"loss": 0.9837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26324427127838135,
|
|
"step": 1038,
|
|
"valid_targets_mean": 16072.0,
|
|
"valid_targets_min": 14287
|
|
},
|
|
{
|
|
"epoch": 4.422174840085288,
|
|
"grad_norm": 0.1631530373592602,
|
|
"learning_rate": 1.6352418828941052e-06,
|
|
"loss": 1.021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30005693435668945,
|
|
"step": 1039,
|
|
"valid_targets_mean": 16157.0,
|
|
"valid_targets_min": 15147
|
|
},
|
|
{
|
|
"epoch": 4.426439232409382,
|
|
"grad_norm": 0.1653269557190585,
|
|
"learning_rate": 1.6117816423156952e-06,
|
|
"loss": 1.0062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1579434871673584,
|
|
"step": 1040,
|
|
"valid_targets_mean": 8889.2,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 4.430703624733475,
|
|
"grad_norm": 0.14601140512061164,
|
|
"learning_rate": 1.5884838402870029e-06,
|
|
"loss": 0.9856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24858561158180237,
|
|
"step": 1041,
|
|
"valid_targets_mean": 15932.7,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 4.434968017057569,
|
|
"grad_norm": 0.1592912536183881,
|
|
"learning_rate": 1.5653486826170384e-06,
|
|
"loss": 1.01,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284557580947876,
|
|
"step": 1042,
|
|
"valid_targets_mean": 16150.2,
|
|
"valid_targets_min": 14556
|
|
},
|
|
{
|
|
"epoch": 4.439232409381663,
|
|
"grad_norm": 0.14634296882098108,
|
|
"learning_rate": 1.5423763736780583e-06,
|
|
"loss": 0.9726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19540275633335114,
|
|
"step": 1043,
|
|
"valid_targets_mean": 11303.3,
|
|
"valid_targets_min": 8757
|
|
},
|
|
{
|
|
"epoch": 4.443496801705757,
|
|
"grad_norm": 0.1606028911978746,
|
|
"learning_rate": 1.5195671164037173e-06,
|
|
"loss": 1.0075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26238173246383667,
|
|
"step": 1044,
|
|
"valid_targets_mean": 16184.0,
|
|
"valid_targets_min": 15290
|
|
},
|
|
{
|
|
"epoch": 4.447761194029851,
|
|
"grad_norm": 0.15880887563413223,
|
|
"learning_rate": 1.496921112287315e-06,
|
|
"loss": 1.0156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28872594237327576,
|
|
"step": 1045,
|
|
"valid_targets_mean": 16116.8,
|
|
"valid_targets_min": 14479
|
|
},
|
|
{
|
|
"epoch": 4.452025586353945,
|
|
"grad_norm": 0.1503886439979893,
|
|
"learning_rate": 1.4744385613799894e-06,
|
|
"loss": 0.991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23140674829483032,
|
|
"step": 1046,
|
|
"valid_targets_mean": 15366.7,
|
|
"valid_targets_min": 14043
|
|
},
|
|
{
|
|
"epoch": 4.456289978678038,
|
|
"grad_norm": 0.15398807665192815,
|
|
"learning_rate": 1.4521196622889644e-06,
|
|
"loss": 1.0175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28529155254364014,
|
|
"step": 1047,
|
|
"valid_targets_mean": 16143.6,
|
|
"valid_targets_min": 15408
|
|
},
|
|
{
|
|
"epoch": 4.460554371002132,
|
|
"grad_norm": 0.15846972825741604,
|
|
"learning_rate": 1.4299646121757892e-06,
|
|
"loss": 0.9533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21494928002357483,
|
|
"step": 1048,
|
|
"valid_targets_mean": 12775.2,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 4.464818763326226,
|
|
"grad_norm": 0.14883790923907791,
|
|
"learning_rate": 1.4079736067545912e-06,
|
|
"loss": 1.0303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26430243253707886,
|
|
"step": 1049,
|
|
"valid_targets_mean": 15916.8,
|
|
"valid_targets_min": 13208
|
|
},
|
|
{
|
|
"epoch": 4.46908315565032,
|
|
"grad_norm": 0.15949849865118698,
|
|
"learning_rate": 1.3861468402903634e-06,
|
|
"loss": 1.0528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3019446134567261,
|
|
"step": 1050,
|
|
"valid_targets_mean": 15767.2,
|
|
"valid_targets_min": 7641
|
|
},
|
|
{
|
|
"epoch": 4.473347547974414,
|
|
"grad_norm": 0.16006328824608898,
|
|
"learning_rate": 1.3644845055972322e-06,
|
|
"loss": 1.0067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17121657729148865,
|
|
"step": 1051,
|
|
"valid_targets_mean": 9511.8,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 4.477611940298507,
|
|
"grad_norm": 0.1520066722967756,
|
|
"learning_rate": 1.3429867940367626e-06,
|
|
"loss": 1.0094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25051623582839966,
|
|
"step": 1052,
|
|
"valid_targets_mean": 16088.7,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 4.481876332622601,
|
|
"grad_norm": 0.15543351987099552,
|
|
"learning_rate": 1.321653895516264e-06,
|
|
"loss": 1.058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3083563446998596,
|
|
"step": 1053,
|
|
"valid_targets_mean": 16089.3,
|
|
"valid_targets_min": 14313
|
|
},
|
|
{
|
|
"epoch": 4.486140724946695,
|
|
"grad_norm": 0.15500162373470328,
|
|
"learning_rate": 1.3004859984871199e-06,
|
|
"loss": 1.0584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1911185383796692,
|
|
"step": 1054,
|
|
"valid_targets_mean": 10396.3,
|
|
"valid_targets_min": 6721
|
|
},
|
|
{
|
|
"epoch": 4.490405117270789,
|
|
"grad_norm": 0.15723817636653856,
|
|
"learning_rate": 1.279483289943102e-06,
|
|
"loss": 1.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262037068605423,
|
|
"step": 1055,
|
|
"valid_targets_mean": 16159.7,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 4.494669509594883,
|
|
"grad_norm": 0.16006229679549797,
|
|
"learning_rate": 1.2586459554187558e-06,
|
|
"loss": 0.9644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28707319498062134,
|
|
"step": 1056,
|
|
"valid_targets_mean": 16055.6,
|
|
"valid_targets_min": 11563
|
|
},
|
|
{
|
|
"epoch": 4.498933901918977,
|
|
"grad_norm": 0.14334699676474721,
|
|
"learning_rate": 1.2379741789877175e-06,
|
|
"loss": 0.9871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2111390084028244,
|
|
"step": 1057,
|
|
"valid_targets_mean": 13391.1,
|
|
"valid_targets_min": 11725
|
|
},
|
|
{
|
|
"epoch": 4.50319829424307,
|
|
"grad_norm": 0.1530149837424847,
|
|
"learning_rate": 1.2174681432611245e-06,
|
|
"loss": 1.0221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3102961480617523,
|
|
"step": 1058,
|
|
"valid_targets_mean": 16118.4,
|
|
"valid_targets_min": 15359
|
|
},
|
|
{
|
|
"epoch": 4.507462686567164,
|
|
"grad_norm": 0.1666265526562195,
|
|
"learning_rate": 1.1971280293859811e-06,
|
|
"loss": 1.0477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946819067001343,
|
|
"step": 1059,
|
|
"valid_targets_mean": 16146.3,
|
|
"valid_targets_min": 15305
|
|
},
|
|
{
|
|
"epoch": 4.5117270788912585,
|
|
"grad_norm": 0.1457230775806161,
|
|
"learning_rate": 1.17695401704357e-06,
|
|
"loss": 1.0072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25192567706108093,
|
|
"step": 1060,
|
|
"valid_targets_mean": 15245.5,
|
|
"valid_targets_min": 13728
|
|
},
|
|
{
|
|
"epoch": 4.515991471215352,
|
|
"grad_norm": 0.1620480528171536,
|
|
"learning_rate": 1.1569462844478552e-06,
|
|
"loss": 0.9563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645701766014099,
|
|
"step": 1061,
|
|
"valid_targets_mean": 16195.4,
|
|
"valid_targets_min": 15305
|
|
},
|
|
{
|
|
"epoch": 4.520255863539446,
|
|
"grad_norm": 0.16142683707754352,
|
|
"learning_rate": 1.1371050083439107e-06,
|
|
"loss": 1.0315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19992002844810486,
|
|
"step": 1062,
|
|
"valid_targets_mean": 10561.4,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 4.524520255863539,
|
|
"grad_norm": 0.14622862418070282,
|
|
"learning_rate": 1.1174303640063622e-06,
|
|
"loss": 1.0269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594802975654602,
|
|
"step": 1063,
|
|
"valid_targets_mean": 16008.1,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 4.528784648187633,
|
|
"grad_norm": 0.1582067915541115,
|
|
"learning_rate": 1.097922525237849e-06,
|
|
"loss": 0.9584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27360403537750244,
|
|
"step": 1064,
|
|
"valid_targets_mean": 16250.7,
|
|
"valid_targets_min": 15657
|
|
},
|
|
{
|
|
"epoch": 4.533049040511727,
|
|
"grad_norm": 0.15399864010070974,
|
|
"learning_rate": 1.078581664367455e-06,
|
|
"loss": 1.0058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1768619269132614,
|
|
"step": 1065,
|
|
"valid_targets_mean": 9856.1,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 4.537313432835821,
|
|
"grad_norm": 0.15167944596731006,
|
|
"learning_rate": 1.0594079522492274e-06,
|
|
"loss": 0.9306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25108233094215393,
|
|
"step": 1066,
|
|
"valid_targets_mean": 16141.2,
|
|
"valid_targets_min": 14841
|
|
},
|
|
{
|
|
"epoch": 4.541577825159915,
|
|
"grad_norm": 0.16155044487851364,
|
|
"learning_rate": 1.040401558260633e-06,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29427772760391235,
|
|
"step": 1067,
|
|
"valid_targets_mean": 16152.6,
|
|
"valid_targets_min": 15628
|
|
},
|
|
{
|
|
"epoch": 4.545842217484008,
|
|
"grad_norm": 0.1608449787433378,
|
|
"learning_rate": 1.0215626503010911e-06,
|
|
"loss": 1.032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21294349431991577,
|
|
"step": 1068,
|
|
"valid_targets_mean": 12159.6,
|
|
"valid_targets_min": 10098
|
|
},
|
|
{
|
|
"epoch": 4.550106609808102,
|
|
"grad_norm": 0.14555507264167167,
|
|
"learning_rate": 1.002891394790475e-06,
|
|
"loss": 1.0014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27116233110427856,
|
|
"step": 1069,
|
|
"valid_targets_mean": 16114.3,
|
|
"valid_targets_min": 15154
|
|
},
|
|
{
|
|
"epoch": 4.554371002132196,
|
|
"grad_norm": 0.15514665887054913,
|
|
"learning_rate": 9.843879566676273e-07,
|
|
"loss": 1.0051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847168445587158,
|
|
"step": 1070,
|
|
"valid_targets_mean": 16146.2,
|
|
"valid_targets_min": 15081
|
|
},
|
|
{
|
|
"epoch": 4.55863539445629,
|
|
"grad_norm": 0.14935522772969523,
|
|
"learning_rate": 9.660524993889386e-07,
|
|
"loss": 1.0106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22059902548789978,
|
|
"step": 1071,
|
|
"valid_targets_mean": 14322.9,
|
|
"valid_targets_min": 12453
|
|
},
|
|
{
|
|
"epoch": 4.562899786780384,
|
|
"grad_norm": 0.1508180934872635,
|
|
"learning_rate": 9.478851849268733e-07,
|
|
"loss": 0.9695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26452839374542236,
|
|
"step": 1072,
|
|
"valid_targets_mean": 16183.2,
|
|
"valid_targets_min": 15361
|
|
},
|
|
{
|
|
"epoch": 4.567164179104478,
|
|
"grad_norm": 0.15771190777876098,
|
|
"learning_rate": 9.298861737685527e-07,
|
|
"loss": 0.9916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22417476773262024,
|
|
"step": 1073,
|
|
"valid_targets_mean": 13273.8,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.14841479834324803,
|
|
"learning_rate": 9.120556249143341e-07,
|
|
"loss": 1.0185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562493681907654,
|
|
"step": 1074,
|
|
"valid_targets_mean": 15460.6,
|
|
"valid_targets_min": 13041
|
|
},
|
|
{
|
|
"epoch": 4.575692963752665,
|
|
"grad_norm": 0.15977629857912234,
|
|
"learning_rate": 8.943936958763988e-07,
|
|
"loss": 1.0483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3335935175418854,
|
|
"step": 1075,
|
|
"valid_targets_mean": 16153.5,
|
|
"valid_targets_min": 15474
|
|
},
|
|
{
|
|
"epoch": 4.5799573560767595,
|
|
"grad_norm": 0.16477286952064188,
|
|
"learning_rate": 8.769005426773836e-07,
|
|
"loss": 1.0093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15570859611034393,
|
|
"step": 1076,
|
|
"valid_targets_mean": 8296.6,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 4.584221748400853,
|
|
"grad_norm": 0.14398661458138798,
|
|
"learning_rate": 8.595763198489714e-07,
|
|
"loss": 0.991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25216251611709595,
|
|
"step": 1077,
|
|
"valid_targets_mean": 15818.3,
|
|
"valid_targets_min": 7637
|
|
},
|
|
{
|
|
"epoch": 4.588486140724947,
|
|
"grad_norm": 0.15797679062016337,
|
|
"learning_rate": 8.42421180430546e-07,
|
|
"loss": 1.0011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2939158082008362,
|
|
"step": 1078,
|
|
"valid_targets_mean": 16082.0,
|
|
"valid_targets_min": 14558
|
|
},
|
|
{
|
|
"epoch": 4.59275053304904,
|
|
"grad_norm": 0.15853786185199106,
|
|
"learning_rate": 8.254352759678386e-07,
|
|
"loss": 1.0096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1950775682926178,
|
|
"step": 1079,
|
|
"valid_targets_mean": 10988.8,
|
|
"valid_targets_min": 7524
|
|
},
|
|
{
|
|
"epoch": 4.597014925373134,
|
|
"grad_norm": 0.15315161643048186,
|
|
"learning_rate": 8.086187565115877e-07,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27116912603378296,
|
|
"step": 1080,
|
|
"valid_targets_mean": 16105.0,
|
|
"valid_targets_min": 14248
|
|
},
|
|
{
|
|
"epoch": 4.601279317697228,
|
|
"grad_norm": 0.15729652259472846,
|
|
"learning_rate": 7.919717706162067e-07,
|
|
"loss": 1.0249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289058119058609,
|
|
"step": 1081,
|
|
"valid_targets_mean": 16170.6,
|
|
"valid_targets_min": 15348
|
|
},
|
|
{
|
|
"epoch": 4.605543710021322,
|
|
"grad_norm": 0.15299507799822196,
|
|
"learning_rate": 7.754944653384777e-07,
|
|
"loss": 0.9961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2105131447315216,
|
|
"step": 1082,
|
|
"valid_targets_mean": 13349.8,
|
|
"valid_targets_min": 10099
|
|
},
|
|
{
|
|
"epoch": 4.609808102345416,
|
|
"grad_norm": 0.15991928398901403,
|
|
"learning_rate": 7.591869862362534e-07,
|
|
"loss": 1.0078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26621466875076294,
|
|
"step": 1083,
|
|
"valid_targets_mean": 16033.9,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 4.61407249466951,
|
|
"grad_norm": 0.15354881551455993,
|
|
"learning_rate": 7.430494773671682e-07,
|
|
"loss": 0.9676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30778414011001587,
|
|
"step": 1084,
|
|
"valid_targets_mean": 16172.6,
|
|
"valid_targets_min": 15109
|
|
},
|
|
{
|
|
"epoch": 4.618336886993603,
|
|
"grad_norm": 0.1743479691706049,
|
|
"learning_rate": 7.270820812873714e-07,
|
|
"loss": 1.0087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2505803108215332,
|
|
"step": 1085,
|
|
"valid_targets_mean": 14830.2,
|
|
"valid_targets_min": 12981
|
|
},
|
|
{
|
|
"epoch": 4.622601279317697,
|
|
"grad_norm": 0.15448738581055893,
|
|
"learning_rate": 7.112849390502563e-07,
|
|
"loss": 0.9816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26799455285072327,
|
|
"step": 1086,
|
|
"valid_targets_mean": 16209.5,
|
|
"valid_targets_min": 15350
|
|
},
|
|
{
|
|
"epoch": 4.6268656716417915,
|
|
"grad_norm": 0.15706645950649392,
|
|
"learning_rate": 6.956581902052306e-07,
|
|
"loss": 1.0225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20473934710025787,
|
|
"step": 1087,
|
|
"valid_targets_mean": 11744.4,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 4.631130063965885,
|
|
"grad_norm": 0.143365373014936,
|
|
"learning_rate": 6.802019727964593e-07,
|
|
"loss": 0.9888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2392513006925583,
|
|
"step": 1088,
|
|
"valid_targets_mean": 16153.6,
|
|
"valid_targets_min": 15511
|
|
},
|
|
{
|
|
"epoch": 4.635394456289979,
|
|
"grad_norm": 0.15945068466229714,
|
|
"learning_rate": 6.64916423361679e-07,
|
|
"loss": 1.0181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2937304973602295,
|
|
"step": 1089,
|
|
"valid_targets_mean": 16096.2,
|
|
"valid_targets_min": 15122
|
|
},
|
|
{
|
|
"epoch": 4.639658848614072,
|
|
"grad_norm": 0.1591581081828491,
|
|
"learning_rate": 6.498016769309567e-07,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1473933458328247,
|
|
"step": 1090,
|
|
"valid_targets_mean": 8071.3,
|
|
"valid_targets_min": 2493
|
|
},
|
|
{
|
|
"epoch": 4.643923240938166,
|
|
"grad_norm": 0.13705684735753473,
|
|
"learning_rate": 6.348578670255224e-07,
|
|
"loss": 1.0004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2415798306465149,
|
|
"step": 1091,
|
|
"valid_targets_mean": 16186.2,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 4.6481876332622605,
|
|
"grad_norm": 0.14894995926384885,
|
|
"learning_rate": 6.200851256565799e-07,
|
|
"loss": 1.0026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29065680503845215,
|
|
"step": 1092,
|
|
"valid_targets_mean": 15866.9,
|
|
"valid_targets_min": 5993
|
|
},
|
|
{
|
|
"epoch": 4.652452025586354,
|
|
"grad_norm": 0.14628259051610237,
|
|
"learning_rate": 6.054835833241357e-07,
|
|
"loss": 0.9657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20221902430057526,
|
|
"step": 1093,
|
|
"valid_targets_mean": 13206.6,
|
|
"valid_targets_min": 10096
|
|
},
|
|
{
|
|
"epoch": 4.656716417910448,
|
|
"grad_norm": 0.1465313831298571,
|
|
"learning_rate": 5.910533690158593e-07,
|
|
"loss": 1.0124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25688618421554565,
|
|
"step": 1094,
|
|
"valid_targets_mean": 16232.6,
|
|
"valid_targets_min": 15734
|
|
},
|
|
{
|
|
"epoch": 4.660980810234541,
|
|
"grad_norm": 0.14879824830331395,
|
|
"learning_rate": 5.767946102059307e-07,
|
|
"loss": 0.9861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276365727186203,
|
|
"step": 1095,
|
|
"valid_targets_mean": 16205.2,
|
|
"valid_targets_min": 15637
|
|
},
|
|
{
|
|
"epoch": 4.665245202558635,
|
|
"grad_norm": 0.14986577980104723,
|
|
"learning_rate": 5.627074328539173e-07,
|
|
"loss": 1.0086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22488278150558472,
|
|
"step": 1096,
|
|
"valid_targets_mean": 13320.2,
|
|
"valid_targets_min": 11667
|
|
},
|
|
{
|
|
"epoch": 4.669509594882729,
|
|
"grad_norm": 0.15263641354366045,
|
|
"learning_rate": 5.487919614036741e-07,
|
|
"loss": 1.0345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2853548526763916,
|
|
"step": 1097,
|
|
"valid_targets_mean": 16139.1,
|
|
"valid_targets_min": 14753
|
|
},
|
|
{
|
|
"epoch": 4.673773987206823,
|
|
"grad_norm": 0.15304900679796296,
|
|
"learning_rate": 5.350483187822231e-07,
|
|
"loss": 1.03,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25601962208747864,
|
|
"step": 1098,
|
|
"valid_targets_mean": 12700.3,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 4.678038379530917,
|
|
"grad_norm": 0.13864305612866518,
|
|
"learning_rate": 5.214766263986848e-07,
|
|
"loss": 1.0247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643899917602539,
|
|
"step": 1099,
|
|
"valid_targets_mean": 15921.6,
|
|
"valid_targets_min": 15264
|
|
},
|
|
{
|
|
"epoch": 4.682302771855011,
|
|
"grad_norm": 0.1585008120687658,
|
|
"learning_rate": 5.080770041431926e-07,
|
|
"loss": 0.9869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867545485496521,
|
|
"step": 1100,
|
|
"valid_targets_mean": 16070.0,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 4.686567164179104,
|
|
"grad_norm": 0.15796797836556023,
|
|
"learning_rate": 4.948495703858492e-07,
|
|
"loss": 0.9744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14525067806243896,
|
|
"step": 1101,
|
|
"valid_targets_mean": 8999.8,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 4.690831556503198,
|
|
"grad_norm": 0.14332859110714966,
|
|
"learning_rate": 4.81794441975667e-07,
|
|
"loss": 1.0161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26003924012184143,
|
|
"step": 1102,
|
|
"valid_targets_mean": 16092.8,
|
|
"valid_targets_min": 14994
|
|
},
|
|
{
|
|
"epoch": 4.6950959488272925,
|
|
"grad_norm": 0.1627151240096481,
|
|
"learning_rate": 4.689117342395388e-07,
|
|
"loss": 0.9894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27263838052749634,
|
|
"step": 1103,
|
|
"valid_targets_mean": 16196.6,
|
|
"valid_targets_min": 15475
|
|
},
|
|
{
|
|
"epoch": 4.699360341151386,
|
|
"grad_norm": 0.15728636158959014,
|
|
"learning_rate": 4.5620156098122204e-07,
|
|
"loss": 1.0145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19399403035640717,
|
|
"step": 1104,
|
|
"valid_targets_mean": 10582.8,
|
|
"valid_targets_min": 5279
|
|
},
|
|
{
|
|
"epoch": 4.70362473347548,
|
|
"grad_norm": 0.14868122226793565,
|
|
"learning_rate": 4.4366403448033334e-07,
|
|
"loss": 1.024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25753432512283325,
|
|
"step": 1105,
|
|
"valid_targets_mean": 16152.5,
|
|
"valid_targets_min": 15082
|
|
},
|
|
{
|
|
"epoch": 4.707889125799573,
|
|
"grad_norm": 0.1556625193647965,
|
|
"learning_rate": 4.3129926549136057e-07,
|
|
"loss": 1.0442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863929867744446,
|
|
"step": 1106,
|
|
"valid_targets_mean": 16147.2,
|
|
"valid_targets_min": 15112
|
|
},
|
|
{
|
|
"epoch": 4.712153518123667,
|
|
"grad_norm": 0.14446145619265383,
|
|
"learning_rate": 4.191073632426701e-07,
|
|
"loss": 0.9751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21357882022857666,
|
|
"step": 1107,
|
|
"valid_targets_mean": 13378.2,
|
|
"valid_targets_min": 10566
|
|
},
|
|
{
|
|
"epoch": 4.7164179104477615,
|
|
"grad_norm": 0.16206675648159802,
|
|
"learning_rate": 4.0708843543555643e-07,
|
|
"loss": 0.9997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900570034980774,
|
|
"step": 1108,
|
|
"valid_targets_mean": 15993.5,
|
|
"valid_targets_min": 10506
|
|
},
|
|
{
|
|
"epoch": 4.720682302771855,
|
|
"grad_norm": 0.15101141923399938,
|
|
"learning_rate": 3.95242588243292e-07,
|
|
"loss": 0.9856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901330888271332,
|
|
"step": 1109,
|
|
"valid_targets_mean": 16143.4,
|
|
"valid_targets_min": 15280
|
|
},
|
|
{
|
|
"epoch": 4.724946695095949,
|
|
"grad_norm": 0.14621637188831488,
|
|
"learning_rate": 3.8356992631017e-07,
|
|
"loss": 1.0324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23136021196842194,
|
|
"step": 1110,
|
|
"valid_targets_mean": 15238.5,
|
|
"valid_targets_min": 13159
|
|
},
|
|
{
|
|
"epoch": 4.729211087420042,
|
|
"grad_norm": 0.1524386161010507,
|
|
"learning_rate": 3.720705527506008e-07,
|
|
"loss": 1.0195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30147069692611694,
|
|
"step": 1111,
|
|
"valid_targets_mean": 16091.4,
|
|
"valid_targets_min": 14723
|
|
},
|
|
{
|
|
"epoch": 4.733475479744136,
|
|
"grad_norm": 0.1619679827146892,
|
|
"learning_rate": 3.60744569148197e-07,
|
|
"loss": 1.066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20357254147529602,
|
|
"step": 1112,
|
|
"valid_targets_mean": 11411.5,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 4.73773987206823,
|
|
"grad_norm": 0.14349005246341898,
|
|
"learning_rate": 3.4959207555485873e-07,
|
|
"loss": 1.0372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27663302421569824,
|
|
"step": 1113,
|
|
"valid_targets_mean": 15943.1,
|
|
"valid_targets_min": 14159
|
|
},
|
|
{
|
|
"epoch": 4.742004264392325,
|
|
"grad_norm": 0.16716344154424714,
|
|
"learning_rate": 3.3861317048992317e-07,
|
|
"loss": 1.04,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3008594214916229,
|
|
"step": 1114,
|
|
"valid_targets_mean": 16178.0,
|
|
"valid_targets_min": 15181
|
|
},
|
|
{
|
|
"epoch": 4.746268656716418,
|
|
"grad_norm": 0.14277538969640619,
|
|
"learning_rate": 3.278079509392562e-07,
|
|
"loss": 0.97,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17491425573825836,
|
|
"step": 1115,
|
|
"valid_targets_mean": 9657.3,
|
|
"valid_targets_min": 3062
|
|
},
|
|
{
|
|
"epoch": 4.750533049040512,
|
|
"grad_norm": 0.1411692924943566,
|
|
"learning_rate": 3.171765123544224e-07,
|
|
"loss": 1.0159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25507792830467224,
|
|
"step": 1116,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 13436
|
|
},
|
|
{
|
|
"epoch": 4.754797441364605,
|
|
"grad_norm": 0.15818532876299207,
|
|
"learning_rate": 3.06718948651834e-07,
|
|
"loss": 1.0006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2846674919128418,
|
|
"step": 1117,
|
|
"valid_targets_mean": 16174.9,
|
|
"valid_targets_min": 15128
|
|
},
|
|
{
|
|
"epoch": 4.759061833688699,
|
|
"grad_norm": 0.14543713743713674,
|
|
"learning_rate": 2.964353522119168e-07,
|
|
"loss": 1.0132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241479456424713,
|
|
"step": 1118,
|
|
"valid_targets_mean": 13306.7,
|
|
"valid_targets_min": 11186
|
|
},
|
|
{
|
|
"epoch": 4.7633262260127935,
|
|
"grad_norm": 0.15406571881357786,
|
|
"learning_rate": 2.863258138783032e-07,
|
|
"loss": 1.0327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29350900650024414,
|
|
"step": 1119,
|
|
"valid_targets_mean": 16107.2,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 4.767590618336887,
|
|
"grad_norm": 0.1454041308304772,
|
|
"learning_rate": 2.7639042295702245e-07,
|
|
"loss": 0.9931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819235026836395,
|
|
"step": 1120,
|
|
"valid_targets_mean": 16163.9,
|
|
"valid_targets_min": 15717
|
|
},
|
|
{
|
|
"epoch": 4.771855010660981,
|
|
"grad_norm": 0.14077932344598632,
|
|
"learning_rate": 2.666292672157056e-07,
|
|
"loss": 0.9905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23407378792762756,
|
|
"step": 1121,
|
|
"valid_targets_mean": 15291.8,
|
|
"valid_targets_min": 13903
|
|
},
|
|
{
|
|
"epoch": 4.776119402985074,
|
|
"grad_norm": 0.1608947735935997,
|
|
"learning_rate": 2.570424328828325e-07,
|
|
"loss": 1.01,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588140070438385,
|
|
"step": 1122,
|
|
"valid_targets_mean": 16229.8,
|
|
"valid_targets_min": 15610
|
|
},
|
|
{
|
|
"epoch": 4.780383795309168,
|
|
"grad_norm": 0.15743473018186965,
|
|
"learning_rate": 2.4763000464694377e-07,
|
|
"loss": 1.0033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23125317692756653,
|
|
"step": 1123,
|
|
"valid_targets_mean": 12867.4,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 4.7846481876332625,
|
|
"grad_norm": 0.14635899385747156,
|
|
"learning_rate": 2.383920656559102e-07,
|
|
"loss": 1.0252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757848799228668,
|
|
"step": 1124,
|
|
"valid_targets_mean": 15862.7,
|
|
"valid_targets_min": 14853
|
|
},
|
|
{
|
|
"epoch": 4.788912579957356,
|
|
"grad_norm": 0.15057907442569857,
|
|
"learning_rate": 2.2932869751619568e-07,
|
|
"loss": 0.983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796521782875061,
|
|
"step": 1125,
|
|
"valid_targets_mean": 16139.7,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 4.79317697228145,
|
|
"grad_norm": 0.15462099524501327,
|
|
"learning_rate": 2.2043998029212643e-07,
|
|
"loss": 1.0425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19477930665016174,
|
|
"step": 1126,
|
|
"valid_targets_mean": 10342.5,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 4.797441364605544,
|
|
"grad_norm": 0.14428333450435837,
|
|
"learning_rate": 2.1172599250519398e-07,
|
|
"loss": 0.998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23206013441085815,
|
|
"step": 1127,
|
|
"valid_targets_mean": 16088.4,
|
|
"valid_targets_min": 14763
|
|
},
|
|
{
|
|
"epoch": 4.801705756929637,
|
|
"grad_norm": 0.15760507966173962,
|
|
"learning_rate": 2.0318681113336013e-07,
|
|
"loss": 1.0299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897353768348694,
|
|
"step": 1128,
|
|
"valid_targets_mean": 16119.7,
|
|
"valid_targets_min": 14736
|
|
},
|
|
{
|
|
"epoch": 4.8059701492537314,
|
|
"grad_norm": 0.14776221098452197,
|
|
"learning_rate": 1.9482251161037302e-07,
|
|
"loss": 0.9977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1809859275817871,
|
|
"step": 1129,
|
|
"valid_targets_mean": 9719.4,
|
|
"valid_targets_min": 5689
|
|
},
|
|
{
|
|
"epoch": 4.810234541577826,
|
|
"grad_norm": 0.14661112551393696,
|
|
"learning_rate": 1.866331678251032e-07,
|
|
"loss": 1.0252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804567217826843,
|
|
"step": 1130,
|
|
"valid_targets_mean": 16003.1,
|
|
"valid_targets_min": 13828
|
|
},
|
|
{
|
|
"epoch": 4.814498933901919,
|
|
"grad_norm": 0.14900277046790783,
|
|
"learning_rate": 1.7861885212088869e-07,
|
|
"loss": 0.9969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28320544958114624,
|
|
"step": 1131,
|
|
"valid_targets_mean": 15943.6,
|
|
"valid_targets_min": 13414
|
|
},
|
|
{
|
|
"epoch": 4.818763326226013,
|
|
"grad_norm": 0.14358657341847153,
|
|
"learning_rate": 1.7077963529490204e-07,
|
|
"loss": 0.9993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21058914065361023,
|
|
"step": 1132,
|
|
"valid_targets_mean": 13237.8,
|
|
"valid_targets_min": 10666
|
|
},
|
|
{
|
|
"epoch": 4.823027718550106,
|
|
"grad_norm": 0.1503311290624774,
|
|
"learning_rate": 1.6311558659751535e-07,
|
|
"loss": 0.9778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585322856903076,
|
|
"step": 1133,
|
|
"valid_targets_mean": 16197.3,
|
|
"valid_targets_min": 14919
|
|
},
|
|
{
|
|
"epoch": 4.8272921108742,
|
|
"grad_norm": 0.15520941024351134,
|
|
"learning_rate": 1.5562677373169855e-07,
|
|
"loss": 1.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31580662727355957,
|
|
"step": 1134,
|
|
"valid_targets_mean": 15890.8,
|
|
"valid_targets_min": 11287
|
|
},
|
|
{
|
|
"epoch": 4.8315565031982945,
|
|
"grad_norm": 0.14840007430449761,
|
|
"learning_rate": 1.483132628524131e-07,
|
|
"loss": 1.0291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24786341190338135,
|
|
"step": 1135,
|
|
"valid_targets_mean": 14563.6,
|
|
"valid_targets_min": 12694
|
|
},
|
|
{
|
|
"epoch": 4.835820895522388,
|
|
"grad_norm": 0.15658968069087506,
|
|
"learning_rate": 1.4117511856603262e-07,
|
|
"loss": 1.0482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30078184604644775,
|
|
"step": 1136,
|
|
"valid_targets_mean": 16095.4,
|
|
"valid_targets_min": 15282
|
|
},
|
|
{
|
|
"epoch": 4.840085287846482,
|
|
"grad_norm": 0.15285312174229707,
|
|
"learning_rate": 1.342124039297721e-07,
|
|
"loss": 1.0314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19714577496051788,
|
|
"step": 1137,
|
|
"valid_targets_mean": 10712.7,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 4.844349680170575,
|
|
"grad_norm": 0.14167262709410533,
|
|
"learning_rate": 1.2742518045112396e-07,
|
|
"loss": 1.0354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605418562889099,
|
|
"step": 1138,
|
|
"valid_targets_mean": 15546.0,
|
|
"valid_targets_min": 14178
|
|
},
|
|
{
|
|
"epoch": 4.848614072494669,
|
|
"grad_norm": 0.15674575697411522,
|
|
"learning_rate": 1.2081350808732518e-07,
|
|
"loss": 1.0282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31616824865341187,
|
|
"step": 1139,
|
|
"valid_targets_mean": 16119.0,
|
|
"valid_targets_min": 15332
|
|
},
|
|
{
|
|
"epoch": 4.8528784648187635,
|
|
"grad_norm": 0.15423989041611458,
|
|
"learning_rate": 1.143774452448243e-07,
|
|
"loss": 1.0315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16706404089927673,
|
|
"step": 1140,
|
|
"valid_targets_mean": 9525.8,
|
|
"valid_targets_min": 4937
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.14447090658394057,
|
|
"learning_rate": 1.0811704877875528e-07,
|
|
"loss": 0.9732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24503830075263977,
|
|
"step": 1141,
|
|
"valid_targets_mean": 16191.3,
|
|
"valid_targets_min": 15163
|
|
},
|
|
{
|
|
"epoch": 4.861407249466951,
|
|
"grad_norm": 0.1615026471556301,
|
|
"learning_rate": 1.0203237399245336e-07,
|
|
"loss": 1.0181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28254231810569763,
|
|
"step": 1142,
|
|
"valid_targets_mean": 16124.7,
|
|
"valid_targets_min": 15328
|
|
},
|
|
{
|
|
"epoch": 4.865671641791045,
|
|
"grad_norm": 0.14928773912115367,
|
|
"learning_rate": 9.612347463694882e-08,
|
|
"loss": 1.0314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19812950491905212,
|
|
"step": 1143,
|
|
"valid_targets_mean": 12115.9,
|
|
"valid_targets_min": 9191
|
|
},
|
|
{
|
|
"epoch": 4.869936034115138,
|
|
"grad_norm": 0.15010321527120785,
|
|
"learning_rate": 9.039040291050738e-08,
|
|
"loss": 0.9814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673875689506531,
|
|
"step": 1144,
|
|
"valid_targets_mean": 16236.7,
|
|
"valid_targets_min": 15773
|
|
},
|
|
{
|
|
"epoch": 4.8742004264392325,
|
|
"grad_norm": 0.15861682212571024,
|
|
"learning_rate": 8.483320945815499e-08,
|
|
"loss": 1.0056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802891731262207,
|
|
"step": 1145,
|
|
"valid_targets_mean": 16165.2,
|
|
"valid_targets_min": 14819
|
|
},
|
|
{
|
|
"epoch": 4.878464818763327,
|
|
"grad_norm": 0.13882421330125413,
|
|
"learning_rate": 7.945194337124262e-08,
|
|
"loss": 1.0089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2199188768863678,
|
|
"step": 1146,
|
|
"valid_targets_mean": 14201.6,
|
|
"valid_targets_min": 11573
|
|
},
|
|
{
|
|
"epoch": 4.88272921108742,
|
|
"grad_norm": 0.1470938082355776,
|
|
"learning_rate": 7.424665218700444e-08,
|
|
"loss": 1.0122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2924698293209076,
|
|
"step": 1147,
|
|
"valid_targets_mean": 16140.8,
|
|
"valid_targets_min": 15359
|
|
},
|
|
{
|
|
"epoch": 4.886993603411514,
|
|
"grad_norm": 0.14990143490007027,
|
|
"learning_rate": 6.921738188814254e-08,
|
|
"loss": 0.9767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2214415818452835,
|
|
"step": 1148,
|
|
"valid_targets_mean": 13035.8,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 4.891257995735607,
|
|
"grad_norm": 0.14054548406236866,
|
|
"learning_rate": 6.436417690241614e-08,
|
|
"loss": 0.9932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27612608671188354,
|
|
"step": 1149,
|
|
"valid_targets_mean": 15234.4,
|
|
"valid_targets_min": 13564
|
|
},
|
|
{
|
|
"epoch": 4.895522388059701,
|
|
"grad_norm": 0.15582504420991788,
|
|
"learning_rate": 5.968708010225532e-08,
|
|
"loss": 0.949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27208343148231506,
|
|
"step": 1150,
|
|
"valid_targets_mean": 16216.5,
|
|
"valid_targets_min": 15713
|
|
},
|
|
{
|
|
"epoch": 4.899786780383796,
|
|
"grad_norm": 0.15383698638285176,
|
|
"learning_rate": 5.518613280437901e-08,
|
|
"loss": 1.0332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17712029814720154,
|
|
"step": 1151,
|
|
"valid_targets_mean": 9263.9,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 4.904051172707889,
|
|
"grad_norm": 0.1421412073915396,
|
|
"learning_rate": 5.0861374769426433e-08,
|
|
"loss": 1.0092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26269394159317017,
|
|
"step": 1152,
|
|
"valid_targets_mean": 15946.3,
|
|
"valid_targets_min": 14850
|
|
},
|
|
{
|
|
"epoch": 4.908315565031983,
|
|
"grad_norm": 0.1600097468312609,
|
|
"learning_rate": 4.671284420161071e-08,
|
|
"loss": 1.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32577964663505554,
|
|
"step": 1153,
|
|
"valid_targets_mean": 15997.7,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 4.912579957356077,
|
|
"grad_norm": 0.15738714930436676,
|
|
"learning_rate": 4.274057774838136e-08,
|
|
"loss": 1.0205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18098321557044983,
|
|
"step": 1154,
|
|
"valid_targets_mean": 10754.5,
|
|
"valid_targets_min": 7226
|
|
},
|
|
{
|
|
"epoch": 4.91684434968017,
|
|
"grad_norm": 0.14450740335662998,
|
|
"learning_rate": 3.894461050010012e-08,
|
|
"loss": 1.0047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27671700716018677,
|
|
"step": 1155,
|
|
"valid_targets_mean": 16053.7,
|
|
"valid_targets_min": 15223
|
|
},
|
|
{
|
|
"epoch": 4.9211087420042645,
|
|
"grad_norm": 0.15489667907035298,
|
|
"learning_rate": 3.5324975989725615e-08,
|
|
"loss": 0.9844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30148401856422424,
|
|
"step": 1156,
|
|
"valid_targets_mean": 16156.8,
|
|
"valid_targets_min": 15596
|
|
},
|
|
{
|
|
"epoch": 4.925373134328359,
|
|
"grad_norm": 0.14276493452378128,
|
|
"learning_rate": 3.188170619252473e-08,
|
|
"loss": 0.9553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20629984140396118,
|
|
"step": 1157,
|
|
"valid_targets_mean": 12721.6,
|
|
"valid_targets_min": 10683
|
|
},
|
|
{
|
|
"epoch": 4.929637526652452,
|
|
"grad_norm": 0.1499066761403792,
|
|
"learning_rate": 2.8614831525786147e-08,
|
|
"loss": 0.9996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26819825172424316,
|
|
"step": 1158,
|
|
"valid_targets_mean": 16143.8,
|
|
"valid_targets_min": 14124
|
|
},
|
|
{
|
|
"epoch": 4.933901918976546,
|
|
"grad_norm": 0.1546352831828286,
|
|
"learning_rate": 2.552438084855613e-08,
|
|
"loss": 1.0172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29075586795806885,
|
|
"step": 1159,
|
|
"valid_targets_mean": 16117.7,
|
|
"valid_targets_min": 14424
|
|
},
|
|
{
|
|
"epoch": 4.938166311300639,
|
|
"grad_norm": 0.14296264281771326,
|
|
"learning_rate": 2.2610381461372068e-08,
|
|
"loss": 1.0429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27615079283714294,
|
|
"step": 1160,
|
|
"valid_targets_mean": 15388.0,
|
|
"valid_targets_min": 13731
|
|
},
|
|
{
|
|
"epoch": 4.9424307036247335,
|
|
"grad_norm": 0.1486321420613884,
|
|
"learning_rate": 1.987285910603598e-08,
|
|
"loss": 0.9986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29423749446868896,
|
|
"step": 1161,
|
|
"valid_targets_mean": 16165.4,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 4.946695095948828,
|
|
"grad_norm": 0.1549221284940577,
|
|
"learning_rate": 1.7311837965379164e-08,
|
|
"loss": 0.9956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19188982248306274,
|
|
"step": 1162,
|
|
"valid_targets_mean": 10865.3,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 4.950959488272921,
|
|
"grad_norm": 0.14892217565418125,
|
|
"learning_rate": 1.4927340663046798e-08,
|
|
"loss": 1.0422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26834145188331604,
|
|
"step": 1163,
|
|
"valid_targets_mean": 16131.4,
|
|
"valid_targets_min": 14555
|
|
},
|
|
{
|
|
"epoch": 4.955223880597015,
|
|
"grad_norm": 0.1517686797669809,
|
|
"learning_rate": 1.2719388263300325e-08,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813887596130371,
|
|
"step": 1164,
|
|
"valid_targets_mean": 16143.4,
|
|
"valid_targets_min": 15234
|
|
},
|
|
{
|
|
"epoch": 4.959488272921108,
|
|
"grad_norm": 0.15087023278229247,
|
|
"learning_rate": 1.0688000270839827e-08,
|
|
"loss": 1.0049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17231392860412598,
|
|
"step": 1165,
|
|
"valid_targets_mean": 9892.4,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 4.963752665245202,
|
|
"grad_norm": 0.14010312630125438,
|
|
"learning_rate": 8.833194630615271e-09,
|
|
"loss": 1.0094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25139176845550537,
|
|
"step": 1166,
|
|
"valid_targets_mean": 16117.2,
|
|
"valid_targets_min": 14248
|
|
},
|
|
{
|
|
"epoch": 4.968017057569297,
|
|
"grad_norm": 0.14920848498253594,
|
|
"learning_rate": 7.154987727682194e-09,
|
|
"loss": 0.9357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747774124145508,
|
|
"step": 1167,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15498
|
|
},
|
|
{
|
|
"epoch": 4.97228144989339,
|
|
"grad_norm": 0.14459040867917233,
|
|
"learning_rate": 5.6533943870462625e-09,
|
|
"loss": 1.0106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20723284780979156,
|
|
"step": 1168,
|
|
"valid_targets_mean": 12527.4,
|
|
"valid_targets_min": 9734
|
|
},
|
|
{
|
|
"epoch": 4.976545842217484,
|
|
"grad_norm": 0.14742734957961473,
|
|
"learning_rate": 4.328427873541152e-09,
|
|
"loss": 1.0015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24740174412727356,
|
|
"step": 1169,
|
|
"valid_targets_mean": 16187.4,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 4.980810234541578,
|
|
"grad_norm": 0.1569923184042177,
|
|
"learning_rate": 3.1800998917086432e-09,
|
|
"loss": 1.0297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2794538736343384,
|
|
"step": 1170,
|
|
"valid_targets_mean": 16146.8,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 4.985074626865671,
|
|
"grad_norm": 0.14694235426435637,
|
|
"learning_rate": 2.2084205856920393e-09,
|
|
"loss": 1.02,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22485260665416718,
|
|
"step": 1171,
|
|
"valid_targets_mean": 14174.9,
|
|
"valid_targets_min": 11882
|
|
},
|
|
{
|
|
"epoch": 4.9893390191897655,
|
|
"grad_norm": 0.1503276282480094,
|
|
"learning_rate": 1.4133985391473482e-09,
|
|
"loss": 1.0201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849038541316986,
|
|
"step": 1172,
|
|
"valid_targets_mean": 16147.8,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 4.99360341151386,
|
|
"grad_norm": 0.15542866475604034,
|
|
"learning_rate": 7.950407751722288e-10,
|
|
"loss": 1.0032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25908148288726807,
|
|
"step": 1173,
|
|
"valid_targets_mean": 12998.6,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 4.997867803837953,
|
|
"grad_norm": 0.1441501696307319,
|
|
"learning_rate": 3.5335275624159835e-10,
|
|
"loss": 1.0496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24824313819408417,
|
|
"step": 1174,
|
|
"valid_targets_mean": 15772.4,
|
|
"valid_targets_min": 14853
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.22893672755573746,
|
|
"learning_rate": 8.833838415212014e-11,
|
|
"loss": 1.0091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4849323630332947,
|
|
"step": 1175,
|
|
"valid_targets_mean": 11353.3,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4849323630332947,
|
|
"step": 1175,
|
|
"total_flos": 1940656491724800.0,
|
|
"train_loss": 1.0709928606926127,
|
|
"train_runtime": 5219.2407,
|
|
"train_samples_per_second": 28.74,
|
|
"train_steps_per_second": 0.225,
|
|
"valid_targets_mean": 11353.3,
|
|
"valid_targets_min": 2084
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1175,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1940656491724800.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|