Model: open-sci/sft__ot30k_Qwen2.5-1.5B-DPO-Tulu3-decontaminated Source: Original Platform
12973 lines
362 KiB
JSON
12973 lines
362 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1175,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0042643923240938165,
|
|
"grad_norm": 2.5912132310951623,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.4683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3637838363647461,
|
|
"step": 1,
|
|
"valid_targets_mean": 16166.3,
|
|
"valid_targets_min": 15435
|
|
},
|
|
{
|
|
"epoch": 0.008528784648187633,
|
|
"grad_norm": 2.633706304465534,
|
|
"learning_rate": 3.3898305084745766e-07,
|
|
"loss": 1.4793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43529486656188965,
|
|
"step": 2,
|
|
"valid_targets_mean": 16231.4,
|
|
"valid_targets_min": 15861
|
|
},
|
|
{
|
|
"epoch": 0.01279317697228145,
|
|
"grad_norm": 2.5974910700245264,
|
|
"learning_rate": 6.779661016949153e-07,
|
|
"loss": 1.4998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3247825503349304,
|
|
"step": 3,
|
|
"valid_targets_mean": 12902.7,
|
|
"valid_targets_min": 9698
|
|
},
|
|
{
|
|
"epoch": 0.017057569296375266,
|
|
"grad_norm": 2.6121819436088614,
|
|
"learning_rate": 1.016949152542373e-06,
|
|
"loss": 1.5653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3887319564819336,
|
|
"step": 4,
|
|
"valid_targets_mean": 16193.7,
|
|
"valid_targets_min": 15332
|
|
},
|
|
{
|
|
"epoch": 0.021321961620469083,
|
|
"grad_norm": 2.548523661705242,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 1.4902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4223015308380127,
|
|
"step": 5,
|
|
"valid_targets_mean": 16192.6,
|
|
"valid_targets_min": 15561
|
|
},
|
|
{
|
|
"epoch": 0.0255863539445629,
|
|
"grad_norm": 2.5571521946752354,
|
|
"learning_rate": 1.6949152542372882e-06,
|
|
"loss": 1.4791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3385732173919678,
|
|
"step": 6,
|
|
"valid_targets_mean": 14216.8,
|
|
"valid_targets_min": 11918
|
|
},
|
|
{
|
|
"epoch": 0.029850746268656716,
|
|
"grad_norm": 2.448502334537302,
|
|
"learning_rate": 2.033898305084746e-06,
|
|
"loss": 1.4619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44735777378082275,
|
|
"step": 7,
|
|
"valid_targets_mean": 15977.0,
|
|
"valid_targets_min": 14845
|
|
},
|
|
{
|
|
"epoch": 0.03411513859275053,
|
|
"grad_norm": 2.3737266913379207,
|
|
"learning_rate": 2.372881355932204e-06,
|
|
"loss": 1.4867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.351285457611084,
|
|
"step": 8,
|
|
"valid_targets_mean": 12883.5,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 0.03837953091684435,
|
|
"grad_norm": 1.9960343385318842,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 1.4211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35067230463027954,
|
|
"step": 9,
|
|
"valid_targets_mean": 15800.5,
|
|
"valid_targets_min": 14349
|
|
},
|
|
{
|
|
"epoch": 0.042643923240938165,
|
|
"grad_norm": 1.9214566204964303,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 1.4802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43322673439979553,
|
|
"step": 10,
|
|
"valid_targets_mean": 16118.9,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 0.046908315565031986,
|
|
"grad_norm": 1.6833352642818202,
|
|
"learning_rate": 3.3898305084745763e-06,
|
|
"loss": 1.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25016266107559204,
|
|
"step": 11,
|
|
"valid_targets_mean": 9185.1,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 0.0511727078891258,
|
|
"grad_norm": 1.460938942408396,
|
|
"learning_rate": 3.7288135593220342e-06,
|
|
"loss": 1.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3515564799308777,
|
|
"step": 12,
|
|
"valid_targets_mean": 16135.2,
|
|
"valid_targets_min": 14794
|
|
},
|
|
{
|
|
"epoch": 0.05543710021321962,
|
|
"grad_norm": 1.4790245779031177,
|
|
"learning_rate": 4.067796610169492e-06,
|
|
"loss": 1.4265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39283841848373413,
|
|
"step": 13,
|
|
"valid_targets_mean": 16090.6,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 0.05970149253731343,
|
|
"grad_norm": 1.3561975660857941,
|
|
"learning_rate": 4.40677966101695e-06,
|
|
"loss": 1.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586643695831299,
|
|
"step": 14,
|
|
"valid_targets_mean": 10057.8,
|
|
"valid_targets_min": 6101
|
|
},
|
|
{
|
|
"epoch": 0.06396588486140725,
|
|
"grad_norm": 1.1805698070522472,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 1.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3817826509475708,
|
|
"step": 15,
|
|
"valid_targets_mean": 16117.9,
|
|
"valid_targets_min": 14544
|
|
},
|
|
{
|
|
"epoch": 0.06823027718550106,
|
|
"grad_norm": 1.8410675828756973,
|
|
"learning_rate": 5.084745762711865e-06,
|
|
"loss": 1.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4136810302734375,
|
|
"step": 16,
|
|
"valid_targets_mean": 16074.6,
|
|
"valid_targets_min": 14755
|
|
},
|
|
{
|
|
"epoch": 0.07249466950959488,
|
|
"grad_norm": 1.777607621979654,
|
|
"learning_rate": 5.423728813559323e-06,
|
|
"loss": 1.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.360395610332489,
|
|
"step": 17,
|
|
"valid_targets_mean": 14765.2,
|
|
"valid_targets_min": 12313
|
|
},
|
|
{
|
|
"epoch": 0.0767590618336887,
|
|
"grad_norm": 1.490309523738917,
|
|
"learning_rate": 5.7627118644067805e-06,
|
|
"loss": 1.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3600786328315735,
|
|
"step": 18,
|
|
"valid_targets_mean": 16238.3,
|
|
"valid_targets_min": 15660
|
|
},
|
|
{
|
|
"epoch": 0.08102345415778252,
|
|
"grad_norm": 1.211959385404553,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 1.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3780166506767273,
|
|
"step": 19,
|
|
"valid_targets_mean": 16181.4,
|
|
"valid_targets_min": 15422
|
|
},
|
|
{
|
|
"epoch": 0.08528784648187633,
|
|
"grad_norm": 0.950111030978371,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 1.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35725221037864685,
|
|
"step": 20,
|
|
"valid_targets_mean": 15211.0,
|
|
"valid_targets_min": 13345
|
|
},
|
|
{
|
|
"epoch": 0.08955223880597014,
|
|
"grad_norm": 0.9032081253318737,
|
|
"learning_rate": 6.779661016949153e-06,
|
|
"loss": 1.3604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3920598030090332,
|
|
"step": 21,
|
|
"valid_targets_mean": 16066.3,
|
|
"valid_targets_min": 13888
|
|
},
|
|
{
|
|
"epoch": 0.09381663113006397,
|
|
"grad_norm": 0.9764773588982553,
|
|
"learning_rate": 7.1186440677966106e-06,
|
|
"loss": 1.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867366671562195,
|
|
"step": 22,
|
|
"valid_targets_mean": 11589.2,
|
|
"valid_targets_min": 2526
|
|
},
|
|
{
|
|
"epoch": 0.09808102345415778,
|
|
"grad_norm": 0.8766391421099312,
|
|
"learning_rate": 7.4576271186440685e-06,
|
|
"loss": 1.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3462587893009186,
|
|
"step": 23,
|
|
"valid_targets_mean": 16071.5,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 0.1023454157782516,
|
|
"grad_norm": 0.7031116505718454,
|
|
"learning_rate": 7.796610169491526e-06,
|
|
"loss": 1.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3843736946582794,
|
|
"step": 24,
|
|
"valid_targets_mean": 16015.3,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 0.10660980810234541,
|
|
"grad_norm": 0.7887371865345751,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 1.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2373809814453125,
|
|
"step": 25,
|
|
"valid_targets_mean": 10277.5,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 0.11087420042643924,
|
|
"grad_norm": 0.7380167449655485,
|
|
"learning_rate": 8.47457627118644e-06,
|
|
"loss": 1.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3606833517551422,
|
|
"step": 26,
|
|
"valid_targets_mean": 16022.2,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 0.11513859275053305,
|
|
"grad_norm": 0.6257505321465542,
|
|
"learning_rate": 8.8135593220339e-06,
|
|
"loss": 1.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3576739430427551,
|
|
"step": 27,
|
|
"valid_targets_mean": 16100.3,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 0.11940298507462686,
|
|
"grad_norm": 0.6271882262231636,
|
|
"learning_rate": 9.152542372881356e-06,
|
|
"loss": 1.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551213502883911,
|
|
"step": 28,
|
|
"valid_targets_mean": 11641.7,
|
|
"valid_targets_min": 8792
|
|
},
|
|
{
|
|
"epoch": 0.12366737739872068,
|
|
"grad_norm": 0.6453782947757865,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 1.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36255574226379395,
|
|
"step": 29,
|
|
"valid_targets_mean": 16077.6,
|
|
"valid_targets_min": 15116
|
|
},
|
|
{
|
|
"epoch": 0.1279317697228145,
|
|
"grad_norm": 0.4867792580923484,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 1.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38774198293685913,
|
|
"step": 30,
|
|
"valid_targets_mean": 16075.6,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 0.13219616204690832,
|
|
"grad_norm": 0.5241976134494157,
|
|
"learning_rate": 1.016949152542373e-05,
|
|
"loss": 1.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905063033103943,
|
|
"step": 31,
|
|
"valid_targets_mean": 14641.4,
|
|
"valid_targets_min": 12602
|
|
},
|
|
{
|
|
"epoch": 0.13646055437100213,
|
|
"grad_norm": 0.5399645368153256,
|
|
"learning_rate": 1.0508474576271188e-05,
|
|
"loss": 1.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3627786636352539,
|
|
"step": 32,
|
|
"valid_targets_mean": 16118.6,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 0.14072494669509594,
|
|
"grad_norm": 0.4809078702151959,
|
|
"learning_rate": 1.0847457627118645e-05,
|
|
"loss": 1.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28553783893585205,
|
|
"step": 33,
|
|
"valid_targets_mean": 13292.3,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 0.14498933901918976,
|
|
"grad_norm": 0.4631742166084725,
|
|
"learning_rate": 1.1186440677966102e-05,
|
|
"loss": 1.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30626145005226135,
|
|
"step": 34,
|
|
"valid_targets_mean": 15516.8,
|
|
"valid_targets_min": 13685
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 0.4331028281583258,
|
|
"learning_rate": 1.1525423728813561e-05,
|
|
"loss": 1.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37871289253234863,
|
|
"step": 35,
|
|
"valid_targets_mean": 16160.1,
|
|
"valid_targets_min": 15418
|
|
},
|
|
{
|
|
"epoch": 0.1535181236673774,
|
|
"grad_norm": 0.45270210159861074,
|
|
"learning_rate": 1.1864406779661018e-05,
|
|
"loss": 1.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22720596194267273,
|
|
"step": 36,
|
|
"valid_targets_mean": 9924.8,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 0.15778251599147122,
|
|
"grad_norm": 0.3738929022888006,
|
|
"learning_rate": 1.2203389830508477e-05,
|
|
"loss": 1.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30965572595596313,
|
|
"step": 37,
|
|
"valid_targets_mean": 16209.8,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 0.16204690831556504,
|
|
"grad_norm": 0.4212524619888493,
|
|
"learning_rate": 1.2542372881355932e-05,
|
|
"loss": 1.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3551151752471924,
|
|
"step": 38,
|
|
"valid_targets_mean": 16175.6,
|
|
"valid_targets_min": 15189
|
|
},
|
|
{
|
|
"epoch": 0.16631130063965885,
|
|
"grad_norm": 0.41114455021498564,
|
|
"learning_rate": 1.288135593220339e-05,
|
|
"loss": 1.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23346129059791565,
|
|
"step": 39,
|
|
"valid_targets_mean": 9991.4,
|
|
"valid_targets_min": 6726
|
|
},
|
|
{
|
|
"epoch": 0.17057569296375266,
|
|
"grad_norm": 0.40275607477881575,
|
|
"learning_rate": 1.3220338983050848e-05,
|
|
"loss": 1.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32813870906829834,
|
|
"step": 40,
|
|
"valid_targets_mean": 16142.2,
|
|
"valid_targets_min": 14273
|
|
},
|
|
{
|
|
"epoch": 0.17484008528784648,
|
|
"grad_norm": 0.3397786814942616,
|
|
"learning_rate": 1.3559322033898305e-05,
|
|
"loss": 1.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3357704281806946,
|
|
"step": 41,
|
|
"valid_targets_mean": 16186.6,
|
|
"valid_targets_min": 14994
|
|
},
|
|
{
|
|
"epoch": 0.1791044776119403,
|
|
"grad_norm": 0.32856750595631484,
|
|
"learning_rate": 1.3898305084745764e-05,
|
|
"loss": 1.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756190299987793,
|
|
"step": 42,
|
|
"valid_targets_mean": 13378.8,
|
|
"valid_targets_min": 11254
|
|
},
|
|
{
|
|
"epoch": 0.18336886993603413,
|
|
"grad_norm": 0.33800232466028274,
|
|
"learning_rate": 1.4237288135593221e-05,
|
|
"loss": 1.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3431663513183594,
|
|
"step": 43,
|
|
"valid_targets_mean": 16160.8,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 0.18763326226012794,
|
|
"grad_norm": 0.3221739518777501,
|
|
"learning_rate": 1.4576271186440678e-05,
|
|
"loss": 1.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114537298679352,
|
|
"step": 44,
|
|
"valid_targets_mean": 16207.8,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 0.19189765458422176,
|
|
"grad_norm": 0.3251904659935115,
|
|
"learning_rate": 1.4915254237288137e-05,
|
|
"loss": 1.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32420068979263306,
|
|
"step": 45,
|
|
"valid_targets_mean": 14813.5,
|
|
"valid_targets_min": 13169
|
|
},
|
|
{
|
|
"epoch": 0.19616204690831557,
|
|
"grad_norm": 0.30647036782786324,
|
|
"learning_rate": 1.5254237288135594e-05,
|
|
"loss": 1.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33520829677581787,
|
|
"step": 46,
|
|
"valid_targets_mean": 16167.5,
|
|
"valid_targets_min": 14831
|
|
},
|
|
{
|
|
"epoch": 0.20042643923240938,
|
|
"grad_norm": 0.311780621286371,
|
|
"learning_rate": 1.5593220338983053e-05,
|
|
"loss": 1.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603854537010193,
|
|
"step": 47,
|
|
"valid_targets_mean": 11703.6,
|
|
"valid_targets_min": 3390
|
|
},
|
|
{
|
|
"epoch": 0.2046908315565032,
|
|
"grad_norm": 0.3126674849938319,
|
|
"learning_rate": 1.593220338983051e-05,
|
|
"loss": 1.2092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3102681040763855,
|
|
"step": 48,
|
|
"valid_targets_mean": 15695.9,
|
|
"valid_targets_min": 14068
|
|
},
|
|
{
|
|
"epoch": 0.208955223880597,
|
|
"grad_norm": 0.3038830093046899,
|
|
"learning_rate": 1.6271186440677967e-05,
|
|
"loss": 1.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37953513860702515,
|
|
"step": 49,
|
|
"valid_targets_mean": 16029.5,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 0.21321961620469082,
|
|
"grad_norm": 0.2889986469822944,
|
|
"learning_rate": 1.6610169491525424e-05,
|
|
"loss": 1.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17968645691871643,
|
|
"step": 50,
|
|
"valid_targets_mean": 7348.0,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 0.21748400852878466,
|
|
"grad_norm": 0.3076923469624088,
|
|
"learning_rate": 1.694915254237288e-05,
|
|
"loss": 1.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3010212779045105,
|
|
"step": 51,
|
|
"valid_targets_mean": 16203.1,
|
|
"valid_targets_min": 15803
|
|
},
|
|
{
|
|
"epoch": 0.22174840085287847,
|
|
"grad_norm": 0.3176997254227205,
|
|
"learning_rate": 1.728813559322034e-05,
|
|
"loss": 1.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36455297470092773,
|
|
"step": 52,
|
|
"valid_targets_mean": 16132.4,
|
|
"valid_targets_min": 15171
|
|
},
|
|
{
|
|
"epoch": 0.2260127931769723,
|
|
"grad_norm": 0.27905566276946,
|
|
"learning_rate": 1.76271186440678e-05,
|
|
"loss": 1.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26121824979782104,
|
|
"step": 53,
|
|
"valid_targets_mean": 12511.2,
|
|
"valid_targets_min": 10082
|
|
},
|
|
{
|
|
"epoch": 0.2302771855010661,
|
|
"grad_norm": 0.2990599787842524,
|
|
"learning_rate": 1.7966101694915256e-05,
|
|
"loss": 1.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33031195402145386,
|
|
"step": 54,
|
|
"valid_targets_mean": 16137.9,
|
|
"valid_targets_min": 14934
|
|
},
|
|
{
|
|
"epoch": 0.2345415778251599,
|
|
"grad_norm": 0.30401338514787324,
|
|
"learning_rate": 1.8305084745762713e-05,
|
|
"loss": 1.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.366455078125,
|
|
"step": 55,
|
|
"valid_targets_mean": 16097.3,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 0.23880597014925373,
|
|
"grad_norm": 0.31847695549632293,
|
|
"learning_rate": 1.864406779661017e-05,
|
|
"loss": 1.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28000304102897644,
|
|
"step": 56,
|
|
"valid_targets_mean": 14551.7,
|
|
"valid_targets_min": 12631
|
|
},
|
|
{
|
|
"epoch": 0.24307036247334754,
|
|
"grad_norm": 0.28834213390656604,
|
|
"learning_rate": 1.898305084745763e-05,
|
|
"loss": 1.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33794599771499634,
|
|
"step": 57,
|
|
"valid_targets_mean": 15843.4,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 0.24733475479744135,
|
|
"grad_norm": 0.32989762963069885,
|
|
"learning_rate": 1.9322033898305087e-05,
|
|
"loss": 1.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30099916458129883,
|
|
"step": 58,
|
|
"valid_targets_mean": 13183.4,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 0.2515991471215352,
|
|
"grad_norm": 0.30207558418894537,
|
|
"learning_rate": 1.9661016949152545e-05,
|
|
"loss": 1.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025549054145813,
|
|
"step": 59,
|
|
"valid_targets_mean": 15810.2,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 0.255863539445629,
|
|
"grad_norm": 0.3261759997601209,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3403782248497009,
|
|
"step": 60,
|
|
"valid_targets_mean": 16229.3,
|
|
"valid_targets_min": 15657
|
|
},
|
|
{
|
|
"epoch": 0.2601279317697228,
|
|
"grad_norm": 0.3595768572224599,
|
|
"learning_rate": 2.033898305084746e-05,
|
|
"loss": 1.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20572395622730255,
|
|
"step": 61,
|
|
"valid_targets_mean": 8622.5,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 0.26439232409381663,
|
|
"grad_norm": 0.3038555289239557,
|
|
"learning_rate": 2.0677966101694916e-05,
|
|
"loss": 1.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30789583921432495,
|
|
"step": 62,
|
|
"valid_targets_mean": 16106.1,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 0.26865671641791045,
|
|
"grad_norm": 0.3759360511833827,
|
|
"learning_rate": 2.1016949152542376e-05,
|
|
"loss": 1.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3404845893383026,
|
|
"step": 63,
|
|
"valid_targets_mean": 16136.8,
|
|
"valid_targets_min": 15314
|
|
},
|
|
{
|
|
"epoch": 0.27292110874200426,
|
|
"grad_norm": 0.35102271121517886,
|
|
"learning_rate": 2.1355932203389833e-05,
|
|
"loss": 1.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23231232166290283,
|
|
"step": 64,
|
|
"valid_targets_mean": 10336.6,
|
|
"valid_targets_min": 5623
|
|
},
|
|
{
|
|
"epoch": 0.2771855010660981,
|
|
"grad_norm": 0.322742132512888,
|
|
"learning_rate": 2.169491525423729e-05,
|
|
"loss": 1.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3035464882850647,
|
|
"step": 65,
|
|
"valid_targets_mean": 16176.8,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 0.2814498933901919,
|
|
"grad_norm": 0.37619477944948826,
|
|
"learning_rate": 2.2033898305084748e-05,
|
|
"loss": 1.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3538297414779663,
|
|
"step": 66,
|
|
"valid_targets_mean": 16186.6,
|
|
"valid_targets_min": 15380
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.301167960713595,
|
|
"learning_rate": 2.2372881355932205e-05,
|
|
"loss": 1.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25172507762908936,
|
|
"step": 67,
|
|
"valid_targets_mean": 12693.9,
|
|
"valid_targets_min": 10543
|
|
},
|
|
{
|
|
"epoch": 0.2899786780383795,
|
|
"grad_norm": 0.42344576721991084,
|
|
"learning_rate": 2.2711864406779665e-05,
|
|
"loss": 1.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3390490710735321,
|
|
"step": 68,
|
|
"valid_targets_mean": 16133.4,
|
|
"valid_targets_min": 15452
|
|
},
|
|
{
|
|
"epoch": 0.2942430703624733,
|
|
"grad_norm": 0.3419862520100689,
|
|
"learning_rate": 2.3050847457627122e-05,
|
|
"loss": 1.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3601224422454834,
|
|
"step": 69,
|
|
"valid_targets_mean": 16142.4,
|
|
"valid_targets_min": 15328
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.4276207843222072,
|
|
"learning_rate": 2.338983050847458e-05,
|
|
"loss": 1.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28064393997192383,
|
|
"step": 70,
|
|
"valid_targets_mean": 14594.2,
|
|
"valid_targets_min": 13086
|
|
},
|
|
{
|
|
"epoch": 0.302771855010661,
|
|
"grad_norm": 0.4829759289765743,
|
|
"learning_rate": 2.3728813559322036e-05,
|
|
"loss": 1.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33678144216537476,
|
|
"step": 71,
|
|
"valid_targets_mean": 16185.4,
|
|
"valid_targets_min": 15260
|
|
},
|
|
{
|
|
"epoch": 0.3070362473347548,
|
|
"grad_norm": 0.42323084264939814,
|
|
"learning_rate": 2.406779661016949e-05,
|
|
"loss": 1.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538195848464966,
|
|
"step": 72,
|
|
"valid_targets_mean": 11549.2,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 0.31130063965884863,
|
|
"grad_norm": 0.3655308537540164,
|
|
"learning_rate": 2.4406779661016954e-05,
|
|
"loss": 1.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072863221168518,
|
|
"step": 73,
|
|
"valid_targets_mean": 16113.0,
|
|
"valid_targets_min": 13998
|
|
},
|
|
{
|
|
"epoch": 0.31556503198294245,
|
|
"grad_norm": 0.43346869778280545,
|
|
"learning_rate": 2.474576271186441e-05,
|
|
"loss": 1.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31907588243484497,
|
|
"step": 74,
|
|
"valid_targets_mean": 16215.8,
|
|
"valid_targets_min": 15958
|
|
},
|
|
{
|
|
"epoch": 0.31982942430703626,
|
|
"grad_norm": 0.478644315787928,
|
|
"learning_rate": 2.5084745762711865e-05,
|
|
"loss": 1.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17873533070087433,
|
|
"step": 75,
|
|
"valid_targets_mean": 8364.3,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 0.32409381663113007,
|
|
"grad_norm": 0.5076407642004973,
|
|
"learning_rate": 2.5423728813559322e-05,
|
|
"loss": 1.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30682048201560974,
|
|
"step": 76,
|
|
"valid_targets_mean": 16091.7,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 0.3283582089552239,
|
|
"grad_norm": 0.3806276932479539,
|
|
"learning_rate": 2.576271186440678e-05,
|
|
"loss": 1.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3455969989299774,
|
|
"step": 77,
|
|
"valid_targets_mean": 16167.6,
|
|
"valid_targets_min": 15351
|
|
},
|
|
{
|
|
"epoch": 0.3326226012793177,
|
|
"grad_norm": 0.5754233285474364,
|
|
"learning_rate": 2.610169491525424e-05,
|
|
"loss": 1.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23480388522148132,
|
|
"step": 78,
|
|
"valid_targets_mean": 12202.2,
|
|
"valid_targets_min": 8376
|
|
},
|
|
{
|
|
"epoch": 0.3368869936034115,
|
|
"grad_norm": 0.46393394706946234,
|
|
"learning_rate": 2.6440677966101696e-05,
|
|
"loss": 1.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31483885645866394,
|
|
"step": 79,
|
|
"valid_targets_mean": 16147.0,
|
|
"valid_targets_min": 15397
|
|
},
|
|
{
|
|
"epoch": 0.3411513859275053,
|
|
"grad_norm": 0.4225738451929594,
|
|
"learning_rate": 2.6779661016949153e-05,
|
|
"loss": 1.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3444315493106842,
|
|
"step": 80,
|
|
"valid_targets_mean": 16172.6,
|
|
"valid_targets_min": 14636
|
|
},
|
|
{
|
|
"epoch": 0.34541577825159914,
|
|
"grad_norm": 0.49308893509545715,
|
|
"learning_rate": 2.711864406779661e-05,
|
|
"loss": 1.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29010820388793945,
|
|
"step": 81,
|
|
"valid_targets_mean": 14128.0,
|
|
"valid_targets_min": 10472
|
|
},
|
|
{
|
|
"epoch": 0.34968017057569295,
|
|
"grad_norm": 0.4397789217175473,
|
|
"learning_rate": 2.7457627118644068e-05,
|
|
"loss": 1.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3423722982406616,
|
|
"step": 82,
|
|
"valid_targets_mean": 16067.8,
|
|
"valid_targets_min": 15370
|
|
},
|
|
{
|
|
"epoch": 0.35394456289978676,
|
|
"grad_norm": 0.5416955629140322,
|
|
"learning_rate": 2.7796610169491528e-05,
|
|
"loss": 1.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2991706132888794,
|
|
"step": 83,
|
|
"valid_targets_mean": 13262.2,
|
|
"valid_targets_min": 2435
|
|
},
|
|
{
|
|
"epoch": 0.3582089552238806,
|
|
"grad_norm": 0.5401809455437692,
|
|
"learning_rate": 2.8135593220338985e-05,
|
|
"loss": 1.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678249478340149,
|
|
"step": 84,
|
|
"valid_targets_mean": 15934.3,
|
|
"valid_targets_min": 14185
|
|
},
|
|
{
|
|
"epoch": 0.3624733475479744,
|
|
"grad_norm": 0.500278991508074,
|
|
"learning_rate": 2.8474576271186442e-05,
|
|
"loss": 1.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36243772506713867,
|
|
"step": 85,
|
|
"valid_targets_mean": 16101.3,
|
|
"valid_targets_min": 14897
|
|
},
|
|
{
|
|
"epoch": 0.36673773987206826,
|
|
"grad_norm": 0.5920401079460927,
|
|
"learning_rate": 2.88135593220339e-05,
|
|
"loss": 1.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20134751498699188,
|
|
"step": 86,
|
|
"valid_targets_mean": 9425.7,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 0.37100213219616207,
|
|
"grad_norm": 0.44938297761375107,
|
|
"learning_rate": 2.9152542372881356e-05,
|
|
"loss": 1.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085672855377197,
|
|
"step": 87,
|
|
"valid_targets_mean": 16128.9,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.3752665245202559,
|
|
"grad_norm": 0.5854582449500921,
|
|
"learning_rate": 2.9491525423728817e-05,
|
|
"loss": 1.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3548707962036133,
|
|
"step": 88,
|
|
"valid_targets_mean": 16217.1,
|
|
"valid_targets_min": 15697
|
|
},
|
|
{
|
|
"epoch": 0.3795309168443497,
|
|
"grad_norm": 0.5504106812921528,
|
|
"learning_rate": 2.9830508474576274e-05,
|
|
"loss": 1.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20897847414016724,
|
|
"step": 89,
|
|
"valid_targets_mean": 10271.9,
|
|
"valid_targets_min": 5871
|
|
},
|
|
{
|
|
"epoch": 0.3837953091684435,
|
|
"grad_norm": 0.4852936129372909,
|
|
"learning_rate": 3.016949152542373e-05,
|
|
"loss": 1.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30411073565483093,
|
|
"step": 90,
|
|
"valid_targets_mean": 16159.1,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 0.3880597014925373,
|
|
"grad_norm": 0.5614838664830023,
|
|
"learning_rate": 3.0508474576271188e-05,
|
|
"loss": 1.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3615682125091553,
|
|
"step": 91,
|
|
"valid_targets_mean": 16088.1,
|
|
"valid_targets_min": 14827
|
|
},
|
|
{
|
|
"epoch": 0.39232409381663114,
|
|
"grad_norm": 0.49688315347571765,
|
|
"learning_rate": 3.084745762711865e-05,
|
|
"loss": 1.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728273570537567,
|
|
"step": 92,
|
|
"valid_targets_mean": 13574.6,
|
|
"valid_targets_min": 10444
|
|
},
|
|
{
|
|
"epoch": 0.39658848614072495,
|
|
"grad_norm": 0.5583883978364661,
|
|
"learning_rate": 3.1186440677966106e-05,
|
|
"loss": 1.1793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.339051216840744,
|
|
"step": 93,
|
|
"valid_targets_mean": 16158.7,
|
|
"valid_targets_min": 15460
|
|
},
|
|
{
|
|
"epoch": 0.40085287846481876,
|
|
"grad_norm": 0.5609899154793154,
|
|
"learning_rate": 3.152542372881356e-05,
|
|
"loss": 1.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3467799425125122,
|
|
"step": 94,
|
|
"valid_targets_mean": 16128.6,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 0.4051172707889126,
|
|
"grad_norm": 0.46962358481300204,
|
|
"learning_rate": 3.186440677966102e-05,
|
|
"loss": 1.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2939866781234741,
|
|
"step": 95,
|
|
"valid_targets_mean": 15844.7,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 0.4093816631130064,
|
|
"grad_norm": 0.5582293882928827,
|
|
"learning_rate": 3.2203389830508473e-05,
|
|
"loss": 1.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3391205072402954,
|
|
"step": 96,
|
|
"valid_targets_mean": 16087.4,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 0.4136460554371002,
|
|
"grad_norm": 0.5900657459930109,
|
|
"learning_rate": 3.2542372881355934e-05,
|
|
"loss": 1.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2372097373008728,
|
|
"step": 97,
|
|
"valid_targets_mean": 10771.6,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.417910447761194,
|
|
"grad_norm": 0.5340688290650466,
|
|
"learning_rate": 3.2881355932203394e-05,
|
|
"loss": 1.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2912096083164215,
|
|
"step": 98,
|
|
"valid_targets_mean": 15845.2,
|
|
"valid_targets_min": 14972
|
|
},
|
|
{
|
|
"epoch": 0.42217484008528783,
|
|
"grad_norm": 0.6378100977602194,
|
|
"learning_rate": 3.322033898305085e-05,
|
|
"loss": 1.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35477086901664734,
|
|
"step": 99,
|
|
"valid_targets_mean": 16180.6,
|
|
"valid_targets_min": 15678
|
|
},
|
|
{
|
|
"epoch": 0.42643923240938164,
|
|
"grad_norm": 0.6988755028234622,
|
|
"learning_rate": 3.355932203389831e-05,
|
|
"loss": 1.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19588929414749146,
|
|
"step": 100,
|
|
"valid_targets_mean": 8886.4,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 0.43070362473347545,
|
|
"grad_norm": 0.47418325441465914,
|
|
"learning_rate": 3.389830508474576e-05,
|
|
"loss": 1.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752765417098999,
|
|
"step": 101,
|
|
"valid_targets_mean": 16190.2,
|
|
"valid_targets_min": 15322
|
|
},
|
|
{
|
|
"epoch": 0.4349680170575693,
|
|
"grad_norm": 0.8577574413948679,
|
|
"learning_rate": 3.423728813559322e-05,
|
|
"loss": 1.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34668517112731934,
|
|
"step": 102,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15635
|
|
},
|
|
{
|
|
"epoch": 0.43923240938166314,
|
|
"grad_norm": 1.0101093509748802,
|
|
"learning_rate": 3.457627118644068e-05,
|
|
"loss": 1.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23290054500102997,
|
|
"step": 103,
|
|
"valid_targets_mean": 12241.3,
|
|
"valid_targets_min": 9421
|
|
},
|
|
{
|
|
"epoch": 0.44349680170575695,
|
|
"grad_norm": 0.5175939406637968,
|
|
"learning_rate": 3.491525423728814e-05,
|
|
"loss": 1.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979327440261841,
|
|
"step": 104,
|
|
"valid_targets_mean": 16131.7,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 0.5572197547617472,
|
|
"learning_rate": 3.52542372881356e-05,
|
|
"loss": 1.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3531283140182495,
|
|
"step": 105,
|
|
"valid_targets_mean": 15808.0,
|
|
"valid_targets_min": 5994
|
|
},
|
|
{
|
|
"epoch": 0.4520255863539446,
|
|
"grad_norm": 0.6605618903327797,
|
|
"learning_rate": 3.559322033898305e-05,
|
|
"loss": 1.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2809103727340698,
|
|
"step": 106,
|
|
"valid_targets_mean": 14647.7,
|
|
"valid_targets_min": 12782
|
|
},
|
|
{
|
|
"epoch": 0.4562899786780384,
|
|
"grad_norm": 0.4884826013889433,
|
|
"learning_rate": 3.593220338983051e-05,
|
|
"loss": 1.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328754723072052,
|
|
"step": 107,
|
|
"valid_targets_mean": 16161.3,
|
|
"valid_targets_min": 15446
|
|
},
|
|
{
|
|
"epoch": 0.4605543710021322,
|
|
"grad_norm": 0.45357794917277694,
|
|
"learning_rate": 3.627118644067797e-05,
|
|
"loss": 1.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861141860485077,
|
|
"step": 108,
|
|
"valid_targets_mean": 12701.5,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 0.464818763326226,
|
|
"grad_norm": 0.5652955970227891,
|
|
"learning_rate": 3.6610169491525426e-05,
|
|
"loss": 1.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760468125343323,
|
|
"step": 109,
|
|
"valid_targets_mean": 15305.1,
|
|
"valid_targets_min": 13189
|
|
},
|
|
{
|
|
"epoch": 0.4690831556503198,
|
|
"grad_norm": 0.5417174035420975,
|
|
"learning_rate": 3.6949152542372886e-05,
|
|
"loss": 1.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33323991298675537,
|
|
"step": 110,
|
|
"valid_targets_mean": 16089.3,
|
|
"valid_targets_min": 13573
|
|
},
|
|
{
|
|
"epoch": 0.47334754797441364,
|
|
"grad_norm": 0.43726055299368505,
|
|
"learning_rate": 3.728813559322034e-05,
|
|
"loss": 1.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19943693280220032,
|
|
"step": 111,
|
|
"valid_targets_mean": 9455.9,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 0.47761194029850745,
|
|
"grad_norm": 0.5751565532130996,
|
|
"learning_rate": 3.76271186440678e-05,
|
|
"loss": 1.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27978599071502686,
|
|
"step": 112,
|
|
"valid_targets_mean": 16189.5,
|
|
"valid_targets_min": 14479
|
|
},
|
|
{
|
|
"epoch": 0.48187633262260127,
|
|
"grad_norm": 0.7101710184226162,
|
|
"learning_rate": 3.796610169491526e-05,
|
|
"loss": 1.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3205156922340393,
|
|
"step": 113,
|
|
"valid_targets_mean": 16139.4,
|
|
"valid_targets_min": 15023
|
|
},
|
|
{
|
|
"epoch": 0.4861407249466951,
|
|
"grad_norm": 0.6637726550198171,
|
|
"learning_rate": 3.8305084745762714e-05,
|
|
"loss": 1.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20474639534950256,
|
|
"step": 114,
|
|
"valid_targets_mean": 10739.7,
|
|
"valid_targets_min": 4961
|
|
},
|
|
{
|
|
"epoch": 0.4904051172707889,
|
|
"grad_norm": 0.653009338459572,
|
|
"learning_rate": 3.8644067796610175e-05,
|
|
"loss": 1.2007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30754905939102173,
|
|
"step": 115,
|
|
"valid_targets_mean": 16131.4,
|
|
"valid_targets_min": 15295
|
|
},
|
|
{
|
|
"epoch": 0.4946695095948827,
|
|
"grad_norm": 0.7500290339063932,
|
|
"learning_rate": 3.898305084745763e-05,
|
|
"loss": 1.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3587116599082947,
|
|
"step": 116,
|
|
"valid_targets_mean": 16016.1,
|
|
"valid_targets_min": 14837
|
|
},
|
|
{
|
|
"epoch": 0.4989339019189765,
|
|
"grad_norm": 0.666000348059281,
|
|
"learning_rate": 3.932203389830509e-05,
|
|
"loss": 1.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22062388062477112,
|
|
"step": 117,
|
|
"valid_targets_mean": 12074.7,
|
|
"valid_targets_min": 9259
|
|
},
|
|
{
|
|
"epoch": 0.5031982942430704,
|
|
"grad_norm": 0.5205942147444115,
|
|
"learning_rate": 3.966101694915255e-05,
|
|
"loss": 1.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33427849411964417,
|
|
"step": 118,
|
|
"valid_targets_mean": 16067.9,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 0.5074626865671642,
|
|
"grad_norm": 0.6982918983089882,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3225334882736206,
|
|
"step": 119,
|
|
"valid_targets_mean": 15887.3,
|
|
"valid_targets_min": 4483
|
|
},
|
|
{
|
|
"epoch": 0.511727078891258,
|
|
"grad_norm": 0.6809274398624741,
|
|
"learning_rate": 3.999991166161585e-05,
|
|
"loss": 1.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258020281791687,
|
|
"step": 120,
|
|
"valid_targets_mean": 15023.2,
|
|
"valid_targets_min": 12822
|
|
},
|
|
{
|
|
"epoch": 0.5159914712153518,
|
|
"grad_norm": 0.6796579602435189,
|
|
"learning_rate": 3.999964664724376e-05,
|
|
"loss": 1.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3398219347000122,
|
|
"step": 121,
|
|
"valid_targets_mean": 16139.8,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 0.5202558635394456,
|
|
"grad_norm": 0.9053221526056996,
|
|
"learning_rate": 3.999920495922483e-05,
|
|
"loss": 1.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20825019478797913,
|
|
"step": 122,
|
|
"valid_targets_mean": 10948.9,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 0.5245202558635395,
|
|
"grad_norm": 0.7225808525909699,
|
|
"learning_rate": 3.999858660146085e-05,
|
|
"loss": 1.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2959098219871521,
|
|
"step": 123,
|
|
"valid_targets_mean": 16067.6,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 0.5287846481876333,
|
|
"grad_norm": 0.699040585359214,
|
|
"learning_rate": 3.999779157941431e-05,
|
|
"loss": 1.1711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32279884815216064,
|
|
"step": 124,
|
|
"valid_targets_mean": 16100.9,
|
|
"valid_targets_min": 14446
|
|
},
|
|
{
|
|
"epoch": 0.5330490405117271,
|
|
"grad_norm": 0.6867814111008005,
|
|
"learning_rate": 3.99968199001083e-05,
|
|
"loss": 1.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1771920621395111,
|
|
"step": 125,
|
|
"valid_targets_mean": 8757.8,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 0.5373134328358209,
|
|
"grad_norm": 0.6942075233485823,
|
|
"learning_rate": 3.999567157212646e-05,
|
|
"loss": 1.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2975453734397888,
|
|
"step": 126,
|
|
"valid_targets_mean": 16084.3,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 0.5415778251599147,
|
|
"grad_norm": 0.5834359412645455,
|
|
"learning_rate": 3.9994346605612955e-05,
|
|
"loss": 1.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.334403395652771,
|
|
"step": 127,
|
|
"valid_targets_mean": 16168.2,
|
|
"valid_targets_min": 15465
|
|
},
|
|
{
|
|
"epoch": 0.5458422174840085,
|
|
"grad_norm": 0.6871781087014723,
|
|
"learning_rate": 3.999284501227232e-05,
|
|
"loss": 1.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.225129634141922,
|
|
"step": 128,
|
|
"valid_targets_mean": 11782.7,
|
|
"valid_targets_min": 8043
|
|
},
|
|
{
|
|
"epoch": 0.5501066098081023,
|
|
"grad_norm": 0.42199383147183966,
|
|
"learning_rate": 3.9991166805369393e-05,
|
|
"loss": 1.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.312316358089447,
|
|
"step": 129,
|
|
"valid_targets_mean": 16157.4,
|
|
"valid_targets_min": 15459
|
|
},
|
|
{
|
|
"epoch": 0.5543710021321961,
|
|
"grad_norm": 0.5901638763955319,
|
|
"learning_rate": 3.9989311999729166e-05,
|
|
"loss": 1.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3369404077529907,
|
|
"step": 130,
|
|
"valid_targets_mean": 16114.2,
|
|
"valid_targets_min": 14385
|
|
},
|
|
{
|
|
"epoch": 0.55863539445629,
|
|
"grad_norm": 0.5256224443564378,
|
|
"learning_rate": 3.99872806117367e-05,
|
|
"loss": 1.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2685885727405548,
|
|
"step": 131,
|
|
"valid_targets_mean": 14860.6,
|
|
"valid_targets_min": 13129
|
|
},
|
|
{
|
|
"epoch": 0.5628997867803838,
|
|
"grad_norm": 0.47653542542793115,
|
|
"learning_rate": 3.998507265933696e-05,
|
|
"loss": 1.1043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29919832944869995,
|
|
"step": 132,
|
|
"valid_targets_mean": 15748.8,
|
|
"valid_targets_min": 2605
|
|
},
|
|
{
|
|
"epoch": 0.5671641791044776,
|
|
"grad_norm": 0.5030344370097122,
|
|
"learning_rate": 3.9982688162034624e-05,
|
|
"loss": 1.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25398001074790955,
|
|
"step": 133,
|
|
"valid_targets_mean": 13342.2,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.42492982816138963,
|
|
"learning_rate": 3.998012714089397e-05,
|
|
"loss": 1.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2820109724998474,
|
|
"step": 134,
|
|
"valid_targets_mean": 15889.0,
|
|
"valid_targets_min": 14972
|
|
},
|
|
{
|
|
"epoch": 0.5756929637526652,
|
|
"grad_norm": 0.5235005077222882,
|
|
"learning_rate": 3.997738961853863e-05,
|
|
"loss": 1.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3352549076080322,
|
|
"step": 135,
|
|
"valid_targets_mean": 16079.2,
|
|
"valid_targets_min": 14609
|
|
},
|
|
{
|
|
"epoch": 0.579957356076759,
|
|
"grad_norm": 0.4467842546749747,
|
|
"learning_rate": 3.9974475619151445e-05,
|
|
"loss": 1.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19214390218257904,
|
|
"step": 136,
|
|
"valid_targets_mean": 11098.3,
|
|
"valid_targets_min": 4124
|
|
},
|
|
{
|
|
"epoch": 0.5842217484008528,
|
|
"grad_norm": 0.5449627767069584,
|
|
"learning_rate": 3.997138516847422e-05,
|
|
"loss": 1.0947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702724039554596,
|
|
"step": 137,
|
|
"valid_targets_mean": 16150.4,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.5884861407249466,
|
|
"grad_norm": 0.44578828819431243,
|
|
"learning_rate": 3.9968118293807476e-05,
|
|
"loss": 1.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3618798851966858,
|
|
"step": 138,
|
|
"valid_targets_mean": 16076.4,
|
|
"valid_targets_min": 15001
|
|
},
|
|
{
|
|
"epoch": 0.5927505330490405,
|
|
"grad_norm": 0.5396897727847857,
|
|
"learning_rate": 3.996467502401028e-05,
|
|
"loss": 1.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21823956072330475,
|
|
"step": 139,
|
|
"valid_targets_mean": 10867.7,
|
|
"valid_targets_min": 7289
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.47029393037752354,
|
|
"learning_rate": 3.9961055389499904e-05,
|
|
"loss": 1.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30193018913269043,
|
|
"step": 140,
|
|
"valid_targets_mean": 16133.0,
|
|
"valid_targets_min": 14726
|
|
},
|
|
{
|
|
"epoch": 0.6012793176972282,
|
|
"grad_norm": 0.5615330990941316,
|
|
"learning_rate": 3.995725942225162e-05,
|
|
"loss": 1.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34185144305229187,
|
|
"step": 141,
|
|
"valid_targets_mean": 16148.1,
|
|
"valid_targets_min": 15478
|
|
},
|
|
{
|
|
"epoch": 0.605543710021322,
|
|
"grad_norm": 0.48830337729881,
|
|
"learning_rate": 3.995328715579839e-05,
|
|
"loss": 1.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765701413154602,
|
|
"step": 142,
|
|
"valid_targets_mean": 13650.9,
|
|
"valid_targets_min": 11524
|
|
},
|
|
{
|
|
"epoch": 0.6098081023454158,
|
|
"grad_norm": 0.5532155174714763,
|
|
"learning_rate": 3.994913862523058e-05,
|
|
"loss": 1.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3266953229904175,
|
|
"step": 143,
|
|
"valid_targets_mean": 16144.9,
|
|
"valid_targets_min": 14836
|
|
},
|
|
{
|
|
"epoch": 0.6140724946695096,
|
|
"grad_norm": 0.5257667799203136,
|
|
"learning_rate": 3.9944813867195624e-05,
|
|
"loss": 1.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085562288761139,
|
|
"step": 144,
|
|
"valid_targets_mean": 16204.7,
|
|
"valid_targets_min": 15523
|
|
},
|
|
{
|
|
"epoch": 0.6183368869936035,
|
|
"grad_norm": 0.44566640544014385,
|
|
"learning_rate": 3.9940312919897744e-05,
|
|
"loss": 1.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25907477736473083,
|
|
"step": 145,
|
|
"valid_targets_mean": 15518.0,
|
|
"valid_targets_min": 13968
|
|
},
|
|
{
|
|
"epoch": 0.6226012793176973,
|
|
"grad_norm": 0.5770025533700831,
|
|
"learning_rate": 3.993563582309759e-05,
|
|
"loss": 1.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3107393682003021,
|
|
"step": 146,
|
|
"valid_targets_mean": 16188.8,
|
|
"valid_targets_min": 15135
|
|
},
|
|
{
|
|
"epoch": 0.6268656716417911,
|
|
"grad_norm": 0.44133997776491973,
|
|
"learning_rate": 3.993078261811186e-05,
|
|
"loss": 1.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2310573309659958,
|
|
"step": 147,
|
|
"valid_targets_mean": 10491.1,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 0.6311300639658849,
|
|
"grad_norm": 0.5156433356806751,
|
|
"learning_rate": 3.9925753347813e-05,
|
|
"loss": 1.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652381658554077,
|
|
"step": 148,
|
|
"valid_targets_mean": 15896.2,
|
|
"valid_targets_min": 14353
|
|
},
|
|
{
|
|
"epoch": 0.6353944562899787,
|
|
"grad_norm": 0.5676390485196015,
|
|
"learning_rate": 3.992054805662876e-05,
|
|
"loss": 1.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.363337904214859,
|
|
"step": 149,
|
|
"valid_targets_mean": 16139.4,
|
|
"valid_targets_min": 15325
|
|
},
|
|
{
|
|
"epoch": 0.6396588486140725,
|
|
"grad_norm": 0.5061444651712808,
|
|
"learning_rate": 3.991516679054185e-05,
|
|
"loss": 1.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15361131727695465,
|
|
"step": 150,
|
|
"valid_targets_mean": 7331.1,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 0.6439232409381663,
|
|
"grad_norm": 0.4807577550346515,
|
|
"learning_rate": 3.9909609597089496e-05,
|
|
"loss": 1.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27330508828163147,
|
|
"step": 151,
|
|
"valid_targets_mean": 16209.2,
|
|
"valid_targets_min": 15437
|
|
},
|
|
{
|
|
"epoch": 0.6481876332622601,
|
|
"grad_norm": 0.5801878387706576,
|
|
"learning_rate": 3.9903876525363055e-05,
|
|
"loss": 1.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3727971911430359,
|
|
"step": 152,
|
|
"valid_targets_mean": 15970.0,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 0.652452025586354,
|
|
"grad_norm": 0.4683308102653706,
|
|
"learning_rate": 3.989796762600755e-05,
|
|
"loss": 1.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2308533787727356,
|
|
"step": 153,
|
|
"valid_targets_mean": 12333.0,
|
|
"valid_targets_min": 7440
|
|
},
|
|
{
|
|
"epoch": 0.6567164179104478,
|
|
"grad_norm": 0.5032781216294883,
|
|
"learning_rate": 3.9891882951221246e-05,
|
|
"loss": 1.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29714855551719666,
|
|
"step": 154,
|
|
"valid_targets_mean": 16176.9,
|
|
"valid_targets_min": 15348
|
|
},
|
|
{
|
|
"epoch": 0.6609808102345416,
|
|
"grad_norm": 0.4740597478203266,
|
|
"learning_rate": 3.988562255475518e-05,
|
|
"loss": 1.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3306240737438202,
|
|
"step": 155,
|
|
"valid_targets_mean": 16113.9,
|
|
"valid_targets_min": 15057
|
|
},
|
|
{
|
|
"epoch": 0.6652452025586354,
|
|
"grad_norm": 0.4634618395491457,
|
|
"learning_rate": 3.987918649191268e-05,
|
|
"loss": 1.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3024733066558838,
|
|
"step": 156,
|
|
"valid_targets_mean": 15849.1,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 0.6695095948827292,
|
|
"grad_norm": 0.4354653983169533,
|
|
"learning_rate": 3.987257481954888e-05,
|
|
"loss": 1.0912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164946138858795,
|
|
"step": 157,
|
|
"valid_targets_mean": 16035.5,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 0.673773987206823,
|
|
"grad_norm": 0.4719279447538755,
|
|
"learning_rate": 3.9865787596070236e-05,
|
|
"loss": 1.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650447487831116,
|
|
"step": 158,
|
|
"valid_targets_mean": 13319.5,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 0.6780383795309168,
|
|
"grad_norm": 0.4219587814060548,
|
|
"learning_rate": 3.9858824881433975e-05,
|
|
"loss": 1.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29355788230895996,
|
|
"step": 159,
|
|
"valid_targets_mean": 15898.2,
|
|
"valid_targets_min": 14591
|
|
},
|
|
{
|
|
"epoch": 0.6823027718550106,
|
|
"grad_norm": 0.5590094018108228,
|
|
"learning_rate": 3.9851686737147585e-05,
|
|
"loss": 1.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3133642077445984,
|
|
"step": 160,
|
|
"valid_targets_mean": 16092.8,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 0.6865671641791045,
|
|
"grad_norm": 0.5011249076649783,
|
|
"learning_rate": 3.9844373226268305e-05,
|
|
"loss": 1.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19621127843856812,
|
|
"step": 161,
|
|
"valid_targets_mean": 8096.2,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 0.6908315565031983,
|
|
"grad_norm": 0.46138156716244344,
|
|
"learning_rate": 3.983688441340249e-05,
|
|
"loss": 1.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27151405811309814,
|
|
"step": 162,
|
|
"valid_targets_mean": 16160.3,
|
|
"valid_targets_min": 15572
|
|
},
|
|
{
|
|
"epoch": 0.6950959488272921,
|
|
"grad_norm": 0.6772786842818087,
|
|
"learning_rate": 3.98292203647051e-05,
|
|
"loss": 1.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33365413546562195,
|
|
"step": 163,
|
|
"valid_targets_mean": 16126.5,
|
|
"valid_targets_min": 15564
|
|
},
|
|
{
|
|
"epoch": 0.6993603411513859,
|
|
"grad_norm": 0.48421119835644655,
|
|
"learning_rate": 3.982138114787912e-05,
|
|
"loss": 1.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1866360902786255,
|
|
"step": 164,
|
|
"valid_targets_mean": 10036.9,
|
|
"valid_targets_min": 3346
|
|
},
|
|
{
|
|
"epoch": 0.7036247334754797,
|
|
"grad_norm": 0.4578744586411911,
|
|
"learning_rate": 3.98133668321749e-05,
|
|
"loss": 1.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684229910373688,
|
|
"step": 165,
|
|
"valid_targets_mean": 16222.7,
|
|
"valid_targets_min": 15436
|
|
},
|
|
{
|
|
"epoch": 0.7078891257995735,
|
|
"grad_norm": 0.5014323664992882,
|
|
"learning_rate": 3.980517748838963e-05,
|
|
"loss": 1.0933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025428056716919,
|
|
"step": 166,
|
|
"valid_targets_mean": 16143.4,
|
|
"valid_targets_min": 13886
|
|
},
|
|
{
|
|
"epoch": 0.7121535181236673,
|
|
"grad_norm": 0.4491958491741342,
|
|
"learning_rate": 3.979681318886664e-05,
|
|
"loss": 1.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620503008365631,
|
|
"step": 167,
|
|
"valid_targets_mean": 13617.7,
|
|
"valid_targets_min": 11839
|
|
},
|
|
{
|
|
"epoch": 0.7164179104477612,
|
|
"grad_norm": 0.5019461861483926,
|
|
"learning_rate": 3.978827400749481e-05,
|
|
"loss": 1.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074752688407898,
|
|
"step": 168,
|
|
"valid_targets_mean": 16190.0,
|
|
"valid_targets_min": 15712
|
|
},
|
|
{
|
|
"epoch": 0.720682302771855,
|
|
"grad_norm": 0.5284578208517453,
|
|
"learning_rate": 3.977956001970788e-05,
|
|
"loss": 1.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3165856599807739,
|
|
"step": 169,
|
|
"valid_targets_mean": 16177.5,
|
|
"valid_targets_min": 15165
|
|
},
|
|
{
|
|
"epoch": 0.7249466950959488,
|
|
"grad_norm": 0.45222368130220614,
|
|
"learning_rate": 3.977067130248381e-05,
|
|
"loss": 1.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776346802711487,
|
|
"step": 170,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 15280
|
|
},
|
|
{
|
|
"epoch": 0.7292110874200426,
|
|
"grad_norm": 0.37526014538686786,
|
|
"learning_rate": 3.9761607934344095e-05,
|
|
"loss": 1.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.321167528629303,
|
|
"step": 171,
|
|
"valid_targets_mean": 16080.7,
|
|
"valid_targets_min": 14124
|
|
},
|
|
{
|
|
"epoch": 0.7334754797441365,
|
|
"grad_norm": 0.45258588780734565,
|
|
"learning_rate": 3.975236999535306e-05,
|
|
"loss": 1.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23799501359462738,
|
|
"step": 172,
|
|
"valid_targets_mean": 10701.9,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 0.7377398720682303,
|
|
"grad_norm": 0.5185543129001556,
|
|
"learning_rate": 3.974295756711717e-05,
|
|
"loss": 1.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28210726380348206,
|
|
"step": 173,
|
|
"valid_targets_mean": 16045.4,
|
|
"valid_targets_min": 15437
|
|
},
|
|
{
|
|
"epoch": 0.7420042643923241,
|
|
"grad_norm": 0.47674548232982294,
|
|
"learning_rate": 3.9733370732784296e-05,
|
|
"loss": 1.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30489468574523926,
|
|
"step": 174,
|
|
"valid_targets_mean": 16151.0,
|
|
"valid_targets_min": 14351
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.4569066165976318,
|
|
"learning_rate": 3.972360957704298e-05,
|
|
"loss": 1.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18339887261390686,
|
|
"step": 175,
|
|
"valid_targets_mean": 8556.6,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7505330490405118,
|
|
"grad_norm": 0.41485456028485257,
|
|
"learning_rate": 3.97136741861217e-05,
|
|
"loss": 1.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28565722703933716,
|
|
"step": 176,
|
|
"valid_targets_mean": 16134.4,
|
|
"valid_targets_min": 14897
|
|
},
|
|
{
|
|
"epoch": 0.7547974413646056,
|
|
"grad_norm": 0.3954877369632809,
|
|
"learning_rate": 3.970356464778808e-05,
|
|
"loss": 1.1242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30436745285987854,
|
|
"step": 177,
|
|
"valid_targets_mean": 16085.6,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 0.7590618336886994,
|
|
"grad_norm": 0.4955980232357404,
|
|
"learning_rate": 3.969328105134817e-05,
|
|
"loss": 1.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.222524493932724,
|
|
"step": 178,
|
|
"valid_targets_mean": 11892.4,
|
|
"valid_targets_min": 9743
|
|
},
|
|
{
|
|
"epoch": 0.7633262260127932,
|
|
"grad_norm": 0.41702004642848145,
|
|
"learning_rate": 3.9682823487645584e-05,
|
|
"loss": 1.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35379302501678467,
|
|
"step": 179,
|
|
"valid_targets_mean": 15843.5,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 0.767590618336887,
|
|
"grad_norm": 0.384960155190534,
|
|
"learning_rate": 3.9672192049060745e-05,
|
|
"loss": 1.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948912978172302,
|
|
"step": 180,
|
|
"valid_targets_mean": 16161.1,
|
|
"valid_targets_min": 14871
|
|
},
|
|
{
|
|
"epoch": 0.7718550106609808,
|
|
"grad_norm": 0.5712546295504819,
|
|
"learning_rate": 3.966138682951008e-05,
|
|
"loss": 1.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26173871755599976,
|
|
"step": 181,
|
|
"valid_targets_mean": 15097.6,
|
|
"valid_targets_min": 12384
|
|
},
|
|
{
|
|
"epoch": 0.7761194029850746,
|
|
"grad_norm": 0.5850669894570342,
|
|
"learning_rate": 3.9650407924445147e-05,
|
|
"loss": 1.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023485839366913,
|
|
"step": 182,
|
|
"valid_targets_mean": 15968.6,
|
|
"valid_targets_min": 9799
|
|
},
|
|
{
|
|
"epoch": 0.7803837953091685,
|
|
"grad_norm": 0.411665246811253,
|
|
"learning_rate": 3.963925543085181e-05,
|
|
"loss": 1.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2788766026496887,
|
|
"step": 183,
|
|
"valid_targets_mean": 13224.8,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 0.7846481876332623,
|
|
"grad_norm": 0.37703285386494834,
|
|
"learning_rate": 3.96279294472494e-05,
|
|
"loss": 1.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726665735244751,
|
|
"step": 184,
|
|
"valid_targets_mean": 16042.3,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 0.7889125799573561,
|
|
"grad_norm": 0.4584215787797822,
|
|
"learning_rate": 3.961643007368984e-05,
|
|
"loss": 1.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33016419410705566,
|
|
"step": 185,
|
|
"valid_targets_mean": 16088.2,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 0.7931769722814499,
|
|
"grad_norm": 0.4482669750685588,
|
|
"learning_rate": 3.960475741175671e-05,
|
|
"loss": 1.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.188775897026062,
|
|
"step": 186,
|
|
"valid_targets_mean": 10063.5,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 0.7974413646055437,
|
|
"grad_norm": 0.3435379523087596,
|
|
"learning_rate": 3.959291156456444e-05,
|
|
"loss": 1.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803458869457245,
|
|
"step": 187,
|
|
"valid_targets_mean": 16124.9,
|
|
"valid_targets_min": 14690
|
|
},
|
|
{
|
|
"epoch": 0.8017057569296375,
|
|
"grad_norm": 0.36498580649118295,
|
|
"learning_rate": 3.9580892636757334e-05,
|
|
"loss": 1.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3496624827384949,
|
|
"step": 188,
|
|
"valid_targets_mean": 16013.7,
|
|
"valid_targets_min": 13414
|
|
},
|
|
{
|
|
"epoch": 0.8059701492537313,
|
|
"grad_norm": 0.3641243088126945,
|
|
"learning_rate": 3.9568700734508645e-05,
|
|
"loss": 1.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19826599955558777,
|
|
"step": 189,
|
|
"valid_targets_mean": 9630.3,
|
|
"valid_targets_min": 4230
|
|
},
|
|
{
|
|
"epoch": 0.8102345415778252,
|
|
"grad_norm": 0.3504510733947343,
|
|
"learning_rate": 3.955633596551967e-05,
|
|
"loss": 1.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921593189239502,
|
|
"step": 190,
|
|
"valid_targets_mean": 16130.3,
|
|
"valid_targets_min": 15217
|
|
},
|
|
{
|
|
"epoch": 0.814498933901919,
|
|
"grad_norm": 0.3491928195639914,
|
|
"learning_rate": 3.9543798439018776e-05,
|
|
"loss": 1.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33159956336021423,
|
|
"step": 191,
|
|
"valid_targets_mean": 16100.2,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 0.8187633262260128,
|
|
"grad_norm": 0.32336688716837675,
|
|
"learning_rate": 3.953108826576046e-05,
|
|
"loss": 1.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630491256713867,
|
|
"step": 192,
|
|
"valid_targets_mean": 13192.3,
|
|
"valid_targets_min": 10124
|
|
},
|
|
{
|
|
"epoch": 0.8230277185501066,
|
|
"grad_norm": 0.39017739945263646,
|
|
"learning_rate": 3.9518205558024334e-05,
|
|
"loss": 1.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3345211446285248,
|
|
"step": 193,
|
|
"valid_targets_mean": 16110.0,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 0.8272921108742004,
|
|
"grad_norm": 0.34743379244902567,
|
|
"learning_rate": 3.9505150429614154e-05,
|
|
"loss": 1.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3242364525794983,
|
|
"step": 194,
|
|
"valid_targets_mean": 16157.9,
|
|
"valid_targets_min": 15195
|
|
},
|
|
{
|
|
"epoch": 0.8315565031982942,
|
|
"grad_norm": 0.38201086336633105,
|
|
"learning_rate": 3.949192299585681e-05,
|
|
"loss": 1.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28175580501556396,
|
|
"step": 195,
|
|
"valid_targets_mean": 15813.3,
|
|
"valid_targets_min": 14450
|
|
},
|
|
{
|
|
"epoch": 0.835820895522388,
|
|
"grad_norm": 0.3932823175171073,
|
|
"learning_rate": 3.9478523373601325e-05,
|
|
"loss": 1.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34019625186920166,
|
|
"step": 196,
|
|
"valid_targets_mean": 15987.5,
|
|
"valid_targets_min": 14241
|
|
},
|
|
{
|
|
"epoch": 0.8400852878464818,
|
|
"grad_norm": 0.44023963420501316,
|
|
"learning_rate": 3.946495168121778e-05,
|
|
"loss": 1.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22312858700752258,
|
|
"step": 197,
|
|
"valid_targets_mean": 10847.8,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 0.8443496801705757,
|
|
"grad_norm": 0.42315255007841074,
|
|
"learning_rate": 3.9451208038596325e-05,
|
|
"loss": 1.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27388906478881836,
|
|
"step": 198,
|
|
"valid_targets_mean": 15738.3,
|
|
"valid_targets_min": 14685
|
|
},
|
|
{
|
|
"epoch": 0.8486140724946695,
|
|
"grad_norm": 0.38819598173208686,
|
|
"learning_rate": 3.943729256714608e-05,
|
|
"loss": 1.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.335122287273407,
|
|
"step": 199,
|
|
"valid_targets_mean": 16183.1,
|
|
"valid_targets_min": 15737
|
|
},
|
|
{
|
|
"epoch": 0.8528784648187633,
|
|
"grad_norm": 0.4455524378088675,
|
|
"learning_rate": 3.942320538979408e-05,
|
|
"loss": 1.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16939672827720642,
|
|
"step": 200,
|
|
"valid_targets_mean": 7677.2,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.46444801686351583,
|
|
"learning_rate": 3.9408946630984144e-05,
|
|
"loss": 1.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3230212330818176,
|
|
"step": 201,
|
|
"valid_targets_mean": 16004.1,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 0.8614072494669509,
|
|
"grad_norm": 0.4235046855847129,
|
|
"learning_rate": 3.939451641667587e-05,
|
|
"loss": 1.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3233122229576111,
|
|
"step": 202,
|
|
"valid_targets_mean": 16134.0,
|
|
"valid_targets_min": 14956
|
|
},
|
|
{
|
|
"epoch": 0.8656716417910447,
|
|
"grad_norm": 0.29302769606326967,
|
|
"learning_rate": 3.937991487434342e-05,
|
|
"loss": 1.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19844433665275574,
|
|
"step": 203,
|
|
"valid_targets_mean": 11952.2,
|
|
"valid_targets_min": 9228
|
|
},
|
|
{
|
|
"epoch": 0.8699360341151386,
|
|
"grad_norm": 0.45494602868420536,
|
|
"learning_rate": 3.9365142132974484e-05,
|
|
"loss": 1.103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2797389328479767,
|
|
"step": 204,
|
|
"valid_targets_mean": 16192.1,
|
|
"valid_targets_min": 15300
|
|
},
|
|
{
|
|
"epoch": 0.8742004264392325,
|
|
"grad_norm": 0.34686513693533766,
|
|
"learning_rate": 3.935019832306905e-05,
|
|
"loss": 1.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33879274129867554,
|
|
"step": 205,
|
|
"valid_targets_mean": 16104.9,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 0.8784648187633263,
|
|
"grad_norm": 0.30270135007173615,
|
|
"learning_rate": 3.933508357663832e-05,
|
|
"loss": 1.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25710511207580566,
|
|
"step": 206,
|
|
"valid_targets_mean": 14878.7,
|
|
"valid_targets_min": 13210
|
|
},
|
|
{
|
|
"epoch": 0.8827292110874201,
|
|
"grad_norm": 0.31631763138531493,
|
|
"learning_rate": 3.9319798027203544e-05,
|
|
"loss": 1.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000783324241638,
|
|
"step": 207,
|
|
"valid_targets_mean": 16179.5,
|
|
"valid_targets_min": 15250
|
|
},
|
|
{
|
|
"epoch": 0.8869936034115139,
|
|
"grad_norm": 0.3374995461654255,
|
|
"learning_rate": 3.930434180979478e-05,
|
|
"loss": 1.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27523770928382874,
|
|
"step": 208,
|
|
"valid_targets_mean": 13244.8,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 0.8912579957356077,
|
|
"grad_norm": 0.33826024909068153,
|
|
"learning_rate": 3.928871506094975e-05,
|
|
"loss": 1.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702364921569824,
|
|
"step": 209,
|
|
"valid_targets_mean": 15530.1,
|
|
"valid_targets_min": 14077
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.3454149662965355,
|
|
"learning_rate": 3.927291791871264e-05,
|
|
"loss": 1.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32375890016555786,
|
|
"step": 210,
|
|
"valid_targets_mean": 16145.0,
|
|
"valid_targets_min": 15408
|
|
},
|
|
{
|
|
"epoch": 0.8997867803837953,
|
|
"grad_norm": 0.3290507938979677,
|
|
"learning_rate": 3.925695052263284e-05,
|
|
"loss": 1.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19708961248397827,
|
|
"step": 211,
|
|
"valid_targets_mean": 9302.1,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 0.9040511727078892,
|
|
"grad_norm": 0.3474005455216942,
|
|
"learning_rate": 3.924081301376375e-05,
|
|
"loss": 1.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829481363296509,
|
|
"step": 212,
|
|
"valid_targets_mean": 16123.4,
|
|
"valid_targets_min": 15464
|
|
},
|
|
{
|
|
"epoch": 0.908315565031983,
|
|
"grad_norm": 0.390594945498062,
|
|
"learning_rate": 3.9224505534661525e-05,
|
|
"loss": 1.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32957708835601807,
|
|
"step": 213,
|
|
"valid_targets_mean": 16164.3,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 0.9125799573560768,
|
|
"grad_norm": 0.3607596936386698,
|
|
"learning_rate": 3.92080282293838e-05,
|
|
"loss": 1.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2116210162639618,
|
|
"step": 214,
|
|
"valid_targets_mean": 11598.8,
|
|
"valid_targets_min": 6923
|
|
},
|
|
{
|
|
"epoch": 0.9168443496801706,
|
|
"grad_norm": 0.4446291987676236,
|
|
"learning_rate": 3.9191381243488417e-05,
|
|
"loss": 1.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27826356887817383,
|
|
"step": 215,
|
|
"valid_targets_mean": 16140.2,
|
|
"valid_targets_min": 15054
|
|
},
|
|
{
|
|
"epoch": 0.9211087420042644,
|
|
"grad_norm": 0.507497077902957,
|
|
"learning_rate": 3.9174564724032167e-05,
|
|
"loss": 1.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32445028424263,
|
|
"step": 216,
|
|
"valid_targets_mean": 16168.6,
|
|
"valid_targets_min": 15492
|
|
},
|
|
{
|
|
"epoch": 0.9253731343283582,
|
|
"grad_norm": 0.48132118117215644,
|
|
"learning_rate": 3.9157578819569455e-05,
|
|
"loss": 1.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23471654951572418,
|
|
"step": 217,
|
|
"valid_targets_mean": 13440.2,
|
|
"valid_targets_min": 11543
|
|
},
|
|
{
|
|
"epoch": 0.929637526652452,
|
|
"grad_norm": 0.4387487536589673,
|
|
"learning_rate": 3.9140423680151036e-05,
|
|
"loss": 1.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30893394351005554,
|
|
"step": 218,
|
|
"valid_targets_mean": 16076.0,
|
|
"valid_targets_min": 14699
|
|
},
|
|
{
|
|
"epoch": 0.9339019189765458,
|
|
"grad_norm": 0.4285059199405423,
|
|
"learning_rate": 3.9123099457322625e-05,
|
|
"loss": 1.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316764771938324,
|
|
"step": 219,
|
|
"valid_targets_mean": 16097.1,
|
|
"valid_targets_min": 14737
|
|
},
|
|
{
|
|
"epoch": 0.9381663113006397,
|
|
"grad_norm": 0.4054213596333165,
|
|
"learning_rate": 3.9105606304123605e-05,
|
|
"loss": 1.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27088722586631775,
|
|
"step": 220,
|
|
"valid_targets_mean": 14339.8,
|
|
"valid_targets_min": 12604
|
|
},
|
|
{
|
|
"epoch": 0.9424307036247335,
|
|
"grad_norm": 0.3765490714862659,
|
|
"learning_rate": 3.908794437508567e-05,
|
|
"loss": 1.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32623913884162903,
|
|
"step": 221,
|
|
"valid_targets_mean": 16064.1,
|
|
"valid_targets_min": 14428
|
|
},
|
|
{
|
|
"epoch": 0.9466950959488273,
|
|
"grad_norm": 0.3079645973246052,
|
|
"learning_rate": 3.907011382623145e-05,
|
|
"loss": 1.1522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2375921607017517,
|
|
"step": 222,
|
|
"valid_targets_mean": 10776.3,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 0.9509594882729211,
|
|
"grad_norm": 0.35405336033246676,
|
|
"learning_rate": 3.905211481507313e-05,
|
|
"loss": 1.0704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573704123497009,
|
|
"step": 223,
|
|
"valid_targets_mean": 16155.4,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 0.9552238805970149,
|
|
"grad_norm": 0.4440397997262965,
|
|
"learning_rate": 3.903394750061106e-05,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28594210743904114,
|
|
"step": 224,
|
|
"valid_targets_mean": 16217.8,
|
|
"valid_targets_min": 15574
|
|
},
|
|
{
|
|
"epoch": 0.9594882729211087,
|
|
"grad_norm": 0.3633675996771697,
|
|
"learning_rate": 3.9015612043332375e-05,
|
|
"loss": 1.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522378921508789,
|
|
"step": 225,
|
|
"valid_targets_mean": 7463.5,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 0.9637526652452025,
|
|
"grad_norm": 0.41067900123948564,
|
|
"learning_rate": 3.8997108605209535e-05,
|
|
"loss": 1.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699429988861084,
|
|
"step": 226,
|
|
"valid_targets_mean": 16178.3,
|
|
"valid_targets_min": 15368
|
|
},
|
|
{
|
|
"epoch": 0.9680170575692963,
|
|
"grad_norm": 0.4235336117612582,
|
|
"learning_rate": 3.897843734969891e-05,
|
|
"loss": 1.0911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31220778822898865,
|
|
"step": 227,
|
|
"valid_targets_mean": 16218.0,
|
|
"valid_targets_min": 15492
|
|
},
|
|
{
|
|
"epoch": 0.9722814498933902,
|
|
"grad_norm": 0.35130467041824626,
|
|
"learning_rate": 3.895959844173937e-05,
|
|
"loss": 1.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20775854587554932,
|
|
"step": 228,
|
|
"valid_targets_mean": 11823.1,
|
|
"valid_targets_min": 9219
|
|
},
|
|
{
|
|
"epoch": 0.976545842217484,
|
|
"grad_norm": 0.49669402360213316,
|
|
"learning_rate": 3.8940592047750774e-05,
|
|
"loss": 1.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29683178663253784,
|
|
"step": 229,
|
|
"valid_targets_mean": 16117.2,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 0.9808102345415778,
|
|
"grad_norm": 0.4189225819959197,
|
|
"learning_rate": 3.892141833563255e-05,
|
|
"loss": 1.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3666456937789917,
|
|
"step": 230,
|
|
"valid_targets_mean": 16062.8,
|
|
"valid_targets_min": 15242
|
|
},
|
|
{
|
|
"epoch": 0.9850746268656716,
|
|
"grad_norm": 0.4328456477285098,
|
|
"learning_rate": 3.8902077474762155e-05,
|
|
"loss": 1.0571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2267758846282959,
|
|
"step": 231,
|
|
"valid_targets_mean": 13741.2,
|
|
"valid_targets_min": 11500
|
|
},
|
|
{
|
|
"epoch": 0.9893390191897654,
|
|
"grad_norm": 0.444586554519607,
|
|
"learning_rate": 3.888256963599364e-05,
|
|
"loss": 1.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3470284342765808,
|
|
"step": 232,
|
|
"valid_targets_mean": 16078.0,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 0.9936034115138592,
|
|
"grad_norm": 0.4779659437702058,
|
|
"learning_rate": 3.886289499165609e-05,
|
|
"loss": 1.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27443283796310425,
|
|
"step": 233,
|
|
"valid_targets_mean": 13027.3,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 0.997867803837953,
|
|
"grad_norm": 0.3798368486345066,
|
|
"learning_rate": 3.884305371555215e-05,
|
|
"loss": 1.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3034205436706543,
|
|
"step": 234,
|
|
"valid_targets_mean": 15647.9,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.39024283570726004,
|
|
"learning_rate": 3.882304598295643e-05,
|
|
"loss": 1.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5116605162620544,
|
|
"step": 235,
|
|
"valid_targets_mean": 9944.1,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.004264392324094,
|
|
"grad_norm": 0.48688508195696417,
|
|
"learning_rate": 3.880287197061402e-05,
|
|
"loss": 1.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28223833441734314,
|
|
"step": 236,
|
|
"valid_targets_mean": 16134.4,
|
|
"valid_targets_min": 15417
|
|
},
|
|
{
|
|
"epoch": 1.0085287846481876,
|
|
"grad_norm": 0.41040342224548326,
|
|
"learning_rate": 3.878253185673888e-05,
|
|
"loss": 1.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3450218439102173,
|
|
"step": 237,
|
|
"valid_targets_mean": 16085.3,
|
|
"valid_targets_min": 15115
|
|
},
|
|
{
|
|
"epoch": 1.0127931769722816,
|
|
"grad_norm": 0.35009077862857224,
|
|
"learning_rate": 3.876202582101229e-05,
|
|
"loss": 1.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2232784926891327,
|
|
"step": 238,
|
|
"valid_targets_mean": 11956.3,
|
|
"valid_targets_min": 8620
|
|
},
|
|
{
|
|
"epoch": 1.0170575692963753,
|
|
"grad_norm": 0.5509367721079539,
|
|
"learning_rate": 3.874135404458125e-05,
|
|
"loss": 1.1086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3012659549713135,
|
|
"step": 239,
|
|
"valid_targets_mean": 16189.2,
|
|
"valid_targets_min": 15410
|
|
},
|
|
{
|
|
"epoch": 1.0213219616204692,
|
|
"grad_norm": 0.6943633030212262,
|
|
"learning_rate": 3.8720516710056905e-05,
|
|
"loss": 1.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32979708909988403,
|
|
"step": 240,
|
|
"valid_targets_mean": 16079.0,
|
|
"valid_targets_min": 14919
|
|
},
|
|
{
|
|
"epoch": 1.0255863539445629,
|
|
"grad_norm": 0.7010047361426518,
|
|
"learning_rate": 3.8699514001512885e-05,
|
|
"loss": 1.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25711578130722046,
|
|
"step": 241,
|
|
"valid_targets_mean": 14073.2,
|
|
"valid_targets_min": 12010
|
|
},
|
|
{
|
|
"epoch": 1.0298507462686568,
|
|
"grad_norm": 0.5465123104283881,
|
|
"learning_rate": 3.867834610448374e-05,
|
|
"loss": 1.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3156501054763794,
|
|
"step": 242,
|
|
"valid_targets_mean": 16126.4,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 1.0341151385927505,
|
|
"grad_norm": 0.4150254838791621,
|
|
"learning_rate": 3.865701320596324e-05,
|
|
"loss": 1.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24540677666664124,
|
|
"step": 243,
|
|
"valid_targets_mean": 13054.0,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 1.0383795309168444,
|
|
"grad_norm": 0.5184290554565314,
|
|
"learning_rate": 3.863551549440277e-05,
|
|
"loss": 1.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725296914577484,
|
|
"step": 244,
|
|
"valid_targets_mean": 15801.2,
|
|
"valid_targets_min": 14436
|
|
},
|
|
{
|
|
"epoch": 1.0426439232409381,
|
|
"grad_norm": 0.5712555154187368,
|
|
"learning_rate": 3.861385315970964e-05,
|
|
"loss": 1.105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28132861852645874,
|
|
"step": 245,
|
|
"valid_targets_mean": 16140.1,
|
|
"valid_targets_min": 14444
|
|
},
|
|
{
|
|
"epoch": 1.046908315565032,
|
|
"grad_norm": 0.4312820171521829,
|
|
"learning_rate": 3.859202639324542e-05,
|
|
"loss": 1.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19228845834732056,
|
|
"step": 246,
|
|
"valid_targets_mean": 10079.0,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 1.0511727078891258,
|
|
"grad_norm": 0.4982181491360297,
|
|
"learning_rate": 3.8570035387824214e-05,
|
|
"loss": 1.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837919294834137,
|
|
"step": 247,
|
|
"valid_targets_mean": 16107.3,
|
|
"valid_targets_min": 15290
|
|
},
|
|
{
|
|
"epoch": 1.0554371002132197,
|
|
"grad_norm": 0.5273505815311281,
|
|
"learning_rate": 3.8547880337711036e-05,
|
|
"loss": 1.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3252648115158081,
|
|
"step": 248,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15031
|
|
},
|
|
{
|
|
"epoch": 1.0597014925373134,
|
|
"grad_norm": 0.44498988796021355,
|
|
"learning_rate": 3.8525561438620016e-05,
|
|
"loss": 1.0585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1894037425518036,
|
|
"step": 249,
|
|
"valid_targets_mean": 10379.2,
|
|
"valid_targets_min": 6997
|
|
},
|
|
{
|
|
"epoch": 1.0639658848614073,
|
|
"grad_norm": 0.4144830597405317,
|
|
"learning_rate": 3.850307888771269e-05,
|
|
"loss": 1.0731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28886622190475464,
|
|
"step": 250,
|
|
"valid_targets_mean": 16118.5,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 1.068230277185501,
|
|
"grad_norm": 0.5029381206794391,
|
|
"learning_rate": 3.848043288359629e-05,
|
|
"loss": 1.1498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3546772599220276,
|
|
"step": 251,
|
|
"valid_targets_mean": 16145.3,
|
|
"valid_targets_min": 15417
|
|
},
|
|
{
|
|
"epoch": 1.072494669509595,
|
|
"grad_norm": 0.44247318546686853,
|
|
"learning_rate": 3.8457623626321944e-05,
|
|
"loss": 1.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2482362687587738,
|
|
"step": 252,
|
|
"valid_targets_mean": 13666.4,
|
|
"valid_targets_min": 11928
|
|
},
|
|
{
|
|
"epoch": 1.0767590618336886,
|
|
"grad_norm": 0.39005937001798274,
|
|
"learning_rate": 3.843465131738296e-05,
|
|
"loss": 1.0506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29449522495269775,
|
|
"step": 253,
|
|
"valid_targets_mean": 16168.5,
|
|
"valid_targets_min": 15367
|
|
},
|
|
{
|
|
"epoch": 1.0810234541577826,
|
|
"grad_norm": 0.4817631021826581,
|
|
"learning_rate": 3.8411516159713e-05,
|
|
"loss": 1.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33546924591064453,
|
|
"step": 254,
|
|
"valid_targets_mean": 16058.3,
|
|
"valid_targets_min": 15122
|
|
},
|
|
{
|
|
"epoch": 1.0852878464818763,
|
|
"grad_norm": 0.40971558311849615,
|
|
"learning_rate": 3.838821835768431e-05,
|
|
"loss": 1.0997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25414609909057617,
|
|
"step": 255,
|
|
"valid_targets_mean": 14634.8,
|
|
"valid_targets_min": 12318
|
|
},
|
|
{
|
|
"epoch": 1.0895522388059702,
|
|
"grad_norm": 0.41152374607669845,
|
|
"learning_rate": 3.83647581171059e-05,
|
|
"loss": 1.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28119754791259766,
|
|
"step": 256,
|
|
"valid_targets_mean": 16229.5,
|
|
"valid_targets_min": 15678
|
|
},
|
|
{
|
|
"epoch": 1.0938166311300639,
|
|
"grad_norm": 0.36986113057932224,
|
|
"learning_rate": 3.8341135645221744e-05,
|
|
"loss": 1.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21205706894397736,
|
|
"step": 257,
|
|
"valid_targets_mean": 10198.9,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 1.0980810234541578,
|
|
"grad_norm": 0.39538818902202144,
|
|
"learning_rate": 3.831735115070895e-05,
|
|
"loss": 1.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730725407600403,
|
|
"step": 258,
|
|
"valid_targets_mean": 15648.3,
|
|
"valid_targets_min": 14587
|
|
},
|
|
{
|
|
"epoch": 1.1023454157782515,
|
|
"grad_norm": 0.38776047712641437,
|
|
"learning_rate": 3.8293404843675904e-05,
|
|
"loss": 1.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3306257724761963,
|
|
"step": 259,
|
|
"valid_targets_mean": 16135.6,
|
|
"valid_targets_min": 15231
|
|
},
|
|
{
|
|
"epoch": 1.1066098081023454,
|
|
"grad_norm": 0.39177831154033477,
|
|
"learning_rate": 3.8269296935660395e-05,
|
|
"loss": 1.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15725815296173096,
|
|
"step": 260,
|
|
"valid_targets_mean": 7092.5,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.1108742004264391,
|
|
"grad_norm": 0.3955787180514775,
|
|
"learning_rate": 3.82450276396278e-05,
|
|
"loss": 1.0555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26038262248039246,
|
|
"step": 261,
|
|
"valid_targets_mean": 16152.0,
|
|
"valid_targets_min": 14548
|
|
},
|
|
{
|
|
"epoch": 1.115138592750533,
|
|
"grad_norm": 0.3943393830343504,
|
|
"learning_rate": 3.822059716996916e-05,
|
|
"loss": 1.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29220741987228394,
|
|
"step": 262,
|
|
"valid_targets_mean": 16084.3,
|
|
"valid_targets_min": 11563
|
|
},
|
|
{
|
|
"epoch": 1.1194029850746268,
|
|
"grad_norm": 0.34671504145506926,
|
|
"learning_rate": 3.819600574249929e-05,
|
|
"loss": 1.094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2236497700214386,
|
|
"step": 263,
|
|
"valid_targets_mean": 12894.8,
|
|
"valid_targets_min": 10816
|
|
},
|
|
{
|
|
"epoch": 1.1236673773987207,
|
|
"grad_norm": 0.42078315088119533,
|
|
"learning_rate": 3.817125357445489e-05,
|
|
"loss": 1.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2985043525695801,
|
|
"step": 264,
|
|
"valid_targets_mean": 16128.0,
|
|
"valid_targets_min": 15288
|
|
},
|
|
{
|
|
"epoch": 1.1279317697228146,
|
|
"grad_norm": 0.3432043026098217,
|
|
"learning_rate": 3.814634088449261e-05,
|
|
"loss": 1.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3035276532173157,
|
|
"step": 265,
|
|
"valid_targets_mean": 16191.3,
|
|
"valid_targets_min": 15360
|
|
},
|
|
{
|
|
"epoch": 1.1321961620469083,
|
|
"grad_norm": 0.41374067810311144,
|
|
"learning_rate": 3.812126789268712e-05,
|
|
"loss": 1.0658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25057581067085266,
|
|
"step": 266,
|
|
"valid_targets_mean": 13714.1,
|
|
"valid_targets_min": 10885
|
|
},
|
|
{
|
|
"epoch": 1.136460554371002,
|
|
"grad_norm": 0.4607688045395972,
|
|
"learning_rate": 3.80960348205292e-05,
|
|
"loss": 1.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000008165836334,
|
|
"step": 267,
|
|
"valid_targets_mean": 16230.6,
|
|
"valid_targets_min": 14726
|
|
},
|
|
{
|
|
"epoch": 1.140724946695096,
|
|
"grad_norm": 0.40205293987912205,
|
|
"learning_rate": 3.807064189092372e-05,
|
|
"loss": 1.1149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25882160663604736,
|
|
"step": 268,
|
|
"valid_targets_mean": 12928.8,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.1449893390191899,
|
|
"grad_norm": 0.2840612227828314,
|
|
"learning_rate": 3.804508932818771e-05,
|
|
"loss": 1.0726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26746341586112976,
|
|
"step": 269,
|
|
"valid_targets_mean": 15937.6,
|
|
"valid_targets_min": 14274
|
|
},
|
|
{
|
|
"epoch": 1.1492537313432836,
|
|
"grad_norm": 0.4619094136744395,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 1.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31920215487480164,
|
|
"step": 270,
|
|
"valid_targets_mean": 16088.9,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 1.1535181236673775,
|
|
"grad_norm": 0.48231701020494727,
|
|
"learning_rate": 3.799350620764114e-05,
|
|
"loss": 1.1666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20658738911151886,
|
|
"step": 271,
|
|
"valid_targets_mean": 9558.3,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 1.1577825159914712,
|
|
"grad_norm": 0.46062053301062583,
|
|
"learning_rate": 3.7967476105507535e-05,
|
|
"loss": 1.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2467394322156906,
|
|
"step": 272,
|
|
"valid_targets_mean": 16074.0,
|
|
"valid_targets_min": 15120
|
|
},
|
|
{
|
|
"epoch": 1.1620469083155651,
|
|
"grad_norm": 0.3951224400955993,
|
|
"learning_rate": 3.7941287281593284e-05,
|
|
"loss": 1.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34779471158981323,
|
|
"step": 273,
|
|
"valid_targets_mean": 16051.9,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 1.1663113006396588,
|
|
"grad_norm": 0.4181270890938552,
|
|
"learning_rate": 3.7914939967246227e-05,
|
|
"loss": 1.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19121389091014862,
|
|
"step": 274,
|
|
"valid_targets_mean": 10320.2,
|
|
"valid_targets_min": 4592
|
|
},
|
|
{
|
|
"epoch": 1.1705756929637527,
|
|
"grad_norm": 0.5270170169887702,
|
|
"learning_rate": 3.7888434395214285e-05,
|
|
"loss": 1.065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27775952219963074,
|
|
"step": 275,
|
|
"valid_targets_mean": 16176.9,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 1.1748400852878464,
|
|
"grad_norm": 0.4363543220215517,
|
|
"learning_rate": 3.786177079964339e-05,
|
|
"loss": 1.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34759217500686646,
|
|
"step": 276,
|
|
"valid_targets_mean": 15977.2,
|
|
"valid_targets_min": 12151
|
|
},
|
|
{
|
|
"epoch": 1.1791044776119404,
|
|
"grad_norm": 0.3270505163256704,
|
|
"learning_rate": 3.783494941607544e-05,
|
|
"loss": 1.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24708306789398193,
|
|
"step": 277,
|
|
"valid_targets_mean": 14817.6,
|
|
"valid_targets_min": 12394
|
|
},
|
|
{
|
|
"epoch": 1.183368869936034,
|
|
"grad_norm": 0.5195692662658037,
|
|
"learning_rate": 3.780797048144621e-05,
|
|
"loss": 1.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29989132285118103,
|
|
"step": 278,
|
|
"valid_targets_mean": 16170.4,
|
|
"valid_targets_min": 15476
|
|
},
|
|
{
|
|
"epoch": 1.187633262260128,
|
|
"grad_norm": 0.6190527220557674,
|
|
"learning_rate": 3.7780834234083236e-05,
|
|
"loss": 1.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992306053638458,
|
|
"step": 279,
|
|
"valid_targets_mean": 16205.1,
|
|
"valid_targets_min": 15621
|
|
},
|
|
{
|
|
"epoch": 1.1918976545842217,
|
|
"grad_norm": 0.6080600402547287,
|
|
"learning_rate": 3.775354091370376e-05,
|
|
"loss": 1.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604046165943146,
|
|
"step": 280,
|
|
"valid_targets_mean": 15438.3,
|
|
"valid_targets_min": 13444
|
|
},
|
|
{
|
|
"epoch": 1.1961620469083156,
|
|
"grad_norm": 0.39041134509984243,
|
|
"learning_rate": 3.772609076141255e-05,
|
|
"loss": 1.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31458884477615356,
|
|
"step": 281,
|
|
"valid_targets_mean": 16119.0,
|
|
"valid_targets_min": 15507
|
|
},
|
|
{
|
|
"epoch": 1.2004264392324093,
|
|
"grad_norm": 0.4767624558807393,
|
|
"learning_rate": 3.769848401969982e-05,
|
|
"loss": 1.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21639636158943176,
|
|
"step": 282,
|
|
"valid_targets_mean": 10172.0,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 1.2046908315565032,
|
|
"grad_norm": 0.5623270402982615,
|
|
"learning_rate": 3.767072093243907e-05,
|
|
"loss": 1.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950652837753296,
|
|
"step": 283,
|
|
"valid_targets_mean": 15828.4,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 1.208955223880597,
|
|
"grad_norm": 0.4524089975039437,
|
|
"learning_rate": 3.7642801744884915e-05,
|
|
"loss": 1.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294228196144104,
|
|
"step": 284,
|
|
"valid_targets_mean": 16200.7,
|
|
"valid_targets_min": 15297
|
|
},
|
|
{
|
|
"epoch": 1.2132196162046909,
|
|
"grad_norm": 0.32281714093886354,
|
|
"learning_rate": 3.761472670367096e-05,
|
|
"loss": 1.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17498302459716797,
|
|
"step": 285,
|
|
"valid_targets_mean": 9475.9,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 1.2174840085287846,
|
|
"grad_norm": 0.5138432099324803,
|
|
"learning_rate": 3.758649605680758e-05,
|
|
"loss": 1.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28575873374938965,
|
|
"step": 286,
|
|
"valid_targets_mean": 16135.3,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 1.2217484008528785,
|
|
"grad_norm": 0.3655062872472561,
|
|
"learning_rate": 3.755811005367974e-05,
|
|
"loss": 1.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34900298714637756,
|
|
"step": 287,
|
|
"valid_targets_mean": 15927.2,
|
|
"valid_targets_min": 13538
|
|
},
|
|
{
|
|
"epoch": 1.2260127931769722,
|
|
"grad_norm": 0.4348050500931117,
|
|
"learning_rate": 3.752956894504481e-05,
|
|
"loss": 1.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2179632931947708,
|
|
"step": 288,
|
|
"valid_targets_mean": 12555.1,
|
|
"valid_targets_min": 10326
|
|
},
|
|
{
|
|
"epoch": 1.2302771855010661,
|
|
"grad_norm": 0.4066292861906636,
|
|
"learning_rate": 3.750087298303033e-05,
|
|
"loss": 1.0861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655118703842163,
|
|
"step": 289,
|
|
"valid_targets_mean": 16137.8,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 1.2345415778251598,
|
|
"grad_norm": 0.48614284361098015,
|
|
"learning_rate": 3.7472022421131795e-05,
|
|
"loss": 1.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31156063079833984,
|
|
"step": 290,
|
|
"valid_targets_mean": 16206.4,
|
|
"valid_targets_min": 15481
|
|
},
|
|
{
|
|
"epoch": 1.2388059701492538,
|
|
"grad_norm": 0.3675854254671058,
|
|
"learning_rate": 3.7443017514210406e-05,
|
|
"loss": 1.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502858638763428,
|
|
"step": 291,
|
|
"valid_targets_mean": 14772.3,
|
|
"valid_targets_min": 12576
|
|
},
|
|
{
|
|
"epoch": 1.2430703624733475,
|
|
"grad_norm": 0.39619036032104965,
|
|
"learning_rate": 3.7413858518490825e-05,
|
|
"loss": 1.0596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3014882206916809,
|
|
"step": 292,
|
|
"valid_targets_mean": 16124.6,
|
|
"valid_targets_min": 14727
|
|
},
|
|
{
|
|
"epoch": 1.2473347547974414,
|
|
"grad_norm": 0.4106391418257523,
|
|
"learning_rate": 3.7384545691558895e-05,
|
|
"loss": 1.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29865384101867676,
|
|
"step": 293,
|
|
"valid_targets_mean": 12797.4,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 1.251599147121535,
|
|
"grad_norm": 0.3731804980440619,
|
|
"learning_rate": 3.735507929235941e-05,
|
|
"loss": 1.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28893664479255676,
|
|
"step": 294,
|
|
"valid_targets_mean": 15906.0,
|
|
"valid_targets_min": 14468
|
|
},
|
|
{
|
|
"epoch": 1.255863539445629,
|
|
"grad_norm": 0.3374195469547457,
|
|
"learning_rate": 3.732545958119378e-05,
|
|
"loss": 1.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32684630155563354,
|
|
"step": 295,
|
|
"valid_targets_mean": 16134.5,
|
|
"valid_targets_min": 15264
|
|
},
|
|
{
|
|
"epoch": 1.260127931769723,
|
|
"grad_norm": 0.3360515167194252,
|
|
"learning_rate": 3.729568681971774e-05,
|
|
"loss": 1.0911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19337254762649536,
|
|
"step": 296,
|
|
"valid_targets_mean": 9574.0,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 1.2643923240938166,
|
|
"grad_norm": 0.26536955158503056,
|
|
"learning_rate": 3.726576127093905e-05,
|
|
"loss": 1.0493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696130573749542,
|
|
"step": 297,
|
|
"valid_targets_mean": 16036.6,
|
|
"valid_targets_min": 13771
|
|
},
|
|
{
|
|
"epoch": 1.2686567164179103,
|
|
"grad_norm": 0.30761645747590627,
|
|
"learning_rate": 3.7235683199215177e-05,
|
|
"loss": 1.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3584132790565491,
|
|
"step": 298,
|
|
"valid_targets_mean": 16034.9,
|
|
"valid_targets_min": 13226
|
|
},
|
|
{
|
|
"epoch": 1.2729211087420043,
|
|
"grad_norm": 0.27959937087294806,
|
|
"learning_rate": 3.7205452870250944e-05,
|
|
"loss": 1.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20323173701763153,
|
|
"step": 299,
|
|
"valid_targets_mean": 11007.0,
|
|
"valid_targets_min": 5744
|
|
},
|
|
{
|
|
"epoch": 1.2771855010660982,
|
|
"grad_norm": 0.2941909530383932,
|
|
"learning_rate": 3.7175070551096204e-05,
|
|
"loss": 1.061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728692293167114,
|
|
"step": 300,
|
|
"valid_targets_mean": 16107.3,
|
|
"valid_targets_min": 14275
|
|
},
|
|
{
|
|
"epoch": 1.2814498933901919,
|
|
"grad_norm": 0.3045695451435711,
|
|
"learning_rate": 3.7144536510143436e-05,
|
|
"loss": 1.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3124944269657135,
|
|
"step": 301,
|
|
"valid_targets_mean": 16167.8,
|
|
"valid_targets_min": 15391
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.29964759276009667,
|
|
"learning_rate": 3.711385101712544e-05,
|
|
"loss": 1.088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.237869530916214,
|
|
"step": 302,
|
|
"valid_targets_mean": 13015.2,
|
|
"valid_targets_min": 10716
|
|
},
|
|
{
|
|
"epoch": 1.2899786780383795,
|
|
"grad_norm": 0.3370082192565383,
|
|
"learning_rate": 3.708301434311289e-05,
|
|
"loss": 1.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33356988430023193,
|
|
"step": 303,
|
|
"valid_targets_mean": 16074.2,
|
|
"valid_targets_min": 15513
|
|
},
|
|
{
|
|
"epoch": 1.2942430703624734,
|
|
"grad_norm": 0.32138080902527844,
|
|
"learning_rate": 3.7052026760511996e-05,
|
|
"loss": 1.0981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33788326382637024,
|
|
"step": 304,
|
|
"valid_targets_mean": 16062.2,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 1.2985074626865671,
|
|
"grad_norm": 0.30392134990032627,
|
|
"learning_rate": 3.7020888543062046e-05,
|
|
"loss": 1.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2807016968727112,
|
|
"step": 305,
|
|
"valid_targets_mean": 15560.0,
|
|
"valid_targets_min": 7641
|
|
},
|
|
{
|
|
"epoch": 1.302771855010661,
|
|
"grad_norm": 0.3238007708694112,
|
|
"learning_rate": 3.6989599965833024e-05,
|
|
"loss": 1.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851221561431885,
|
|
"step": 306,
|
|
"valid_targets_mean": 16099.2,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 1.3070362473347548,
|
|
"grad_norm": 0.29840930934212534,
|
|
"learning_rate": 3.695816130522317e-05,
|
|
"loss": 1.0946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22299593687057495,
|
|
"step": 307,
|
|
"valid_targets_mean": 11237.3,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 1.3113006396588487,
|
|
"grad_norm": 0.30168843638422593,
|
|
"learning_rate": 3.692657283895651e-05,
|
|
"loss": 1.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27403849363327026,
|
|
"step": 308,
|
|
"valid_targets_mean": 16148.6,
|
|
"valid_targets_min": 15513
|
|
},
|
|
{
|
|
"epoch": 1.3155650319829424,
|
|
"grad_norm": 0.32483179815857954,
|
|
"learning_rate": 3.689483484608048e-05,
|
|
"loss": 1.0612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3060385584831238,
|
|
"step": 309,
|
|
"valid_targets_mean": 16065.3,
|
|
"valid_targets_min": 13956
|
|
},
|
|
{
|
|
"epoch": 1.3198294243070363,
|
|
"grad_norm": 0.31149112041744637,
|
|
"learning_rate": 3.6862947606963364e-05,
|
|
"loss": 1.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17092487215995789,
|
|
"step": 310,
|
|
"valid_targets_mean": 8786.3,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 1.32409381663113,
|
|
"grad_norm": 0.2886555855519825,
|
|
"learning_rate": 3.6830911403291885e-05,
|
|
"loss": 1.058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26450830698013306,
|
|
"step": 311,
|
|
"valid_targets_mean": 16189.7,
|
|
"valid_targets_min": 15573
|
|
},
|
|
{
|
|
"epoch": 1.328358208955224,
|
|
"grad_norm": 0.3306819378912933,
|
|
"learning_rate": 3.679872651806869e-05,
|
|
"loss": 1.0451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085620403289795,
|
|
"step": 312,
|
|
"valid_targets_mean": 16043.3,
|
|
"valid_targets_min": 13998
|
|
},
|
|
{
|
|
"epoch": 1.3326226012793176,
|
|
"grad_norm": 0.3023638424474574,
|
|
"learning_rate": 3.676639323560986e-05,
|
|
"loss": 1.0489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21333524584770203,
|
|
"step": 313,
|
|
"valid_targets_mean": 11691.9,
|
|
"valid_targets_min": 8218
|
|
},
|
|
{
|
|
"epoch": 1.3368869936034116,
|
|
"grad_norm": 0.33604814898504637,
|
|
"learning_rate": 3.6733911841542365e-05,
|
|
"loss": 1.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286090612411499,
|
|
"step": 314,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15261
|
|
},
|
|
{
|
|
"epoch": 1.3411513859275053,
|
|
"grad_norm": 0.2916648358359987,
|
|
"learning_rate": 3.6701282622801626e-05,
|
|
"loss": 1.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.349140465259552,
|
|
"step": 315,
|
|
"valid_targets_mean": 16031.2,
|
|
"valid_targets_min": 14724
|
|
},
|
|
{
|
|
"epoch": 1.3454157782515992,
|
|
"grad_norm": 0.3577992054526001,
|
|
"learning_rate": 3.666850586762886e-05,
|
|
"loss": 1.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26788413524627686,
|
|
"step": 316,
|
|
"valid_targets_mean": 15082.3,
|
|
"valid_targets_min": 12379
|
|
},
|
|
{
|
|
"epoch": 1.349680170575693,
|
|
"grad_norm": 0.2848861404227202,
|
|
"learning_rate": 3.663558186556863e-05,
|
|
"loss": 1.0943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31389766931533813,
|
|
"step": 317,
|
|
"valid_targets_mean": 16143.6,
|
|
"valid_targets_min": 15422
|
|
},
|
|
{
|
|
"epoch": 1.3539445628997868,
|
|
"grad_norm": 0.3233779269723926,
|
|
"learning_rate": 3.660251090746627e-05,
|
|
"loss": 1.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2748628556728363,
|
|
"step": 318,
|
|
"valid_targets_mean": 13230.5,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 1.3582089552238805,
|
|
"grad_norm": 0.2858887914036524,
|
|
"learning_rate": 3.656929328546526e-05,
|
|
"loss": 1.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868054509162903,
|
|
"step": 319,
|
|
"valid_targets_mean": 15947.4,
|
|
"valid_targets_min": 14844
|
|
},
|
|
{
|
|
"epoch": 1.3624733475479744,
|
|
"grad_norm": 0.32284902384340897,
|
|
"learning_rate": 3.653592929300471e-05,
|
|
"loss": 1.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040750026702881,
|
|
"step": 320,
|
|
"valid_targets_mean": 16161.1,
|
|
"valid_targets_min": 15333
|
|
},
|
|
{
|
|
"epoch": 1.3667377398720681,
|
|
"grad_norm": 0.3297715967566815,
|
|
"learning_rate": 3.650241922481675e-05,
|
|
"loss": 1.0733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1861163079738617,
|
|
"step": 321,
|
|
"valid_targets_mean": 9992.6,
|
|
"valid_targets_min": 2412
|
|
},
|
|
{
|
|
"epoch": 1.371002132196162,
|
|
"grad_norm": 0.2870594544962561,
|
|
"learning_rate": 3.6468763376923886e-05,
|
|
"loss": 1.0679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283233642578125,
|
|
"step": 322,
|
|
"valid_targets_mean": 16080.6,
|
|
"valid_targets_min": 15212
|
|
},
|
|
{
|
|
"epoch": 1.375266524520256,
|
|
"grad_norm": 0.34262055196282626,
|
|
"learning_rate": 3.6434962046636464e-05,
|
|
"loss": 1.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.324097216129303,
|
|
"step": 323,
|
|
"valid_targets_mean": 16106.3,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 1.3795309168443497,
|
|
"grad_norm": 0.3428410455003426,
|
|
"learning_rate": 3.6401015532549957e-05,
|
|
"loss": 1.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2151477187871933,
|
|
"step": 324,
|
|
"valid_targets_mean": 10360.2,
|
|
"valid_targets_min": 7125
|
|
},
|
|
{
|
|
"epoch": 1.3837953091684434,
|
|
"grad_norm": 0.3146362518291799,
|
|
"learning_rate": 3.6366924134542386e-05,
|
|
"loss": 1.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897263765335083,
|
|
"step": 325,
|
|
"valid_targets_mean": 16060.8,
|
|
"valid_targets_min": 13555
|
|
},
|
|
{
|
|
"epoch": 1.3880597014925373,
|
|
"grad_norm": 0.3481176224891406,
|
|
"learning_rate": 3.633268815377166e-05,
|
|
"loss": 1.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328260600566864,
|
|
"step": 326,
|
|
"valid_targets_mean": 16054.6,
|
|
"valid_targets_min": 14273
|
|
},
|
|
{
|
|
"epoch": 1.3923240938166312,
|
|
"grad_norm": 0.3077273003482749,
|
|
"learning_rate": 3.6298307892672895e-05,
|
|
"loss": 1.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23626792430877686,
|
|
"step": 327,
|
|
"valid_targets_mean": 14108.6,
|
|
"valid_targets_min": 11744
|
|
},
|
|
{
|
|
"epoch": 1.396588486140725,
|
|
"grad_norm": 0.3516293681006609,
|
|
"learning_rate": 3.626378365495577e-05,
|
|
"loss": 1.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922561764717102,
|
|
"step": 328,
|
|
"valid_targets_mean": 16144.3,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 1.4008528784648187,
|
|
"grad_norm": 0.3756838223603127,
|
|
"learning_rate": 3.622911574560181e-05,
|
|
"loss": 1.0663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32784950733184814,
|
|
"step": 329,
|
|
"valid_targets_mean": 16119.1,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 1.4051172707889126,
|
|
"grad_norm": 0.3537330413744902,
|
|
"learning_rate": 3.6194304470861744e-05,
|
|
"loss": 1.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27316704392433167,
|
|
"step": 330,
|
|
"valid_targets_mean": 15151.9,
|
|
"valid_targets_min": 13814
|
|
},
|
|
{
|
|
"epoch": 1.4093816631130065,
|
|
"grad_norm": 0.36125365393054715,
|
|
"learning_rate": 3.615935013825272e-05,
|
|
"loss": 1.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30955564975738525,
|
|
"step": 331,
|
|
"valid_targets_mean": 16155.7,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 1.4136460554371002,
|
|
"grad_norm": 0.3058246230311012,
|
|
"learning_rate": 3.612425305655569e-05,
|
|
"loss": 1.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22015517950057983,
|
|
"step": 332,
|
|
"valid_targets_mean": 10614.2,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 1.417910447761194,
|
|
"grad_norm": 0.3859159015033643,
|
|
"learning_rate": 3.6089013535812593e-05,
|
|
"loss": 1.0542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26703906059265137,
|
|
"step": 333,
|
|
"valid_targets_mean": 16140.9,
|
|
"valid_targets_min": 15217
|
|
},
|
|
{
|
|
"epoch": 1.4221748400852878,
|
|
"grad_norm": 0.38720420606419576,
|
|
"learning_rate": 3.6053631887323656e-05,
|
|
"loss": 1.0889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32786643505096436,
|
|
"step": 334,
|
|
"valid_targets_mean": 16096.1,
|
|
"valid_targets_min": 14473
|
|
},
|
|
{
|
|
"epoch": 1.4264392324093818,
|
|
"grad_norm": 0.31989957859029033,
|
|
"learning_rate": 3.601810842364465e-05,
|
|
"loss": 1.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17885881662368774,
|
|
"step": 335,
|
|
"valid_targets_mean": 8554.1,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.4307036247334755,
|
|
"grad_norm": 0.30700677852157204,
|
|
"learning_rate": 3.598244345858412e-05,
|
|
"loss": 1.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596096396446228,
|
|
"step": 336,
|
|
"valid_targets_mean": 16153.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 1.4349680170575694,
|
|
"grad_norm": 0.3463548518128724,
|
|
"learning_rate": 3.594663730720059e-05,
|
|
"loss": 1.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3149087429046631,
|
|
"step": 337,
|
|
"valid_targets_mean": 16167.1,
|
|
"valid_targets_min": 15032
|
|
},
|
|
{
|
|
"epoch": 1.439232409381663,
|
|
"grad_norm": 0.37307238466289866,
|
|
"learning_rate": 3.591069028579982e-05,
|
|
"loss": 1.0988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22190377116203308,
|
|
"step": 338,
|
|
"valid_targets_mean": 12233.7,
|
|
"valid_targets_min": 8961
|
|
},
|
|
{
|
|
"epoch": 1.443496801705757,
|
|
"grad_norm": 0.31508567330283965,
|
|
"learning_rate": 3.5874602711931994e-05,
|
|
"loss": 1.1008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946207821369171,
|
|
"step": 339,
|
|
"valid_targets_mean": 16054.2,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 1.4477611940298507,
|
|
"grad_norm": 0.32971290964062444,
|
|
"learning_rate": 3.5838374904388904e-05,
|
|
"loss": 1.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32255274057388306,
|
|
"step": 340,
|
|
"valid_targets_mean": 16200.9,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 1.4520255863539446,
|
|
"grad_norm": 0.36090036147075477,
|
|
"learning_rate": 3.580200718320115e-05,
|
|
"loss": 1.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25074636936187744,
|
|
"step": 341,
|
|
"valid_targets_mean": 15703.2,
|
|
"valid_targets_min": 12835
|
|
},
|
|
{
|
|
"epoch": 1.4562899786780383,
|
|
"grad_norm": 0.4011601321937679,
|
|
"learning_rate": 3.576549986963531e-05,
|
|
"loss": 1.0778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31952688097953796,
|
|
"step": 342,
|
|
"valid_targets_mean": 16056.5,
|
|
"valid_targets_min": 14789
|
|
},
|
|
{
|
|
"epoch": 1.4605543710021323,
|
|
"grad_norm": 0.3676547349304962,
|
|
"learning_rate": 3.5728853286191075e-05,
|
|
"loss": 1.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541731595993042,
|
|
"step": 343,
|
|
"valid_targets_mean": 12879.8,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 1.464818763326226,
|
|
"grad_norm": 0.3238724181712805,
|
|
"learning_rate": 3.5692067756598465e-05,
|
|
"loss": 1.0586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604942321777344,
|
|
"step": 344,
|
|
"valid_targets_mean": 16118.8,
|
|
"valid_targets_min": 15426
|
|
},
|
|
{
|
|
"epoch": 1.4690831556503199,
|
|
"grad_norm": 0.44152925097732304,
|
|
"learning_rate": 3.5655143605814885e-05,
|
|
"loss": 1.0943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965952754020691,
|
|
"step": 345,
|
|
"valid_targets_mean": 15897.9,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 1.4733475479744136,
|
|
"grad_norm": 0.34349751360888897,
|
|
"learning_rate": 3.561808116002232e-05,
|
|
"loss": 1.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1971406638622284,
|
|
"step": 346,
|
|
"valid_targets_mean": 10358.7,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 1.4776119402985075,
|
|
"grad_norm": 0.3225944292915461,
|
|
"learning_rate": 3.5580880746624444e-05,
|
|
"loss": 1.0539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24682000279426575,
|
|
"step": 347,
|
|
"valid_targets_mean": 16129.1,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 1.4818763326226012,
|
|
"grad_norm": 0.3124256407452722,
|
|
"learning_rate": 3.5543542694243685e-05,
|
|
"loss": 1.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103386163711548,
|
|
"step": 348,
|
|
"valid_targets_mean": 16167.1,
|
|
"valid_targets_min": 15444
|
|
},
|
|
{
|
|
"epoch": 1.4861407249466951,
|
|
"grad_norm": 0.32974629082892415,
|
|
"learning_rate": 3.5506067332718355e-05,
|
|
"loss": 1.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21561576426029205,
|
|
"step": 349,
|
|
"valid_targets_mean": 10549.6,
|
|
"valid_targets_min": 6923
|
|
},
|
|
{
|
|
"epoch": 1.4904051172707888,
|
|
"grad_norm": 0.34775314789011286,
|
|
"learning_rate": 3.546845499309976e-05,
|
|
"loss": 1.0785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754581570625305,
|
|
"step": 350,
|
|
"valid_targets_mean": 16196.4,
|
|
"valid_targets_min": 15446
|
|
},
|
|
{
|
|
"epoch": 1.4946695095948828,
|
|
"grad_norm": 0.3028713159156353,
|
|
"learning_rate": 3.5430706007649225e-05,
|
|
"loss": 1.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34413397312164307,
|
|
"step": 351,
|
|
"valid_targets_mean": 16196.4,
|
|
"valid_targets_min": 15577
|
|
},
|
|
{
|
|
"epoch": 1.4989339019189765,
|
|
"grad_norm": 0.3384662366501191,
|
|
"learning_rate": 3.539282070983518e-05,
|
|
"loss": 1.095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24725352227687836,
|
|
"step": 352,
|
|
"valid_targets_mean": 13119.6,
|
|
"valid_targets_min": 10183
|
|
},
|
|
{
|
|
"epoch": 1.5031982942430704,
|
|
"grad_norm": 0.34377284984114953,
|
|
"learning_rate": 3.535479943433023e-05,
|
|
"loss": 1.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34733474254608154,
|
|
"step": 353,
|
|
"valid_targets_mean": 15859.2,
|
|
"valid_targets_min": 14337
|
|
},
|
|
{
|
|
"epoch": 1.5074626865671643,
|
|
"grad_norm": 0.36498915702842044,
|
|
"learning_rate": 3.5316642517008184e-05,
|
|
"loss": 1.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30858635902404785,
|
|
"step": 354,
|
|
"valid_targets_mean": 16225.0,
|
|
"valid_targets_min": 15717
|
|
},
|
|
{
|
|
"epoch": 1.511727078891258,
|
|
"grad_norm": 0.3806984231076566,
|
|
"learning_rate": 3.5278350294941074e-05,
|
|
"loss": 1.0931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281022310256958,
|
|
"step": 355,
|
|
"valid_targets_mean": 15363.8,
|
|
"valid_targets_min": 13266
|
|
},
|
|
{
|
|
"epoch": 1.5159914712153517,
|
|
"grad_norm": 0.3607162844870746,
|
|
"learning_rate": 3.523992310639622e-05,
|
|
"loss": 1.0563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892281115055084,
|
|
"step": 356,
|
|
"valid_targets_mean": 16191.6,
|
|
"valid_targets_min": 15466
|
|
},
|
|
{
|
|
"epoch": 1.5202558635394456,
|
|
"grad_norm": 0.3469251797058296,
|
|
"learning_rate": 3.5201361290833165e-05,
|
|
"loss": 1.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22507451474666595,
|
|
"step": 357,
|
|
"valid_targets_mean": 11349.5,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 1.5245202558635396,
|
|
"grad_norm": 0.316941337853733,
|
|
"learning_rate": 3.516266518890079e-05,
|
|
"loss": 1.066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844748795032501,
|
|
"step": 358,
|
|
"valid_targets_mean": 16115.3,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 1.5287846481876333,
|
|
"grad_norm": 0.3271118592624415,
|
|
"learning_rate": 3.512383514243419e-05,
|
|
"loss": 1.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860044538974762,
|
|
"step": 359,
|
|
"valid_targets_mean": 16163.1,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 1.533049040511727,
|
|
"grad_norm": 0.4076918821251131,
|
|
"learning_rate": 3.5084871494451716e-05,
|
|
"loss": 1.0228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18574681878089905,
|
|
"step": 360,
|
|
"valid_targets_mean": 9576.8,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 1.537313432835821,
|
|
"grad_norm": 0.26250230087775484,
|
|
"learning_rate": 3.5045774589151955e-05,
|
|
"loss": 1.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26781558990478516,
|
|
"step": 361,
|
|
"valid_targets_mean": 15993.7,
|
|
"valid_targets_min": 10506
|
|
},
|
|
{
|
|
"epoch": 1.5415778251599148,
|
|
"grad_norm": 0.40215152628101086,
|
|
"learning_rate": 3.500654477191064e-05,
|
|
"loss": 1.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34173938632011414,
|
|
"step": 362,
|
|
"valid_targets_mean": 15984.2,
|
|
"valid_targets_min": 14307
|
|
},
|
|
{
|
|
"epoch": 1.5458422174840085,
|
|
"grad_norm": 0.41294328730666957,
|
|
"learning_rate": 3.496718238927764e-05,
|
|
"loss": 1.0905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19544759392738342,
|
|
"step": 363,
|
|
"valid_targets_mean": 10465.0,
|
|
"valid_targets_min": 6740
|
|
},
|
|
{
|
|
"epoch": 1.5501066098081022,
|
|
"grad_norm": 0.35410135683356636,
|
|
"learning_rate": 3.492768778897388e-05,
|
|
"loss": 1.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821054756641388,
|
|
"step": 364,
|
|
"valid_targets_mean": 16167.3,
|
|
"valid_targets_min": 14513
|
|
},
|
|
{
|
|
"epoch": 1.5543710021321961,
|
|
"grad_norm": 0.3903312793171435,
|
|
"learning_rate": 3.4888061319888276e-05,
|
|
"loss": 1.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33056482672691345,
|
|
"step": 365,
|
|
"valid_targets_mean": 16124.3,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 1.55863539445629,
|
|
"grad_norm": 0.4017883197111794,
|
|
"learning_rate": 3.484830333207466e-05,
|
|
"loss": 1.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24902799725532532,
|
|
"step": 366,
|
|
"valid_targets_mean": 13535.5,
|
|
"valid_targets_min": 11622
|
|
},
|
|
{
|
|
"epoch": 1.5628997867803838,
|
|
"grad_norm": 0.40334704982682956,
|
|
"learning_rate": 3.4808414176748666e-05,
|
|
"loss": 1.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977840304374695,
|
|
"step": 367,
|
|
"valid_targets_mean": 16211.1,
|
|
"valid_targets_min": 15301
|
|
},
|
|
{
|
|
"epoch": 1.5671641791044775,
|
|
"grad_norm": 0.3614561883984975,
|
|
"learning_rate": 3.476839420628466e-05,
|
|
"loss": 1.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27573463320732117,
|
|
"step": 368,
|
|
"valid_targets_mean": 13357.7,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.4456082344108554,
|
|
"learning_rate": 3.472824377421257e-05,
|
|
"loss": 1.0563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25675058364868164,
|
|
"step": 369,
|
|
"valid_targets_mean": 15113.8,
|
|
"valid_targets_min": 13189
|
|
},
|
|
{
|
|
"epoch": 1.5756929637526653,
|
|
"grad_norm": 0.3455042599947996,
|
|
"learning_rate": 3.4687963235214845e-05,
|
|
"loss": 1.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33542802929878235,
|
|
"step": 370,
|
|
"valid_targets_mean": 16135.2,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 1.579957356076759,
|
|
"grad_norm": 0.39394782964322445,
|
|
"learning_rate": 3.464755294512325e-05,
|
|
"loss": 1.0876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18428701162338257,
|
|
"step": 371,
|
|
"valid_targets_mean": 8529.3,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 1.5842217484008527,
|
|
"grad_norm": 0.33318601467958586,
|
|
"learning_rate": 3.4607013260915765e-05,
|
|
"loss": 1.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27869242429733276,
|
|
"step": 372,
|
|
"valid_targets_mean": 16116.1,
|
|
"valid_targets_min": 14467
|
|
},
|
|
{
|
|
"epoch": 1.5884861407249466,
|
|
"grad_norm": 0.3208068381003978,
|
|
"learning_rate": 3.4566344540713404e-05,
|
|
"loss": 1.0285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834702730178833,
|
|
"step": 373,
|
|
"valid_targets_mean": 16224.1,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 1.5927505330490406,
|
|
"grad_norm": 0.3953424989715451,
|
|
"learning_rate": 3.452554714377706e-05,
|
|
"loss": 1.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2175963819026947,
|
|
"step": 374,
|
|
"valid_targets_mean": 11231.9,
|
|
"valid_targets_min": 7068
|
|
},
|
|
{
|
|
"epoch": 1.5970149253731343,
|
|
"grad_norm": 0.33097995220515114,
|
|
"learning_rate": 3.448462143050436e-05,
|
|
"loss": 1.056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28065377473831177,
|
|
"step": 375,
|
|
"valid_targets_mean": 15767.3,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 1.6012793176972282,
|
|
"grad_norm": 0.3921987235874417,
|
|
"learning_rate": 3.4443567762426444e-05,
|
|
"loss": 1.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164235055446625,
|
|
"step": 376,
|
|
"valid_targets_mean": 16184.6,
|
|
"valid_targets_min": 15606
|
|
},
|
|
{
|
|
"epoch": 1.6055437100213221,
|
|
"grad_norm": 0.37657492260788417,
|
|
"learning_rate": 3.440238650220477e-05,
|
|
"loss": 1.0384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22348591685295105,
|
|
"step": 377,
|
|
"valid_targets_mean": 13476.9,
|
|
"valid_targets_min": 11755
|
|
},
|
|
{
|
|
"epoch": 1.6098081023454158,
|
|
"grad_norm": 0.38361242226579406,
|
|
"learning_rate": 3.4361078013627945e-05,
|
|
"loss": 1.0483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2881014049053192,
|
|
"step": 378,
|
|
"valid_targets_mean": 16131.5,
|
|
"valid_targets_min": 15301
|
|
},
|
|
{
|
|
"epoch": 1.6140724946695095,
|
|
"grad_norm": 0.4542116388705416,
|
|
"learning_rate": 3.4319642661608474e-05,
|
|
"loss": 1.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004990220069885,
|
|
"step": 379,
|
|
"valid_targets_mean": 16156.4,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 1.6183368869936035,
|
|
"grad_norm": 0.39387212174169905,
|
|
"learning_rate": 3.427808081217957e-05,
|
|
"loss": 1.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27379781007766724,
|
|
"step": 380,
|
|
"valid_targets_mean": 15046.2,
|
|
"valid_targets_min": 10805
|
|
},
|
|
{
|
|
"epoch": 1.6226012793176974,
|
|
"grad_norm": 0.39084162634088154,
|
|
"learning_rate": 3.423639283249189e-05,
|
|
"loss": 1.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966284453868866,
|
|
"step": 381,
|
|
"valid_targets_mean": 16231.2,
|
|
"valid_targets_min": 15531
|
|
},
|
|
{
|
|
"epoch": 1.626865671641791,
|
|
"grad_norm": 0.4072191864933012,
|
|
"learning_rate": 3.419457909081032e-05,
|
|
"loss": 1.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23579218983650208,
|
|
"step": 382,
|
|
"valid_targets_mean": 10985.6,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 1.6311300639658848,
|
|
"grad_norm": 0.34514348906831965,
|
|
"learning_rate": 3.415263995651069e-05,
|
|
"loss": 1.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28504568338394165,
|
|
"step": 383,
|
|
"valid_targets_mean": 15444.5,
|
|
"valid_targets_min": 13058
|
|
},
|
|
{
|
|
"epoch": 1.6353944562899787,
|
|
"grad_norm": 0.3169226325417078,
|
|
"learning_rate": 3.411057580007653e-05,
|
|
"loss": 1.0589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31453680992126465,
|
|
"step": 384,
|
|
"valid_targets_mean": 16178.7,
|
|
"valid_targets_min": 15572
|
|
},
|
|
{
|
|
"epoch": 1.6396588486140726,
|
|
"grad_norm": 0.41766890978652443,
|
|
"learning_rate": 3.4068386993095806e-05,
|
|
"loss": 1.0459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16267335414886475,
|
|
"step": 385,
|
|
"valid_targets_mean": 8596.7,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 1.6439232409381663,
|
|
"grad_norm": 0.31567309796851967,
|
|
"learning_rate": 3.402607390825762e-05,
|
|
"loss": 1.056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2421300858259201,
|
|
"step": 386,
|
|
"valid_targets_mean": 16232.5,
|
|
"valid_targets_min": 15539
|
|
},
|
|
{
|
|
"epoch": 1.64818763326226,
|
|
"grad_norm": 0.3440061443480324,
|
|
"learning_rate": 3.398363691934894e-05,
|
|
"loss": 1.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3332326114177704,
|
|
"step": 387,
|
|
"valid_targets_mean": 16060.8,
|
|
"valid_targets_min": 15033
|
|
},
|
|
{
|
|
"epoch": 1.652452025586354,
|
|
"grad_norm": 0.33587676960996954,
|
|
"learning_rate": 3.3941076401251244e-05,
|
|
"loss": 1.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1977711319923401,
|
|
"step": 388,
|
|
"valid_targets_mean": 11657.1,
|
|
"valid_targets_min": 9535
|
|
},
|
|
{
|
|
"epoch": 1.6567164179104479,
|
|
"grad_norm": 0.3741622521424512,
|
|
"learning_rate": 3.3898392729937295e-05,
|
|
"loss": 1.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29115524888038635,
|
|
"step": 389,
|
|
"valid_targets_mean": 16162.9,
|
|
"valid_targets_min": 15023
|
|
},
|
|
{
|
|
"epoch": 1.6609808102345416,
|
|
"grad_norm": 0.32571931932140896,
|
|
"learning_rate": 3.385558628246774e-05,
|
|
"loss": 1.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3178585171699524,
|
|
"step": 390,
|
|
"valid_targets_mean": 16113.8,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 1.6652452025586353,
|
|
"grad_norm": 0.31703646298997484,
|
|
"learning_rate": 3.381265743698781e-05,
|
|
"loss": 1.0783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23743729293346405,
|
|
"step": 391,
|
|
"valid_targets_mean": 14455.2,
|
|
"valid_targets_min": 12866
|
|
},
|
|
{
|
|
"epoch": 1.6695095948827292,
|
|
"grad_norm": 0.3292168753278362,
|
|
"learning_rate": 3.3769606572724e-05,
|
|
"loss": 1.0542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905117869377136,
|
|
"step": 392,
|
|
"valid_targets_mean": 16126.9,
|
|
"valid_targets_min": 14699
|
|
},
|
|
{
|
|
"epoch": 1.6737739872068231,
|
|
"grad_norm": 0.3347504777862101,
|
|
"learning_rate": 3.3726434069980686e-05,
|
|
"loss": 1.0383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23373055458068848,
|
|
"step": 393,
|
|
"valid_targets_mean": 12935.7,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 1.6780383795309168,
|
|
"grad_norm": 0.34230310668388375,
|
|
"learning_rate": 3.368314031013678e-05,
|
|
"loss": 1.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26925498247146606,
|
|
"step": 394,
|
|
"valid_targets_mean": 16041.7,
|
|
"valid_targets_min": 14053
|
|
},
|
|
{
|
|
"epoch": 1.6823027718550105,
|
|
"grad_norm": 0.3235869439000228,
|
|
"learning_rate": 3.363972567564236e-05,
|
|
"loss": 1.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29748794436454773,
|
|
"step": 395,
|
|
"valid_targets_mean": 16214.8,
|
|
"valid_targets_min": 15371
|
|
},
|
|
{
|
|
"epoch": 1.6865671641791045,
|
|
"grad_norm": 0.3334870078597985,
|
|
"learning_rate": 3.35961905500153e-05,
|
|
"loss": 1.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18330952525138855,
|
|
"step": 396,
|
|
"valid_targets_mean": 8966.2,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.6908315565031984,
|
|
"grad_norm": 0.30602511626511597,
|
|
"learning_rate": 3.3552535317837855e-05,
|
|
"loss": 1.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28136658668518066,
|
|
"step": 397,
|
|
"valid_targets_mean": 15754.8,
|
|
"valid_targets_min": 13899
|
|
},
|
|
{
|
|
"epoch": 1.695095948827292,
|
|
"grad_norm": 0.3380157464233408,
|
|
"learning_rate": 3.35087603647533e-05,
|
|
"loss": 1.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3226493000984192,
|
|
"step": 398,
|
|
"valid_targets_mean": 16187.2,
|
|
"valid_targets_min": 15624
|
|
},
|
|
{
|
|
"epoch": 1.6993603411513858,
|
|
"grad_norm": 0.32260213235248153,
|
|
"learning_rate": 3.346486607746249e-05,
|
|
"loss": 1.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1675054132938385,
|
|
"step": 399,
|
|
"valid_targets_mean": 9466.9,
|
|
"valid_targets_min": 4761
|
|
},
|
|
{
|
|
"epoch": 1.7036247334754797,
|
|
"grad_norm": 0.35610426430137454,
|
|
"learning_rate": 3.342085284372047e-05,
|
|
"loss": 1.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3175768256187439,
|
|
"step": 400,
|
|
"valid_targets_mean": 16054.5,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 1.7078891257995736,
|
|
"grad_norm": 0.3734277379203581,
|
|
"learning_rate": 3.337672105233303e-05,
|
|
"loss": 1.0742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29572874307632446,
|
|
"step": 401,
|
|
"valid_targets_mean": 16123.2,
|
|
"valid_targets_min": 14638
|
|
},
|
|
{
|
|
"epoch": 1.7121535181236673,
|
|
"grad_norm": 0.350322019083219,
|
|
"learning_rate": 3.3332471093153296e-05,
|
|
"loss": 1.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22418397665023804,
|
|
"step": 402,
|
|
"valid_targets_mean": 13257.8,
|
|
"valid_targets_min": 10153
|
|
},
|
|
{
|
|
"epoch": 1.716417910447761,
|
|
"grad_norm": 0.2887369091087072,
|
|
"learning_rate": 3.3288103357078244e-05,
|
|
"loss": 1.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073233962059021,
|
|
"step": 403,
|
|
"valid_targets_mean": 16162.8,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 1.720682302771855,
|
|
"grad_norm": 0.3496910847706424,
|
|
"learning_rate": 3.324361823604529e-05,
|
|
"loss": 1.088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3313904404640198,
|
|
"step": 404,
|
|
"valid_targets_mean": 16163.1,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 1.724946695095949,
|
|
"grad_norm": 0.2978023325479404,
|
|
"learning_rate": 3.319901612302881e-05,
|
|
"loss": 1.084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603868544101715,
|
|
"step": 405,
|
|
"valid_targets_mean": 15849.7,
|
|
"valid_targets_min": 14606
|
|
},
|
|
{
|
|
"epoch": 1.7292110874200426,
|
|
"grad_norm": 0.3400614764223683,
|
|
"learning_rate": 3.315429741203666e-05,
|
|
"loss": 1.0602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796156704425812,
|
|
"step": 406,
|
|
"valid_targets_mean": 16208.5,
|
|
"valid_targets_min": 15536
|
|
},
|
|
{
|
|
"epoch": 1.7334754797441365,
|
|
"grad_norm": 0.318760235671882,
|
|
"learning_rate": 3.3109462498106705e-05,
|
|
"loss": 1.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20615634322166443,
|
|
"step": 407,
|
|
"valid_targets_mean": 11185.1,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 1.7377398720682304,
|
|
"grad_norm": 0.312215960115044,
|
|
"learning_rate": 3.306451177730333e-05,
|
|
"loss": 1.0415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264221727848053,
|
|
"step": 408,
|
|
"valid_targets_mean": 15921.4,
|
|
"valid_targets_min": 15152
|
|
},
|
|
{
|
|
"epoch": 1.7420042643923241,
|
|
"grad_norm": 0.3309955562897752,
|
|
"learning_rate": 3.301944564671394e-05,
|
|
"loss": 1.0924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32010987401008606,
|
|
"step": 409,
|
|
"valid_targets_mean": 16101.6,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 1.7462686567164178,
|
|
"grad_norm": 0.30567938936491407,
|
|
"learning_rate": 3.297426450444546e-05,
|
|
"loss": 1.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18925735354423523,
|
|
"step": 410,
|
|
"valid_targets_mean": 10622.0,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 1.7505330490405118,
|
|
"grad_norm": 0.2968403930265541,
|
|
"learning_rate": 3.292896874962078e-05,
|
|
"loss": 1.0478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26409652829170227,
|
|
"step": 411,
|
|
"valid_targets_mean": 16163.8,
|
|
"valid_targets_min": 14690
|
|
},
|
|
{
|
|
"epoch": 1.7547974413646057,
|
|
"grad_norm": 0.2962356001809263,
|
|
"learning_rate": 3.2883558782375294e-05,
|
|
"loss": 1.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2959936261177063,
|
|
"step": 412,
|
|
"valid_targets_mean": 16117.5,
|
|
"valid_targets_min": 13635
|
|
},
|
|
{
|
|
"epoch": 1.7590618336886994,
|
|
"grad_norm": 0.26443877375742286,
|
|
"learning_rate": 3.283803500385332e-05,
|
|
"loss": 1.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22419683635234833,
|
|
"step": 413,
|
|
"valid_targets_mean": 12500.9,
|
|
"valid_targets_min": 9491
|
|
},
|
|
{
|
|
"epoch": 1.763326226012793,
|
|
"grad_norm": 0.29190721133115755,
|
|
"learning_rate": 3.2792397816204546e-05,
|
|
"loss": 1.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29217758774757385,
|
|
"step": 414,
|
|
"valid_targets_mean": 16139.3,
|
|
"valid_targets_min": 15077
|
|
},
|
|
{
|
|
"epoch": 1.767590618336887,
|
|
"grad_norm": 0.2966151215688941,
|
|
"learning_rate": 3.2746647622580524e-05,
|
|
"loss": 1.093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31082773208618164,
|
|
"step": 415,
|
|
"valid_targets_mean": 16148.9,
|
|
"valid_targets_min": 15485
|
|
},
|
|
{
|
|
"epoch": 1.771855010660981,
|
|
"grad_norm": 0.317949985070703,
|
|
"learning_rate": 3.270078482713106e-05,
|
|
"loss": 1.0336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23525992035865784,
|
|
"step": 416,
|
|
"valid_targets_mean": 14075.5,
|
|
"valid_targets_min": 11572
|
|
},
|
|
{
|
|
"epoch": 1.7761194029850746,
|
|
"grad_norm": 0.34552253743878153,
|
|
"learning_rate": 3.265480983500069e-05,
|
|
"loss": 1.0632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3058857321739197,
|
|
"step": 417,
|
|
"valid_targets_mean": 16051.5,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 1.7803837953091683,
|
|
"grad_norm": 0.32803071709502934,
|
|
"learning_rate": 3.260872305232507e-05,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25929567217826843,
|
|
"step": 418,
|
|
"valid_targets_mean": 13168.9,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 1.7846481876332623,
|
|
"grad_norm": 0.3538046380610209,
|
|
"learning_rate": 3.256252488622738e-05,
|
|
"loss": 1.0467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24721963703632355,
|
|
"step": 419,
|
|
"valid_targets_mean": 16116.3,
|
|
"valid_targets_min": 13473
|
|
},
|
|
{
|
|
"epoch": 1.7889125799573562,
|
|
"grad_norm": 0.45070711761291726,
|
|
"learning_rate": 3.251621574481475e-05,
|
|
"loss": 1.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2939479351043701,
|
|
"step": 420,
|
|
"valid_targets_mean": 16112.0,
|
|
"valid_targets_min": 14287
|
|
},
|
|
{
|
|
"epoch": 1.79317697228145,
|
|
"grad_norm": 0.30337437904558584,
|
|
"learning_rate": 3.246979603717467e-05,
|
|
"loss": 1.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20673009753227234,
|
|
"step": 421,
|
|
"valid_targets_mean": 10187.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.7974413646055436,
|
|
"grad_norm": 0.42598340707539195,
|
|
"learning_rate": 3.242326617337133e-05,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24972651898860931,
|
|
"step": 422,
|
|
"valid_targets_mean": 16181.6,
|
|
"valid_targets_min": 14875
|
|
},
|
|
{
|
|
"epoch": 1.8017057569296375,
|
|
"grad_norm": 0.4218988697298897,
|
|
"learning_rate": 3.2376626564442016e-05,
|
|
"loss": 1.0657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.314885675907135,
|
|
"step": 423,
|
|
"valid_targets_mean": 16183.7,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 1.8059701492537314,
|
|
"grad_norm": 0.3491283230796325,
|
|
"learning_rate": 3.2329877622393515e-05,
|
|
"loss": 1.0507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21938323974609375,
|
|
"step": 424,
|
|
"valid_targets_mean": 11132.9,
|
|
"valid_targets_min": 7632
|
|
},
|
|
{
|
|
"epoch": 1.8102345415778252,
|
|
"grad_norm": 0.386094531849704,
|
|
"learning_rate": 3.228301976019841e-05,
|
|
"loss": 1.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914331555366516,
|
|
"step": 425,
|
|
"valid_targets_mean": 16018.5,
|
|
"valid_targets_min": 14821
|
|
},
|
|
{
|
|
"epoch": 1.8144989339019189,
|
|
"grad_norm": 0.42035237534257025,
|
|
"learning_rate": 3.22360533917915e-05,
|
|
"loss": 1.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29757189750671387,
|
|
"step": 426,
|
|
"valid_targets_mean": 16201.6,
|
|
"valid_targets_min": 15221
|
|
},
|
|
{
|
|
"epoch": 1.8187633262260128,
|
|
"grad_norm": 0.3272401912447551,
|
|
"learning_rate": 3.218897893206608e-05,
|
|
"loss": 1.0407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22152376174926758,
|
|
"step": 427,
|
|
"valid_targets_mean": 13069.1,
|
|
"valid_targets_min": 8831
|
|
},
|
|
{
|
|
"epoch": 1.8230277185501067,
|
|
"grad_norm": 0.41873622157001394,
|
|
"learning_rate": 3.2141796796870335e-05,
|
|
"loss": 1.0824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29691261053085327,
|
|
"step": 428,
|
|
"valid_targets_mean": 16014.8,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 1.8272921108742004,
|
|
"grad_norm": 0.35459421001907965,
|
|
"learning_rate": 3.2094507403003614e-05,
|
|
"loss": 1.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3163725733757019,
|
|
"step": 429,
|
|
"valid_targets_mean": 16206.0,
|
|
"valid_targets_min": 15778
|
|
},
|
|
{
|
|
"epoch": 1.831556503198294,
|
|
"grad_norm": 0.3392941956349515,
|
|
"learning_rate": 3.2047111168212785e-05,
|
|
"loss": 1.1017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829766869544983,
|
|
"step": 430,
|
|
"valid_targets_mean": 15181.8,
|
|
"valid_targets_min": 13169
|
|
},
|
|
{
|
|
"epoch": 1.835820895522388,
|
|
"grad_norm": 0.42684034612504657,
|
|
"learning_rate": 3.1999608511188524e-05,
|
|
"loss": 1.0455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28068628907203674,
|
|
"step": 431,
|
|
"valid_targets_mean": 16113.2,
|
|
"valid_targets_min": 13828
|
|
},
|
|
{
|
|
"epoch": 1.840085287846482,
|
|
"grad_norm": 0.2746470236815193,
|
|
"learning_rate": 3.1951999851561625e-05,
|
|
"loss": 1.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21455985307693481,
|
|
"step": 432,
|
|
"valid_targets_mean": 11254.6,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 1.8443496801705757,
|
|
"grad_norm": 0.36610134802439465,
|
|
"learning_rate": 3.190428560989931e-05,
|
|
"loss": 1.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696194052696228,
|
|
"step": 433,
|
|
"valid_targets_mean": 15552.6,
|
|
"valid_targets_min": 14147
|
|
},
|
|
{
|
|
"epoch": 1.8486140724946694,
|
|
"grad_norm": 0.400332194583115,
|
|
"learning_rate": 3.185646620770146e-05,
|
|
"loss": 1.0556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3428300619125366,
|
|
"step": 434,
|
|
"valid_targets_mean": 16115.7,
|
|
"valid_targets_min": 15331
|
|
},
|
|
{
|
|
"epoch": 1.8528784648187633,
|
|
"grad_norm": 0.3407840252878956,
|
|
"learning_rate": 3.180854206739696e-05,
|
|
"loss": 1.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15828776359558105,
|
|
"step": 435,
|
|
"valid_targets_mean": 7300.2,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.3993524123800141,
|
|
"learning_rate": 3.176051361233991e-05,
|
|
"loss": 1.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27049949765205383,
|
|
"step": 436,
|
|
"valid_targets_mean": 16174.1,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 1.861407249466951,
|
|
"grad_norm": 0.4822527374297782,
|
|
"learning_rate": 3.171238126680594e-05,
|
|
"loss": 1.0505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29055434465408325,
|
|
"step": 437,
|
|
"valid_targets_mean": 16202.2,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 1.8656716417910446,
|
|
"grad_norm": 0.31172090535429725,
|
|
"learning_rate": 3.166414545598839e-05,
|
|
"loss": 1.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23214572668075562,
|
|
"step": 438,
|
|
"valid_targets_mean": 12580.4,
|
|
"valid_targets_min": 10074
|
|
},
|
|
{
|
|
"epoch": 1.8699360341151388,
|
|
"grad_norm": 0.4353380907159879,
|
|
"learning_rate": 3.161580660599464e-05,
|
|
"loss": 1.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27525749802589417,
|
|
"step": 439,
|
|
"valid_targets_mean": 15703.7,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 1.8742004264392325,
|
|
"grad_norm": 0.392351166551999,
|
|
"learning_rate": 3.1567365143842264e-05,
|
|
"loss": 1.0603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3120921850204468,
|
|
"step": 440,
|
|
"valid_targets_mean": 16157.0,
|
|
"valid_targets_min": 15368
|
|
},
|
|
{
|
|
"epoch": 1.8784648187633262,
|
|
"grad_norm": 0.28404162303736435,
|
|
"learning_rate": 3.1518821497455326e-05,
|
|
"loss": 1.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22893066704273224,
|
|
"step": 441,
|
|
"valid_targets_mean": 13424.1,
|
|
"valid_targets_min": 11379
|
|
},
|
|
{
|
|
"epoch": 1.88272921108742,
|
|
"grad_norm": 0.3828851066479204,
|
|
"learning_rate": 3.147017609566054e-05,
|
|
"loss": 1.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3331298232078552,
|
|
"step": 442,
|
|
"valid_targets_mean": 16049.3,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 1.886993603411514,
|
|
"grad_norm": 0.2848328516596313,
|
|
"learning_rate": 3.142142936818353e-05,
|
|
"loss": 1.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532411813735962,
|
|
"step": 443,
|
|
"valid_targets_mean": 13397.8,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.8912579957356077,
|
|
"grad_norm": 0.3358827557474576,
|
|
"learning_rate": 3.137258174564501e-05,
|
|
"loss": 1.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27709946036338806,
|
|
"step": 444,
|
|
"valid_targets_mean": 16007.1,
|
|
"valid_targets_min": 15085
|
|
},
|
|
{
|
|
"epoch": 1.8955223880597014,
|
|
"grad_norm": 0.39408596835599197,
|
|
"learning_rate": 3.1323633659556986e-05,
|
|
"loss": 1.0239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28863510489463806,
|
|
"step": 445,
|
|
"valid_targets_mean": 16240.1,
|
|
"valid_targets_min": 15762
|
|
},
|
|
{
|
|
"epoch": 1.8997867803837953,
|
|
"grad_norm": 0.2776216793862093,
|
|
"learning_rate": 3.127458554231894e-05,
|
|
"loss": 1.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1944802701473236,
|
|
"step": 446,
|
|
"valid_targets_mean": 9927.9,
|
|
"valid_targets_min": 2329
|
|
},
|
|
{
|
|
"epoch": 1.9040511727078893,
|
|
"grad_norm": 0.3809551637456404,
|
|
"learning_rate": 3.122543782721402e-05,
|
|
"loss": 1.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27463626861572266,
|
|
"step": 447,
|
|
"valid_targets_mean": 16032.5,
|
|
"valid_targets_min": 15102
|
|
},
|
|
{
|
|
"epoch": 1.908315565031983,
|
|
"grad_norm": 0.3629617030320813,
|
|
"learning_rate": 3.1176190948405194e-05,
|
|
"loss": 1.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34954962134361267,
|
|
"step": 448,
|
|
"valid_targets_mean": 16119.2,
|
|
"valid_targets_min": 15712
|
|
},
|
|
{
|
|
"epoch": 1.9125799573560767,
|
|
"grad_norm": 0.32157438333518096,
|
|
"learning_rate": 3.112684534093142e-05,
|
|
"loss": 1.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19989347457885742,
|
|
"step": 449,
|
|
"valid_targets_mean": 10913.1,
|
|
"valid_targets_min": 6367
|
|
},
|
|
{
|
|
"epoch": 1.9168443496801706,
|
|
"grad_norm": 0.34579393461468116,
|
|
"learning_rate": 3.107740144070385e-05,
|
|
"loss": 1.0235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2891630530357361,
|
|
"step": 450,
|
|
"valid_targets_mean": 16040.9,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 1.9211087420042645,
|
|
"grad_norm": 0.39082666775836783,
|
|
"learning_rate": 3.102785968450188e-05,
|
|
"loss": 1.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001328110694885,
|
|
"step": 451,
|
|
"valid_targets_mean": 16137.6,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 1.9253731343283582,
|
|
"grad_norm": 0.2674061959920609,
|
|
"learning_rate": 3.09782205099694e-05,
|
|
"loss": 1.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544635236263275,
|
|
"step": 452,
|
|
"valid_targets_mean": 13191.7,
|
|
"valid_targets_min": 10839
|
|
},
|
|
{
|
|
"epoch": 1.929637526652452,
|
|
"grad_norm": 0.3051703102612724,
|
|
"learning_rate": 3.092848435561084e-05,
|
|
"loss": 1.0383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28948408365249634,
|
|
"step": 453,
|
|
"valid_targets_mean": 16126.2,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 1.9339019189765458,
|
|
"grad_norm": 0.31071649144982727,
|
|
"learning_rate": 3.0878651660787376e-05,
|
|
"loss": 1.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999756932258606,
|
|
"step": 454,
|
|
"valid_targets_mean": 16165.5,
|
|
"valid_targets_min": 14997
|
|
},
|
|
{
|
|
"epoch": 1.9381663113006398,
|
|
"grad_norm": 0.28775024375357744,
|
|
"learning_rate": 3.082872286571295e-05,
|
|
"loss": 1.0611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26242709159851074,
|
|
"step": 455,
|
|
"valid_targets_mean": 14387.2,
|
|
"valid_targets_min": 12697
|
|
},
|
|
{
|
|
"epoch": 1.9424307036247335,
|
|
"grad_norm": 0.28090612344311117,
|
|
"learning_rate": 3.077869841145049e-05,
|
|
"loss": 1.0564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30365267395973206,
|
|
"step": 456,
|
|
"valid_targets_mean": 16208.8,
|
|
"valid_targets_min": 15627
|
|
},
|
|
{
|
|
"epoch": 1.9466950959488272,
|
|
"grad_norm": 0.294242971564645,
|
|
"learning_rate": 3.0728578739907934e-05,
|
|
"loss": 1.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21823406219482422,
|
|
"step": 457,
|
|
"valid_targets_mean": 10413.9,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 1.950959488272921,
|
|
"grad_norm": 0.29858032626112163,
|
|
"learning_rate": 3.067836429383437e-05,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23922613263130188,
|
|
"step": 458,
|
|
"valid_targets_mean": 16266.1,
|
|
"valid_targets_min": 16019
|
|
},
|
|
{
|
|
"epoch": 1.955223880597015,
|
|
"grad_norm": 0.2824659397473703,
|
|
"learning_rate": 3.062805551681609e-05,
|
|
"loss": 1.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138640224933624,
|
|
"step": 459,
|
|
"valid_targets_mean": 16080.7,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 1.9594882729211087,
|
|
"grad_norm": 0.33450839912164904,
|
|
"learning_rate": 3.057765285327271e-05,
|
|
"loss": 1.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324230283498764,
|
|
"step": 460,
|
|
"valid_targets_mean": 6411.7,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 1.9637526652452024,
|
|
"grad_norm": 0.30443859172334176,
|
|
"learning_rate": 3.0527156748453214e-05,
|
|
"loss": 1.0247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705356478691101,
|
|
"step": 461,
|
|
"valid_targets_mean": 16094.2,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 1.9680170575692963,
|
|
"grad_norm": 0.25303205575402105,
|
|
"learning_rate": 3.047656764843203e-05,
|
|
"loss": 1.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31647318601608276,
|
|
"step": 462,
|
|
"valid_targets_mean": 16166.5,
|
|
"valid_targets_min": 15218
|
|
},
|
|
{
|
|
"epoch": 1.9722814498933903,
|
|
"grad_norm": 0.29050836340256686,
|
|
"learning_rate": 3.0425886000105094e-05,
|
|
"loss": 1.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2042907178401947,
|
|
"step": 463,
|
|
"valid_targets_mean": 12991.4,
|
|
"valid_targets_min": 11115
|
|
},
|
|
{
|
|
"epoch": 1.976545842217484,
|
|
"grad_norm": 0.2595278858325184,
|
|
"learning_rate": 3.0375112251185892e-05,
|
|
"loss": 1.0472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28346630930900574,
|
|
"step": 464,
|
|
"valid_targets_mean": 16167.2,
|
|
"valid_targets_min": 15368
|
|
},
|
|
{
|
|
"epoch": 1.9808102345415777,
|
|
"grad_norm": 0.2907791177123676,
|
|
"learning_rate": 3.0324246850201527e-05,
|
|
"loss": 1.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3213297724723816,
|
|
"step": 465,
|
|
"valid_targets_mean": 16105.9,
|
|
"valid_targets_min": 15564
|
|
},
|
|
{
|
|
"epoch": 1.9850746268656716,
|
|
"grad_norm": 0.29493125054466257,
|
|
"learning_rate": 3.0273290246488732e-05,
|
|
"loss": 1.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25564977526664734,
|
|
"step": 466,
|
|
"valid_targets_mean": 15038.5,
|
|
"valid_targets_min": 13132
|
|
},
|
|
{
|
|
"epoch": 1.9893390191897655,
|
|
"grad_norm": 0.2748687198619738,
|
|
"learning_rate": 3.0222242890189904e-05,
|
|
"loss": 1.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31964734196662903,
|
|
"step": 467,
|
|
"valid_targets_mean": 16099.2,
|
|
"valid_targets_min": 15195
|
|
},
|
|
{
|
|
"epoch": 1.9936034115138592,
|
|
"grad_norm": 0.2957007354271567,
|
|
"learning_rate": 3.017110523224914e-05,
|
|
"loss": 1.0748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24382814764976501,
|
|
"step": 468,
|
|
"valid_targets_mean": 13108.7,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 1.997867803837953,
|
|
"grad_norm": 0.30158366498484296,
|
|
"learning_rate": 3.011987772440825e-05,
|
|
"loss": 1.1061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694101929664612,
|
|
"step": 469,
|
|
"valid_targets_mean": 15314.6,
|
|
"valid_targets_min": 13039
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.34885266698923534,
|
|
"learning_rate": 3.006856081920277e-05,
|
|
"loss": 1.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4675246477127075,
|
|
"step": 470,
|
|
"valid_targets_mean": 10737.9,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 2.0042643923240937,
|
|
"grad_norm": 0.34517596530036393,
|
|
"learning_rate": 3.001715496995793e-05,
|
|
"loss": 1.0286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2580025792121887,
|
|
"step": 471,
|
|
"valid_targets_mean": 16126.5,
|
|
"valid_targets_min": 15300
|
|
},
|
|
{
|
|
"epoch": 2.008528784648188,
|
|
"grad_norm": 0.373065396849927,
|
|
"learning_rate": 2.9965660630784715e-05,
|
|
"loss": 1.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316106379032135,
|
|
"step": 472,
|
|
"valid_targets_mean": 16121.2,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 2.0127931769722816,
|
|
"grad_norm": 0.3771084735928616,
|
|
"learning_rate": 2.9914078256575782e-05,
|
|
"loss": 1.0198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1904955953359604,
|
|
"step": 473,
|
|
"valid_targets_mean": 11766.6,
|
|
"valid_targets_min": 7044
|
|
},
|
|
{
|
|
"epoch": 2.0170575692963753,
|
|
"grad_norm": 0.4005641507418509,
|
|
"learning_rate": 2.9862408303001492e-05,
|
|
"loss": 1.0289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286452054977417,
|
|
"step": 474,
|
|
"valid_targets_mean": 16154.8,
|
|
"valid_targets_min": 15606
|
|
},
|
|
{
|
|
"epoch": 2.021321961620469,
|
|
"grad_norm": 0.4423303974992883,
|
|
"learning_rate": 2.9810651226505875e-05,
|
|
"loss": 1.0107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29240161180496216,
|
|
"step": 475,
|
|
"valid_targets_mean": 16143.0,
|
|
"valid_targets_min": 14917
|
|
},
|
|
{
|
|
"epoch": 2.025586353944563,
|
|
"grad_norm": 0.3732349325294711,
|
|
"learning_rate": 2.9758807484302566e-05,
|
|
"loss": 1.0117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22778771817684174,
|
|
"step": 476,
|
|
"valid_targets_mean": 14726.9,
|
|
"valid_targets_min": 13072
|
|
},
|
|
{
|
|
"epoch": 2.029850746268657,
|
|
"grad_norm": 0.3381768710409226,
|
|
"learning_rate": 2.9706877534370822e-05,
|
|
"loss": 1.0104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630852460861206,
|
|
"step": 477,
|
|
"valid_targets_mean": 16200.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 2.0341151385927505,
|
|
"grad_norm": 0.4221093160318879,
|
|
"learning_rate": 2.965486183545142e-05,
|
|
"loss": 1.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2496234029531479,
|
|
"step": 478,
|
|
"valid_targets_mean": 13077.4,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 2.038379530916844,
|
|
"grad_norm": 0.3276412284276854,
|
|
"learning_rate": 2.9602760847042645e-05,
|
|
"loss": 1.0234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24705776572227478,
|
|
"step": 479,
|
|
"valid_targets_mean": 15628.0,
|
|
"valid_targets_min": 14094
|
|
},
|
|
{
|
|
"epoch": 2.0426439232409384,
|
|
"grad_norm": 0.32310401402046046,
|
|
"learning_rate": 2.955057502939621e-05,
|
|
"loss": 1.0477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28313151001930237,
|
|
"step": 480,
|
|
"valid_targets_mean": 16204.5,
|
|
"valid_targets_min": 15520
|
|
},
|
|
{
|
|
"epoch": 2.046908315565032,
|
|
"grad_norm": 0.3577915559469497,
|
|
"learning_rate": 2.9498304843513193e-05,
|
|
"loss": 1.0475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16355785727500916,
|
|
"step": 481,
|
|
"valid_targets_mean": 8899.1,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 2.0511727078891258,
|
|
"grad_norm": 0.28212571183019797,
|
|
"learning_rate": 2.9445950751139957e-05,
|
|
"loss": 0.9925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508832812309265,
|
|
"step": 482,
|
|
"valid_targets_mean": 16143.0,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 2.0554371002132195,
|
|
"grad_norm": 0.33622066965100017,
|
|
"learning_rate": 2.939351321476412e-05,
|
|
"loss": 1.0443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30986666679382324,
|
|
"step": 483,
|
|
"valid_targets_mean": 16180.8,
|
|
"valid_targets_min": 15538
|
|
},
|
|
{
|
|
"epoch": 2.0597014925373136,
|
|
"grad_norm": 0.3467890112463114,
|
|
"learning_rate": 2.9340992697610393e-05,
|
|
"loss": 1.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2318936139345169,
|
|
"step": 484,
|
|
"valid_targets_mean": 12631.6,
|
|
"valid_targets_min": 7725
|
|
},
|
|
{
|
|
"epoch": 2.0639658848614073,
|
|
"grad_norm": 0.2674323491019401,
|
|
"learning_rate": 2.9288389663636537e-05,
|
|
"loss": 1.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773456573486328,
|
|
"step": 485,
|
|
"valid_targets_mean": 16057.7,
|
|
"valid_targets_min": 14555
|
|
},
|
|
{
|
|
"epoch": 2.068230277185501,
|
|
"grad_norm": 0.2936106148375603,
|
|
"learning_rate": 2.923570457752925e-05,
|
|
"loss": 0.9843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2952362895011902,
|
|
"step": 486,
|
|
"valid_targets_mean": 16159.5,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 2.0724946695095947,
|
|
"grad_norm": 0.31998591126401277,
|
|
"learning_rate": 2.9182937904700078e-05,
|
|
"loss": 1.0427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23296266794204712,
|
|
"step": 487,
|
|
"valid_targets_mean": 13743.6,
|
|
"valid_targets_min": 11030
|
|
},
|
|
{
|
|
"epoch": 2.076759061833689,
|
|
"grad_norm": 0.31585506557092585,
|
|
"learning_rate": 2.9130090111281278e-05,
|
|
"loss": 1.013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2714338004589081,
|
|
"step": 488,
|
|
"valid_targets_mean": 16219.4,
|
|
"valid_targets_min": 15554
|
|
},
|
|
{
|
|
"epoch": 2.0810234541577826,
|
|
"grad_norm": 0.3031311573959109,
|
|
"learning_rate": 2.9077161664121722e-05,
|
|
"loss": 1.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3078286647796631,
|
|
"step": 489,
|
|
"valid_targets_mean": 16029.9,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 2.0852878464818763,
|
|
"grad_norm": 0.3645885710506865,
|
|
"learning_rate": 2.902415303078275e-05,
|
|
"loss": 1.0255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2386752814054489,
|
|
"step": 490,
|
|
"valid_targets_mean": 15594.2,
|
|
"valid_targets_min": 13709
|
|
},
|
|
{
|
|
"epoch": 2.08955223880597,
|
|
"grad_norm": 0.3035060185630192,
|
|
"learning_rate": 2.8971064679534072e-05,
|
|
"loss": 1.0153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27316075563430786,
|
|
"step": 491,
|
|
"valid_targets_mean": 16149.3,
|
|
"valid_targets_min": 15057
|
|
},
|
|
{
|
|
"epoch": 2.093816631130064,
|
|
"grad_norm": 0.35076723647713254,
|
|
"learning_rate": 2.8917897079349604e-05,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20928055047988892,
|
|
"step": 492,
|
|
"valid_targets_mean": 10589.6,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 2.098081023454158,
|
|
"grad_norm": 0.36278573612041354,
|
|
"learning_rate": 2.8864650699903336e-05,
|
|
"loss": 1.0248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569931745529175,
|
|
"step": 493,
|
|
"valid_targets_mean": 15575.6,
|
|
"valid_targets_min": 12875
|
|
},
|
|
{
|
|
"epoch": 2.1023454157782515,
|
|
"grad_norm": 0.30816980414676637,
|
|
"learning_rate": 2.881132601156518e-05,
|
|
"loss": 1.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3187094032764435,
|
|
"step": 494,
|
|
"valid_targets_mean": 16201.0,
|
|
"valid_targets_min": 15588
|
|
},
|
|
{
|
|
"epoch": 2.106609808102345,
|
|
"grad_norm": 0.34040889262708063,
|
|
"learning_rate": 2.8757923485396805e-05,
|
|
"loss": 1.0357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15746864676475525,
|
|
"step": 495,
|
|
"valid_targets_mean": 8183.4,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 2.1108742004264394,
|
|
"grad_norm": 0.32466615006446503,
|
|
"learning_rate": 2.8704443593147517e-05,
|
|
"loss": 1.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27217546105384827,
|
|
"step": 496,
|
|
"valid_targets_mean": 16106.9,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 2.115138592750533,
|
|
"grad_norm": 0.3023775342977778,
|
|
"learning_rate": 2.8650886807250024e-05,
|
|
"loss": 1.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31930744647979736,
|
|
"step": 497,
|
|
"valid_targets_mean": 16090.3,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 2.1194029850746268,
|
|
"grad_norm": 0.3371633957587706,
|
|
"learning_rate": 2.8597253600816332e-05,
|
|
"loss": 1.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20702311396598816,
|
|
"step": 498,
|
|
"valid_targets_mean": 11938.5,
|
|
"valid_targets_min": 10146
|
|
},
|
|
{
|
|
"epoch": 2.1236673773987205,
|
|
"grad_norm": 0.3176936683472285,
|
|
"learning_rate": 2.8543544447633517e-05,
|
|
"loss": 1.0566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848049998283386,
|
|
"step": 499,
|
|
"valid_targets_mean": 16117.8,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 2.1279317697228146,
|
|
"grad_norm": 0.4137454949540055,
|
|
"learning_rate": 2.8489759822159558e-05,
|
|
"loss": 1.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3339790105819702,
|
|
"step": 500,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15565
|
|
},
|
|
{
|
|
"epoch": 2.1321961620469083,
|
|
"grad_norm": 0.32527015375381896,
|
|
"learning_rate": 2.843590019951914e-05,
|
|
"loss": 1.0501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22105172276496887,
|
|
"step": 501,
|
|
"valid_targets_mean": 13181.2,
|
|
"valid_targets_min": 11593
|
|
},
|
|
{
|
|
"epoch": 2.136460554371002,
|
|
"grad_norm": 0.4063954902492941,
|
|
"learning_rate": 2.838196605549948e-05,
|
|
"loss": 1.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31084293127059937,
|
|
"step": 502,
|
|
"valid_targets_mean": 16180.7,
|
|
"valid_targets_min": 15667
|
|
},
|
|
{
|
|
"epoch": 2.140724946695096,
|
|
"grad_norm": 0.2866719521007267,
|
|
"learning_rate": 2.8327957866546082e-05,
|
|
"loss": 1.1069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25871700048446655,
|
|
"step": 503,
|
|
"valid_targets_mean": 12895.1,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 2.14498933901919,
|
|
"grad_norm": 0.40645393887022263,
|
|
"learning_rate": 2.8273876109758568e-05,
|
|
"loss": 1.0785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629484534263611,
|
|
"step": 504,
|
|
"valid_targets_mean": 15436.5,
|
|
"valid_targets_min": 14157
|
|
},
|
|
{
|
|
"epoch": 2.1492537313432836,
|
|
"grad_norm": 0.37813096114486067,
|
|
"learning_rate": 2.8219721262886427e-05,
|
|
"loss": 1.0456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31807708740234375,
|
|
"step": 505,
|
|
"valid_targets_mean": 16037.3,
|
|
"valid_targets_min": 13211
|
|
},
|
|
{
|
|
"epoch": 2.1535181236673773,
|
|
"grad_norm": 0.2977743779696731,
|
|
"learning_rate": 2.816549380432483e-05,
|
|
"loss": 1.0264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18133559823036194,
|
|
"step": 506,
|
|
"valid_targets_mean": 9544.8,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 2.1577825159914714,
|
|
"grad_norm": 0.37204065392259444,
|
|
"learning_rate": 2.8111194213110386e-05,
|
|
"loss": 1.0286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634444832801819,
|
|
"step": 507,
|
|
"valid_targets_mean": 15868.5,
|
|
"valid_targets_min": 9799
|
|
},
|
|
{
|
|
"epoch": 2.162046908315565,
|
|
"grad_norm": 0.3239743361709542,
|
|
"learning_rate": 2.805682296891691e-05,
|
|
"loss": 1.074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31965699791908264,
|
|
"step": 508,
|
|
"valid_targets_mean": 16133.4,
|
|
"valid_targets_min": 15537
|
|
},
|
|
{
|
|
"epoch": 2.166311300639659,
|
|
"grad_norm": 0.34246005873336005,
|
|
"learning_rate": 2.8002380552051186e-05,
|
|
"loss": 1.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16129739582538605,
|
|
"step": 509,
|
|
"valid_targets_mean": 9578.0,
|
|
"valid_targets_min": 5674
|
|
},
|
|
{
|
|
"epoch": 2.1705756929637525,
|
|
"grad_norm": 0.3449084096654681,
|
|
"learning_rate": 2.7947867443448728e-05,
|
|
"loss": 1.0268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731706500053406,
|
|
"step": 510,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 14444
|
|
},
|
|
{
|
|
"epoch": 2.1748400852878467,
|
|
"grad_norm": 0.30284223216636613,
|
|
"learning_rate": 2.789328412466953e-05,
|
|
"loss": 1.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972923815250397,
|
|
"step": 511,
|
|
"valid_targets_mean": 16163.5,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 2.1791044776119404,
|
|
"grad_norm": 0.373113109179941,
|
|
"learning_rate": 2.7838631077893813e-05,
|
|
"loss": 1.0548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21403327584266663,
|
|
"step": 512,
|
|
"valid_targets_mean": 12320.2,
|
|
"valid_targets_min": 10472
|
|
},
|
|
{
|
|
"epoch": 2.183368869936034,
|
|
"grad_norm": 0.3815816706427068,
|
|
"learning_rate": 2.7783908785917753e-05,
|
|
"loss": 1.0746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001013696193695,
|
|
"step": 513,
|
|
"valid_targets_mean": 16060.1,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 2.1876332622601278,
|
|
"grad_norm": 0.2998392934268683,
|
|
"learning_rate": 2.7729117732149244e-05,
|
|
"loss": 1.0626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29828304052352905,
|
|
"step": 514,
|
|
"valid_targets_mean": 15956.0,
|
|
"valid_targets_min": 12151
|
|
},
|
|
{
|
|
"epoch": 2.191897654584222,
|
|
"grad_norm": 0.4332747528053942,
|
|
"learning_rate": 2.7674258400603587e-05,
|
|
"loss": 1.0431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451964169740677,
|
|
"step": 515,
|
|
"valid_targets_mean": 14654.4,
|
|
"valid_targets_min": 12701
|
|
},
|
|
{
|
|
"epoch": 2.1961620469083156,
|
|
"grad_norm": 0.35999553766272824,
|
|
"learning_rate": 2.761933127589927e-05,
|
|
"loss": 1.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868337631225586,
|
|
"step": 516,
|
|
"valid_targets_mean": 16187.1,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 2.2004264392324093,
|
|
"grad_norm": 0.34876479689639817,
|
|
"learning_rate": 2.7564336843253633e-05,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2030986100435257,
|
|
"step": 517,
|
|
"valid_targets_mean": 11016.9,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 2.204690831556503,
|
|
"grad_norm": 0.30481299782119614,
|
|
"learning_rate": 2.7509275588478606e-05,
|
|
"loss": 1.0649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779153287410736,
|
|
"step": 518,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 15565
|
|
},
|
|
{
|
|
"epoch": 2.208955223880597,
|
|
"grad_norm": 0.36837699902079,
|
|
"learning_rate": 2.7454147997976404e-05,
|
|
"loss": 1.0135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28670260310173035,
|
|
"step": 519,
|
|
"valid_targets_mean": 16204.6,
|
|
"valid_targets_min": 15596
|
|
},
|
|
{
|
|
"epoch": 2.213219616204691,
|
|
"grad_norm": 0.3052854817653588,
|
|
"learning_rate": 2.7398954558735272e-05,
|
|
"loss": 1.0505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15494415163993835,
|
|
"step": 520,
|
|
"valid_targets_mean": 8753.9,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 2.2174840085287846,
|
|
"grad_norm": 0.3719041397938604,
|
|
"learning_rate": 2.7343695758325125e-05,
|
|
"loss": 1.0293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2593284249305725,
|
|
"step": 521,
|
|
"valid_targets_mean": 16047.2,
|
|
"valid_targets_min": 15012
|
|
},
|
|
{
|
|
"epoch": 2.2217484008528783,
|
|
"grad_norm": 0.3207789320304075,
|
|
"learning_rate": 2.7288372084893282e-05,
|
|
"loss": 1.0266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29587921500205994,
|
|
"step": 522,
|
|
"valid_targets_mean": 16112.9,
|
|
"valid_targets_min": 14609
|
|
},
|
|
{
|
|
"epoch": 2.2260127931769724,
|
|
"grad_norm": 0.2915274338073017,
|
|
"learning_rate": 2.7232984027160126e-05,
|
|
"loss": 1.037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22466421127319336,
|
|
"step": 523,
|
|
"valid_targets_mean": 12362.7,
|
|
"valid_targets_min": 10008
|
|
},
|
|
{
|
|
"epoch": 2.230277185501066,
|
|
"grad_norm": 0.3404642290839142,
|
|
"learning_rate": 2.7177532074414822e-05,
|
|
"loss": 1.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900736331939697,
|
|
"step": 524,
|
|
"valid_targets_mean": 16111.7,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 2.23454157782516,
|
|
"grad_norm": 0.2907464000342923,
|
|
"learning_rate": 2.712201671651094e-05,
|
|
"loss": 1.0374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31494271755218506,
|
|
"step": 525,
|
|
"valid_targets_mean": 16096.5,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 2.2388059701492535,
|
|
"grad_norm": 0.32048507495101003,
|
|
"learning_rate": 2.7066438443862205e-05,
|
|
"loss": 1.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26120030879974365,
|
|
"step": 526,
|
|
"valid_targets_mean": 13684.5,
|
|
"valid_targets_min": 11557
|
|
},
|
|
{
|
|
"epoch": 2.2430703624733477,
|
|
"grad_norm": 0.33007219784645936,
|
|
"learning_rate": 2.701079774743808e-05,
|
|
"loss": 1.0497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147214353084564,
|
|
"step": 527,
|
|
"valid_targets_mean": 16176.9,
|
|
"valid_targets_min": 15185
|
|
},
|
|
{
|
|
"epoch": 2.2473347547974414,
|
|
"grad_norm": 0.3276428323624906,
|
|
"learning_rate": 2.6955095118759496e-05,
|
|
"loss": 1.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24769821763038635,
|
|
"step": 528,
|
|
"valid_targets_mean": 13108.9,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 2.251599147121535,
|
|
"grad_norm": 0.3418907614080023,
|
|
"learning_rate": 2.689933104989447e-05,
|
|
"loss": 1.0355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25434744358062744,
|
|
"step": 529,
|
|
"valid_targets_mean": 15346.7,
|
|
"valid_targets_min": 14049
|
|
},
|
|
{
|
|
"epoch": 2.2558635394456292,
|
|
"grad_norm": 0.2904867573329887,
|
|
"learning_rate": 2.6843506033453777e-05,
|
|
"loss": 1.092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.321340948343277,
|
|
"step": 530,
|
|
"valid_targets_mean": 15995.2,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 2.260127931769723,
|
|
"grad_norm": 0.3476989542237827,
|
|
"learning_rate": 2.6787620562586587e-05,
|
|
"loss": 1.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17278584837913513,
|
|
"step": 531,
|
|
"valid_targets_mean": 9370.6,
|
|
"valid_targets_min": 3012
|
|
},
|
|
{
|
|
"epoch": 2.2643923240938166,
|
|
"grad_norm": 0.2786187248061428,
|
|
"learning_rate": 2.673167513097613e-05,
|
|
"loss": 0.9944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23674175143241882,
|
|
"step": 532,
|
|
"valid_targets_mean": 16198.4,
|
|
"valid_targets_min": 15665
|
|
},
|
|
{
|
|
"epoch": 2.2686567164179103,
|
|
"grad_norm": 0.33480287771669115,
|
|
"learning_rate": 2.6675670232835297e-05,
|
|
"loss": 1.0376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29758474230766296,
|
|
"step": 533,
|
|
"valid_targets_mean": 16011.2,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 2.272921108742004,
|
|
"grad_norm": 0.3125122769507082,
|
|
"learning_rate": 2.661960636290231e-05,
|
|
"loss": 1.052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18115438520908356,
|
|
"step": 534,
|
|
"valid_targets_mean": 10492.3,
|
|
"valid_targets_min": 5775
|
|
},
|
|
{
|
|
"epoch": 2.277185501066098,
|
|
"grad_norm": 0.335477046064761,
|
|
"learning_rate": 2.6563484016436346e-05,
|
|
"loss": 1.0559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270550400018692,
|
|
"step": 535,
|
|
"valid_targets_mean": 16125.0,
|
|
"valid_targets_min": 15091
|
|
},
|
|
{
|
|
"epoch": 2.281449893390192,
|
|
"grad_norm": 0.314942245164124,
|
|
"learning_rate": 2.6507303689213143e-05,
|
|
"loss": 1.0926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903967499732971,
|
|
"step": 536,
|
|
"valid_targets_mean": 16207.7,
|
|
"valid_targets_min": 15562
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.314621997715093,
|
|
"learning_rate": 2.6451065877520634e-05,
|
|
"loss": 1.0289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21829786896705627,
|
|
"step": 537,
|
|
"valid_targets_mean": 13124.1,
|
|
"valid_targets_min": 10613
|
|
},
|
|
{
|
|
"epoch": 2.2899786780383797,
|
|
"grad_norm": 0.28636340650931896,
|
|
"learning_rate": 2.639477107815455e-05,
|
|
"loss": 1.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2993796467781067,
|
|
"step": 538,
|
|
"valid_targets_mean": 16085.9,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 2.2942430703624734,
|
|
"grad_norm": 0.3145823987158278,
|
|
"learning_rate": 2.633841978841406e-05,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006322383880615,
|
|
"step": 539,
|
|
"valid_targets_mean": 16127.8,
|
|
"valid_targets_min": 14446
|
|
},
|
|
{
|
|
"epoch": 2.298507462686567,
|
|
"grad_norm": 0.26769100096181425,
|
|
"learning_rate": 2.6282012506097347e-05,
|
|
"loss": 0.9944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2365688532590866,
|
|
"step": 540,
|
|
"valid_targets_mean": 14421.1,
|
|
"valid_targets_min": 12694
|
|
},
|
|
{
|
|
"epoch": 2.302771855010661,
|
|
"grad_norm": 0.2917387085286522,
|
|
"learning_rate": 2.622554972949724e-05,
|
|
"loss": 1.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29734212160110474,
|
|
"step": 541,
|
|
"valid_targets_mean": 16063.4,
|
|
"valid_targets_min": 14259
|
|
},
|
|
{
|
|
"epoch": 2.307036247334755,
|
|
"grad_norm": 0.3196379228120349,
|
|
"learning_rate": 2.6169031957396778e-05,
|
|
"loss": 1.0505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21211479604244232,
|
|
"step": 542,
|
|
"valid_targets_mean": 11104.2,
|
|
"valid_targets_min": 2564
|
|
},
|
|
{
|
|
"epoch": 2.3113006396588487,
|
|
"grad_norm": 0.252583483704961,
|
|
"learning_rate": 2.611245968906482e-05,
|
|
"loss": 1.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28201597929000854,
|
|
"step": 543,
|
|
"valid_targets_mean": 15902.0,
|
|
"valid_targets_min": 13609
|
|
},
|
|
{
|
|
"epoch": 2.3155650319829424,
|
|
"grad_norm": 0.272609277675995,
|
|
"learning_rate": 2.605583342425165e-05,
|
|
"loss": 0.9867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27425339818000793,
|
|
"step": 544,
|
|
"valid_targets_mean": 16183.0,
|
|
"valid_targets_min": 15463
|
|
},
|
|
{
|
|
"epoch": 2.319829424307036,
|
|
"grad_norm": 0.28361784931520334,
|
|
"learning_rate": 2.5999153663184546e-05,
|
|
"loss": 1.0623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17816561460494995,
|
|
"step": 545,
|
|
"valid_targets_mean": 9009.8,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 2.3240938166311302,
|
|
"grad_norm": 0.25903056353359666,
|
|
"learning_rate": 2.594242090656335e-05,
|
|
"loss": 1.0542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24510842561721802,
|
|
"step": 546,
|
|
"valid_targets_mean": 16194.9,
|
|
"valid_targets_min": 14875
|
|
},
|
|
{
|
|
"epoch": 2.328358208955224,
|
|
"grad_norm": 0.33662351487682074,
|
|
"learning_rate": 2.5885635655556075e-05,
|
|
"loss": 1.0407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889542579650879,
|
|
"step": 547,
|
|
"valid_targets_mean": 16138.1,
|
|
"valid_targets_min": 14550
|
|
},
|
|
{
|
|
"epoch": 2.3326226012793176,
|
|
"grad_norm": 0.2572787483287525,
|
|
"learning_rate": 2.5828798411794443e-05,
|
|
"loss": 1.0784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22583162784576416,
|
|
"step": 548,
|
|
"valid_targets_mean": 12317.6,
|
|
"valid_targets_min": 8720
|
|
},
|
|
{
|
|
"epoch": 2.3368869936034113,
|
|
"grad_norm": 0.3387629705408929,
|
|
"learning_rate": 2.5771909677369484e-05,
|
|
"loss": 1.0128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28297415375709534,
|
|
"step": 549,
|
|
"valid_targets_mean": 16203.8,
|
|
"valid_targets_min": 15058
|
|
},
|
|
{
|
|
"epoch": 2.3411513859275055,
|
|
"grad_norm": 0.32417196993230063,
|
|
"learning_rate": 2.571496995482709e-05,
|
|
"loss": 1.0359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865076959133148,
|
|
"step": 550,
|
|
"valid_targets_mean": 16102.8,
|
|
"valid_targets_min": 14826
|
|
},
|
|
{
|
|
"epoch": 2.345415778251599,
|
|
"grad_norm": 0.2981277693270899,
|
|
"learning_rate": 2.565797974716357e-05,
|
|
"loss": 0.9893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21984124183654785,
|
|
"step": 551,
|
|
"valid_targets_mean": 13737.8,
|
|
"valid_targets_min": 11566
|
|
},
|
|
{
|
|
"epoch": 2.349680170575693,
|
|
"grad_norm": 0.364748162831638,
|
|
"learning_rate": 2.5600939557821205e-05,
|
|
"loss": 1.064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3167724609375,
|
|
"step": 552,
|
|
"valid_targets_mean": 16058.5,
|
|
"valid_targets_min": 14649
|
|
},
|
|
{
|
|
"epoch": 2.3539445628997866,
|
|
"grad_norm": 0.29462969851788084,
|
|
"learning_rate": 2.5543849890683813e-05,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259488582611084,
|
|
"step": 553,
|
|
"valid_targets_mean": 13189.4,
|
|
"valid_targets_min": 3578
|
|
},
|
|
{
|
|
"epoch": 2.3582089552238807,
|
|
"grad_norm": 0.2968723375949295,
|
|
"learning_rate": 2.548671125007229e-05,
|
|
"loss": 1.0296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24670171737670898,
|
|
"step": 554,
|
|
"valid_targets_mean": 15325.0,
|
|
"valid_targets_min": 13465
|
|
},
|
|
{
|
|
"epoch": 2.3624733475479744,
|
|
"grad_norm": 0.3531811065799325,
|
|
"learning_rate": 2.5429524140740155e-05,
|
|
"loss": 1.0447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935154139995575,
|
|
"step": 555,
|
|
"valid_targets_mean": 16192.3,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 2.366737739872068,
|
|
"grad_norm": 0.26755386598655445,
|
|
"learning_rate": 2.537228906786908e-05,
|
|
"loss": 1.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16546911001205444,
|
|
"step": 556,
|
|
"valid_targets_mean": 8583.2,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 2.3710021321961623,
|
|
"grad_norm": 0.34376174101342133,
|
|
"learning_rate": 2.5315006537064473e-05,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27252626419067383,
|
|
"step": 557,
|
|
"valid_targets_mean": 16119.8,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 2.375266524520256,
|
|
"grad_norm": 0.254239466533083,
|
|
"learning_rate": 2.5257677054350927e-05,
|
|
"loss": 1.0451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32484906911849976,
|
|
"step": 558,
|
|
"valid_targets_mean": 16112.5,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 2.3795309168443497,
|
|
"grad_norm": 0.2855408536563262,
|
|
"learning_rate": 2.5200301126167857e-05,
|
|
"loss": 1.0183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19524550437927246,
|
|
"step": 559,
|
|
"valid_targets_mean": 11318.0,
|
|
"valid_targets_min": 7978
|
|
},
|
|
{
|
|
"epoch": 2.3837953091684434,
|
|
"grad_norm": 0.29290215055425123,
|
|
"learning_rate": 2.514287925936492e-05,
|
|
"loss": 1.0268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2714173495769501,
|
|
"step": 560,
|
|
"valid_targets_mean": 16124.8,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 2.388059701492537,
|
|
"grad_norm": 0.264587408670158,
|
|
"learning_rate": 2.5085411961197626e-05,
|
|
"loss": 1.0382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30561578273773193,
|
|
"step": 561,
|
|
"valid_targets_mean": 16119.8,
|
|
"valid_targets_min": 15507
|
|
},
|
|
{
|
|
"epoch": 2.3923240938166312,
|
|
"grad_norm": 0.29703874036572053,
|
|
"learning_rate": 2.502789973932278e-05,
|
|
"loss": 1.0542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23760643601417542,
|
|
"step": 562,
|
|
"valid_targets_mean": 13217.6,
|
|
"valid_targets_min": 9927
|
|
},
|
|
{
|
|
"epoch": 2.396588486140725,
|
|
"grad_norm": 0.27521833873424617,
|
|
"learning_rate": 2.4970343101794073e-05,
|
|
"loss": 1.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29066234827041626,
|
|
"step": 563,
|
|
"valid_targets_mean": 16146.4,
|
|
"valid_targets_min": 15545
|
|
},
|
|
{
|
|
"epoch": 2.4008528784648187,
|
|
"grad_norm": 0.3138363244065825,
|
|
"learning_rate": 2.4912742557057538e-05,
|
|
"loss": 1.0256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866852283477783,
|
|
"step": 564,
|
|
"valid_targets_mean": 16232.5,
|
|
"valid_targets_min": 15938
|
|
},
|
|
{
|
|
"epoch": 2.405117270788913,
|
|
"grad_norm": 0.34269422690135737,
|
|
"learning_rate": 2.485509861394708e-05,
|
|
"loss": 1.0407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25185465812683105,
|
|
"step": 565,
|
|
"valid_targets_mean": 15403.8,
|
|
"valid_targets_min": 13098
|
|
},
|
|
{
|
|
"epoch": 2.4093816631130065,
|
|
"grad_norm": 0.2899844099522823,
|
|
"learning_rate": 2.4797411781679975e-05,
|
|
"loss": 1.0334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29885533452033997,
|
|
"step": 566,
|
|
"valid_targets_mean": 16083.7,
|
|
"valid_targets_min": 13883
|
|
},
|
|
{
|
|
"epoch": 2.4136460554371,
|
|
"grad_norm": 0.3122942927433159,
|
|
"learning_rate": 2.473968256985238e-05,
|
|
"loss": 1.0992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2282446324825287,
|
|
"step": 567,
|
|
"valid_targets_mean": 11219.6,
|
|
"valid_targets_min": 2899
|
|
},
|
|
{
|
|
"epoch": 2.417910447761194,
|
|
"grad_norm": 0.23632056584438826,
|
|
"learning_rate": 2.4681911488434825e-05,
|
|
"loss": 0.9762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23319000005722046,
|
|
"step": 568,
|
|
"valid_targets_mean": 16104.4,
|
|
"valid_targets_min": 14771
|
|
},
|
|
{
|
|
"epoch": 2.4221748400852876,
|
|
"grad_norm": 0.36234671617359504,
|
|
"learning_rate": 2.4624099047767702e-05,
|
|
"loss": 1.0098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30094248056411743,
|
|
"step": 569,
|
|
"valid_targets_mean": 16132.6,
|
|
"valid_targets_min": 13886
|
|
},
|
|
{
|
|
"epoch": 2.4264392324093818,
|
|
"grad_norm": 0.25416569702976993,
|
|
"learning_rate": 2.4566245758556787e-05,
|
|
"loss": 1.0387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18206633627414703,
|
|
"step": 570,
|
|
"valid_targets_mean": 9096.2,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 2.4307036247334755,
|
|
"grad_norm": 0.309343376998228,
|
|
"learning_rate": 2.4508352131868664e-05,
|
|
"loss": 1.0308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24328666925430298,
|
|
"step": 571,
|
|
"valid_targets_mean": 16176.6,
|
|
"valid_targets_min": 14769
|
|
},
|
|
{
|
|
"epoch": 2.434968017057569,
|
|
"grad_norm": 0.27995366768803925,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 1.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31025460362434387,
|
|
"step": 572,
|
|
"valid_targets_mean": 16152.8,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 2.4392324093816633,
|
|
"grad_norm": 0.28389477313638534,
|
|
"learning_rate": 2.439244591210443e-05,
|
|
"loss": 1.0379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2253277450799942,
|
|
"step": 573,
|
|
"valid_targets_mean": 12206.7,
|
|
"valid_targets_min": 8908
|
|
},
|
|
{
|
|
"epoch": 2.443496801705757,
|
|
"grad_norm": 0.32162903536975057,
|
|
"learning_rate": 2.4334434342925133e-05,
|
|
"loss": 1.0412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790234088897705,
|
|
"step": 574,
|
|
"valid_targets_mean": 16198.4,
|
|
"valid_targets_min": 15622
|
|
},
|
|
{
|
|
"epoch": 2.4477611940298507,
|
|
"grad_norm": 0.2612451270954594,
|
|
"learning_rate": 2.4276384484053227e-05,
|
|
"loss": 1.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081274926662445,
|
|
"step": 575,
|
|
"valid_targets_mean": 16086.4,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 2.4520255863539444,
|
|
"grad_norm": 0.3491514604467944,
|
|
"learning_rate": 2.4218296848291795e-05,
|
|
"loss": 1.0572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677220106124878,
|
|
"step": 576,
|
|
"valid_targets_mean": 14360.7,
|
|
"valid_targets_min": 12313
|
|
},
|
|
{
|
|
"epoch": 2.4562899786780386,
|
|
"grad_norm": 0.28692500381866665,
|
|
"learning_rate": 2.4160171948777603e-05,
|
|
"loss": 1.0159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2858474850654602,
|
|
"step": 577,
|
|
"valid_targets_mean": 16193.1,
|
|
"valid_targets_min": 15374
|
|
},
|
|
{
|
|
"epoch": 2.4605543710021323,
|
|
"grad_norm": 0.275130562732031,
|
|
"learning_rate": 2.410201029897665e-05,
|
|
"loss": 1.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24891135096549988,
|
|
"step": 578,
|
|
"valid_targets_mean": 12682.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 2.464818763326226,
|
|
"grad_norm": 0.2943698116744979,
|
|
"learning_rate": 2.4043812412679532e-05,
|
|
"loss": 1.0546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26071763038635254,
|
|
"step": 579,
|
|
"valid_targets_mean": 14894.9,
|
|
"valid_targets_min": 13767
|
|
},
|
|
{
|
|
"epoch": 2.4690831556503197,
|
|
"grad_norm": 0.25500988535123864,
|
|
"learning_rate": 2.3985578803996985e-05,
|
|
"loss": 1.0265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972412109375,
|
|
"step": 580,
|
|
"valid_targets_mean": 16157.9,
|
|
"valid_targets_min": 14275
|
|
},
|
|
{
|
|
"epoch": 2.473347547974414,
|
|
"grad_norm": 0.29927062736580173,
|
|
"learning_rate": 2.392730998735529e-05,
|
|
"loss": 1.05,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17381608486175537,
|
|
"step": 581,
|
|
"valid_targets_mean": 10229.2,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 2.4776119402985075,
|
|
"grad_norm": 0.2541380870867221,
|
|
"learning_rate": 2.3869006477491755e-05,
|
|
"loss": 1.0934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28194326162338257,
|
|
"step": 582,
|
|
"valid_targets_mean": 15959.8,
|
|
"valid_targets_min": 13835
|
|
},
|
|
{
|
|
"epoch": 2.481876332622601,
|
|
"grad_norm": 0.2762729974148565,
|
|
"learning_rate": 2.381066878945017e-05,
|
|
"loss": 1.0006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987706959247589,
|
|
"step": 583,
|
|
"valid_targets_mean": 16156.6,
|
|
"valid_targets_min": 15054
|
|
},
|
|
{
|
|
"epoch": 2.486140724946695,
|
|
"grad_norm": 0.2474085611889176,
|
|
"learning_rate": 2.3752297438576257e-05,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18751725554466248,
|
|
"step": 584,
|
|
"valid_targets_mean": 10441.4,
|
|
"valid_targets_min": 7074
|
|
},
|
|
{
|
|
"epoch": 2.490405117270789,
|
|
"grad_norm": 0.24419316067038616,
|
|
"learning_rate": 2.3693892940513074e-05,
|
|
"loss": 1.0593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27784836292266846,
|
|
"step": 585,
|
|
"valid_targets_mean": 16191.9,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 2.4946695095948828,
|
|
"grad_norm": 0.2257422624548447,
|
|
"learning_rate": 2.3635455811196536e-05,
|
|
"loss": 1.0472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32030776143074036,
|
|
"step": 586,
|
|
"valid_targets_mean": 16169.7,
|
|
"valid_targets_min": 15678
|
|
},
|
|
{
|
|
"epoch": 2.4989339019189765,
|
|
"grad_norm": 0.27087617879229475,
|
|
"learning_rate": 2.3576986566850796e-05,
|
|
"loss": 1.058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24315331876277924,
|
|
"step": 587,
|
|
"valid_targets_mean": 13982.8,
|
|
"valid_targets_min": 11677
|
|
},
|
|
{
|
|
"epoch": 2.50319829424307,
|
|
"grad_norm": 0.23183419509830458,
|
|
"learning_rate": 2.351848572398371e-05,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26703619956970215,
|
|
"step": 588,
|
|
"valid_targets_mean": 16228.9,
|
|
"valid_targets_min": 15674
|
|
},
|
|
{
|
|
"epoch": 2.5074626865671643,
|
|
"grad_norm": 0.2876555675694533,
|
|
"learning_rate": 2.3459953799382276e-05,
|
|
"loss": 1.0059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283439040184021,
|
|
"step": 589,
|
|
"valid_targets_mean": 16192.3,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 2.511727078891258,
|
|
"grad_norm": 0.257214821267071,
|
|
"learning_rate": 2.3401391310108054e-05,
|
|
"loss": 1.0117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25144246220588684,
|
|
"step": 590,
|
|
"valid_targets_mean": 15235.8,
|
|
"valid_targets_min": 10805
|
|
},
|
|
{
|
|
"epoch": 2.5159914712153517,
|
|
"grad_norm": 0.28742274365564024,
|
|
"learning_rate": 2.3342798773492602e-05,
|
|
"loss": 1.0093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27801424264907837,
|
|
"step": 591,
|
|
"valid_targets_mean": 16150.8,
|
|
"valid_targets_min": 14997
|
|
},
|
|
{
|
|
"epoch": 2.520255863539446,
|
|
"grad_norm": 0.2792250435177512,
|
|
"learning_rate": 2.328417670713294e-05,
|
|
"loss": 1.0332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2068663090467453,
|
|
"step": 592,
|
|
"valid_targets_mean": 10595.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 2.5245202558635396,
|
|
"grad_norm": 0.25584728220546477,
|
|
"learning_rate": 2.3225525628886918e-05,
|
|
"loss": 1.0436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25134313106536865,
|
|
"step": 593,
|
|
"valid_targets_mean": 16091.4,
|
|
"valid_targets_min": 15167
|
|
},
|
|
{
|
|
"epoch": 2.5287846481876333,
|
|
"grad_norm": 0.24579866266022213,
|
|
"learning_rate": 2.3166846056868687e-05,
|
|
"loss": 1.0581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33337050676345825,
|
|
"step": 594,
|
|
"valid_targets_mean": 15568.4,
|
|
"valid_targets_min": 2605
|
|
},
|
|
{
|
|
"epoch": 2.533049040511727,
|
|
"grad_norm": 0.2604522535541197,
|
|
"learning_rate": 2.31081385094441e-05,
|
|
"loss": 1.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15701742470264435,
|
|
"step": 595,
|
|
"valid_targets_mean": 8344.5,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 2.5373134328358207,
|
|
"grad_norm": 0.23510609540711125,
|
|
"learning_rate": 2.304940350522615e-05,
|
|
"loss": 1.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25622326135635376,
|
|
"step": 596,
|
|
"valid_targets_mean": 16130.5,
|
|
"valid_targets_min": 14384
|
|
},
|
|
{
|
|
"epoch": 2.541577825159915,
|
|
"grad_norm": 0.2554529059538655,
|
|
"learning_rate": 2.299064156307037e-05,
|
|
"loss": 1.0373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30680519342422485,
|
|
"step": 597,
|
|
"valid_targets_mean": 16067.1,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 2.5458422174840085,
|
|
"grad_norm": 0.2439618134001834,
|
|
"learning_rate": 2.2931853202070275e-05,
|
|
"loss": 1.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22057399153709412,
|
|
"step": 598,
|
|
"valid_targets_mean": 13112.9,
|
|
"valid_targets_min": 9219
|
|
},
|
|
{
|
|
"epoch": 2.550106609808102,
|
|
"grad_norm": 0.24263854012333433,
|
|
"learning_rate": 2.2873038941552724e-05,
|
|
"loss": 0.9907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783924341201782,
|
|
"step": 599,
|
|
"valid_targets_mean": 16142.3,
|
|
"valid_targets_min": 13573
|
|
},
|
|
{
|
|
"epoch": 2.5543710021321964,
|
|
"grad_norm": 0.31281048228813396,
|
|
"learning_rate": 2.2814199301073412e-05,
|
|
"loss": 1.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3061503469944,
|
|
"step": 600,
|
|
"valid_targets_mean": 16099.6,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 2.55863539445629,
|
|
"grad_norm": 0.22100958753785552,
|
|
"learning_rate": 2.27553348004122e-05,
|
|
"loss": 1.0724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541562020778656,
|
|
"step": 601,
|
|
"valid_targets_mean": 14181.0,
|
|
"valid_targets_min": 11734
|
|
},
|
|
{
|
|
"epoch": 2.5628997867803838,
|
|
"grad_norm": 0.2757247058016967,
|
|
"learning_rate": 2.2696445959568577e-05,
|
|
"loss": 1.0465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028034567832947,
|
|
"step": 602,
|
|
"valid_targets_mean": 16122.7,
|
|
"valid_targets_min": 15319
|
|
},
|
|
{
|
|
"epoch": 2.5671641791044775,
|
|
"grad_norm": 0.23155575529044867,
|
|
"learning_rate": 2.2637533298757064e-05,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24338635802268982,
|
|
"step": 603,
|
|
"valid_targets_mean": 13220.1,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.25103099670230467,
|
|
"learning_rate": 2.2578597338402567e-05,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26038774847984314,
|
|
"step": 604,
|
|
"valid_targets_mean": 16067.2,
|
|
"valid_targets_min": 14862
|
|
},
|
|
{
|
|
"epoch": 2.5756929637526653,
|
|
"grad_norm": 0.216514057593945,
|
|
"learning_rate": 2.2519638599135844e-05,
|
|
"loss": 1.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28920871019363403,
|
|
"step": 605,
|
|
"valid_targets_mean": 16087.8,
|
|
"valid_targets_min": 14331
|
|
},
|
|
{
|
|
"epoch": 2.579957356076759,
|
|
"grad_norm": 0.25468437099798963,
|
|
"learning_rate": 2.2460657601788875e-05,
|
|
"loss": 1.0366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18615485727787018,
|
|
"step": 606,
|
|
"valid_targets_mean": 9125.1,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 2.5842217484008527,
|
|
"grad_norm": 0.2157935003553507,
|
|
"learning_rate": 2.2401654867390256e-05,
|
|
"loss": 1.0319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2346121072769165,
|
|
"step": 607,
|
|
"valid_targets_mean": 16230.6,
|
|
"valid_targets_min": 15761
|
|
},
|
|
{
|
|
"epoch": 2.588486140724947,
|
|
"grad_norm": 0.2722214784520387,
|
|
"learning_rate": 2.2342630917160605e-05,
|
|
"loss": 1.0526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3194594979286194,
|
|
"step": 608,
|
|
"valid_targets_mean": 16008.2,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 2.5927505330490406,
|
|
"grad_norm": 0.24055844482244143,
|
|
"learning_rate": 2.2283586272507975e-05,
|
|
"loss": 1.0366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20761112868785858,
|
|
"step": 609,
|
|
"valid_targets_mean": 12059.9,
|
|
"valid_targets_min": 7841
|
|
},
|
|
{
|
|
"epoch": 2.5970149253731343,
|
|
"grad_norm": 0.25176700378196953,
|
|
"learning_rate": 2.2224521455023193e-05,
|
|
"loss": 1.0351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27428489923477173,
|
|
"step": 610,
|
|
"valid_targets_mean": 16181.1,
|
|
"valid_targets_min": 15563
|
|
},
|
|
{
|
|
"epoch": 2.6012793176972284,
|
|
"grad_norm": 0.2837174045985541,
|
|
"learning_rate": 2.216543698647534e-05,
|
|
"loss": 1.0351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32162636518478394,
|
|
"step": 611,
|
|
"valid_targets_mean": 16136.7,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 2.605543710021322,
|
|
"grad_norm": 0.25060068140336916,
|
|
"learning_rate": 2.210633338880704e-05,
|
|
"loss": 1.0672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23636803030967712,
|
|
"step": 612,
|
|
"valid_targets_mean": 13411.0,
|
|
"valid_targets_min": 11613
|
|
},
|
|
{
|
|
"epoch": 2.609808102345416,
|
|
"grad_norm": 0.29988526616833044,
|
|
"learning_rate": 2.204721118412994e-05,
|
|
"loss": 1.0629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33013397455215454,
|
|
"step": 613,
|
|
"valid_targets_mean": 16006.3,
|
|
"valid_targets_min": 15028
|
|
},
|
|
{
|
|
"epoch": 2.6140724946695095,
|
|
"grad_norm": 0.23392427852382872,
|
|
"learning_rate": 2.1988070894720037e-05,
|
|
"loss": 1.0508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30861896276474,
|
|
"step": 614,
|
|
"valid_targets_mean": 16166.3,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 2.6183368869936032,
|
|
"grad_norm": 0.2566019859501764,
|
|
"learning_rate": 2.192891304301309e-05,
|
|
"loss": 1.071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24006184935569763,
|
|
"step": 615,
|
|
"valid_targets_mean": 14385.6,
|
|
"valid_targets_min": 12637
|
|
},
|
|
{
|
|
"epoch": 2.6226012793176974,
|
|
"grad_norm": 0.24410610023772966,
|
|
"learning_rate": 2.18697381516e-05,
|
|
"loss": 1.0465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2994387447834015,
|
|
"step": 616,
|
|
"valid_targets_mean": 16184.0,
|
|
"valid_targets_min": 15614
|
|
},
|
|
{
|
|
"epoch": 2.626865671641791,
|
|
"grad_norm": 0.260247846089678,
|
|
"learning_rate": 2.181054674322221e-05,
|
|
"loss": 1.0306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19397930800914764,
|
|
"step": 617,
|
|
"valid_targets_mean": 10109.2,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 2.631130063965885,
|
|
"grad_norm": 0.24510360860781857,
|
|
"learning_rate": 2.1751339340767043e-05,
|
|
"loss": 0.9939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26600921154022217,
|
|
"step": 618,
|
|
"valid_targets_mean": 16138.7,
|
|
"valid_targets_min": 14734
|
|
},
|
|
{
|
|
"epoch": 2.635394456289979,
|
|
"grad_norm": 0.2791782186631597,
|
|
"learning_rate": 2.169211646726313e-05,
|
|
"loss": 1.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31525731086730957,
|
|
"step": 619,
|
|
"valid_targets_mean": 16145.5,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 2.6396588486140726,
|
|
"grad_norm": 0.24716367049686047,
|
|
"learning_rate": 2.163287864587576e-05,
|
|
"loss": 1.0436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15222877264022827,
|
|
"step": 620,
|
|
"valid_targets_mean": 8127.2,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 2.6439232409381663,
|
|
"grad_norm": 0.280066841325202,
|
|
"learning_rate": 2.157362639990229e-05,
|
|
"loss": 0.9964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535462975502014,
|
|
"step": 621,
|
|
"valid_targets_mean": 16050.8,
|
|
"valid_targets_min": 14159
|
|
},
|
|
{
|
|
"epoch": 2.64818763326226,
|
|
"grad_norm": 0.2531906956808335,
|
|
"learning_rate": 2.151436025276747e-05,
|
|
"loss": 1.047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3116755187511444,
|
|
"step": 622,
|
|
"valid_targets_mean": 16133.9,
|
|
"valid_targets_min": 13934
|
|
},
|
|
{
|
|
"epoch": 2.6524520255863537,
|
|
"grad_norm": 0.2850201057006353,
|
|
"learning_rate": 2.145508072801888e-05,
|
|
"loss": 1.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25311654806137085,
|
|
"step": 623,
|
|
"valid_targets_mean": 13166.8,
|
|
"valid_targets_min": 9709
|
|
},
|
|
{
|
|
"epoch": 2.656716417910448,
|
|
"grad_norm": 0.23355453716914845,
|
|
"learning_rate": 2.1395788349322256e-05,
|
|
"loss": 1.0671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790774703025818,
|
|
"step": 624,
|
|
"valid_targets_mean": 16132.4,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 2.6609808102345416,
|
|
"grad_norm": 0.31998428525759265,
|
|
"learning_rate": 2.133648364045689e-05,
|
|
"loss": 1.0399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3171985149383545,
|
|
"step": 625,
|
|
"valid_targets_mean": 16104.8,
|
|
"valid_targets_min": 14688
|
|
},
|
|
{
|
|
"epoch": 2.6652452025586353,
|
|
"grad_norm": 0.21947281552927425,
|
|
"learning_rate": 2.1277167125310996e-05,
|
|
"loss": 1.0454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2476813942193985,
|
|
"step": 626,
|
|
"valid_targets_mean": 15302.1,
|
|
"valid_targets_min": 13615
|
|
},
|
|
{
|
|
"epoch": 2.6695095948827294,
|
|
"grad_norm": 0.2937535223304543,
|
|
"learning_rate": 2.1217839327877098e-05,
|
|
"loss": 1.0462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2926105856895447,
|
|
"step": 627,
|
|
"valid_targets_mean": 16166.6,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 2.673773987206823,
|
|
"grad_norm": 0.236485365201918,
|
|
"learning_rate": 2.1158500772247352e-05,
|
|
"loss": 1.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26240310072898865,
|
|
"step": 628,
|
|
"valid_targets_mean": 13016.6,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 2.678038379530917,
|
|
"grad_norm": 0.2821086013302492,
|
|
"learning_rate": 2.1099151982608985e-05,
|
|
"loss": 1.0742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824803590774536,
|
|
"step": 629,
|
|
"valid_targets_mean": 15976.4,
|
|
"valid_targets_min": 14810
|
|
},
|
|
{
|
|
"epoch": 2.6823027718550105,
|
|
"grad_norm": 0.24455709718828342,
|
|
"learning_rate": 2.1039793483239607e-05,
|
|
"loss": 1.0681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30158841609954834,
|
|
"step": 630,
|
|
"valid_targets_mean": 16166.0,
|
|
"valid_targets_min": 15282
|
|
},
|
|
{
|
|
"epoch": 2.6865671641791042,
|
|
"grad_norm": 0.237209848329062,
|
|
"learning_rate": 2.0980425798502616e-05,
|
|
"loss": 1.0209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1627788245677948,
|
|
"step": 631,
|
|
"valid_targets_mean": 8038.8,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 2.6908315565031984,
|
|
"grad_norm": 0.24278918018028017,
|
|
"learning_rate": 2.092104945284255e-05,
|
|
"loss": 1.0157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24492160975933075,
|
|
"step": 632,
|
|
"valid_targets_mean": 15944.4,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 2.695095948827292,
|
|
"grad_norm": 0.25151491707665696,
|
|
"learning_rate": 2.0861664970780434e-05,
|
|
"loss": 1.0376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3210940659046173,
|
|
"step": 633,
|
|
"valid_targets_mean": 16005.0,
|
|
"valid_targets_min": 12680
|
|
},
|
|
{
|
|
"epoch": 2.699360341151386,
|
|
"grad_norm": 0.2402402743315667,
|
|
"learning_rate": 2.08022728769092e-05,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19892586767673492,
|
|
"step": 634,
|
|
"valid_targets_mean": 10561.3,
|
|
"valid_targets_min": 6389
|
|
},
|
|
{
|
|
"epoch": 2.70362473347548,
|
|
"grad_norm": 0.22059046014233277,
|
|
"learning_rate": 2.0742873695889005e-05,
|
|
"loss": 1.0731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279842734336853,
|
|
"step": 635,
|
|
"valid_targets_mean": 16137.4,
|
|
"valid_targets_min": 15187
|
|
},
|
|
{
|
|
"epoch": 2.7078891257995736,
|
|
"grad_norm": 0.2549914086411169,
|
|
"learning_rate": 2.0683467952442626e-05,
|
|
"loss": 1.0577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054582476615906,
|
|
"step": 636,
|
|
"valid_targets_mean": 16199.2,
|
|
"valid_targets_min": 15333
|
|
},
|
|
{
|
|
"epoch": 2.7121535181236673,
|
|
"grad_norm": 0.24298507497702615,
|
|
"learning_rate": 2.0624056171350785e-05,
|
|
"loss": 1.0576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23113980889320374,
|
|
"step": 637,
|
|
"valid_targets_mean": 13013.5,
|
|
"valid_targets_min": 10913
|
|
},
|
|
{
|
|
"epoch": 2.716417910447761,
|
|
"grad_norm": 0.2471230597120791,
|
|
"learning_rate": 2.0564638877447566e-05,
|
|
"loss": 0.9992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28163012862205505,
|
|
"step": 638,
|
|
"valid_targets_mean": 16089.1,
|
|
"valid_targets_min": 15225
|
|
},
|
|
{
|
|
"epoch": 2.7206823027718547,
|
|
"grad_norm": 0.2617874704502744,
|
|
"learning_rate": 2.0505216595615742e-05,
|
|
"loss": 1.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3296041488647461,
|
|
"step": 639,
|
|
"valid_targets_mean": 16108.9,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 2.724946695095949,
|
|
"grad_norm": 0.2183371999404649,
|
|
"learning_rate": 2.044578985078215e-05,
|
|
"loss": 1.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2446061074733734,
|
|
"step": 640,
|
|
"valid_targets_mean": 15094.3,
|
|
"valid_targets_min": 13211
|
|
},
|
|
{
|
|
"epoch": 2.7292110874200426,
|
|
"grad_norm": 0.26131948956127204,
|
|
"learning_rate": 2.0386359167913046e-05,
|
|
"loss": 1.0648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.320263147354126,
|
|
"step": 641,
|
|
"valid_targets_mean": 16148.0,
|
|
"valid_targets_min": 15288
|
|
},
|
|
{
|
|
"epoch": 2.7334754797441363,
|
|
"grad_norm": 0.22950984010860573,
|
|
"learning_rate": 2.0326925072009485e-05,
|
|
"loss": 1.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1871369332075119,
|
|
"step": 642,
|
|
"valid_targets_mean": 10481.8,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.7377398720682304,
|
|
"grad_norm": 0.23719243746195032,
|
|
"learning_rate": 2.0267488088102657e-05,
|
|
"loss": 1.0849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25228750705718994,
|
|
"step": 643,
|
|
"valid_targets_mean": 15958.4,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 2.742004264392324,
|
|
"grad_norm": 0.24644555909085203,
|
|
"learning_rate": 2.0208048741249288e-05,
|
|
"loss": 0.9916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29770416021347046,
|
|
"step": 644,
|
|
"valid_targets_mean": 16206.8,
|
|
"valid_targets_min": 15398
|
|
},
|
|
{
|
|
"epoch": 2.746268656716418,
|
|
"grad_norm": 0.21217998900070079,
|
|
"learning_rate": 2.014860755652695e-05,
|
|
"loss": 1.0249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17109495401382446,
|
|
"step": 645,
|
|
"valid_targets_mean": 9416.3,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 2.750533049040512,
|
|
"grad_norm": 0.22591098862055015,
|
|
"learning_rate": 2.0089165059029477e-05,
|
|
"loss": 1.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24200916290283203,
|
|
"step": 646,
|
|
"valid_targets_mean": 16177.7,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 2.7547974413646057,
|
|
"grad_norm": 0.20975905675277473,
|
|
"learning_rate": 2.0029721773862277e-05,
|
|
"loss": 1.0362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30201756954193115,
|
|
"step": 647,
|
|
"valid_targets_mean": 15917.4,
|
|
"valid_targets_min": 9299
|
|
},
|
|
{
|
|
"epoch": 2.7590618336886994,
|
|
"grad_norm": 0.22888891178981322,
|
|
"learning_rate": 1.997027822613773e-05,
|
|
"loss": 1.0103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20682796835899353,
|
|
"step": 648,
|
|
"valid_targets_mean": 12600.8,
|
|
"valid_targets_min": 9289
|
|
},
|
|
{
|
|
"epoch": 2.763326226012793,
|
|
"grad_norm": 0.21690541236552421,
|
|
"learning_rate": 1.9910834940970533e-05,
|
|
"loss": 1.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30877169966697693,
|
|
"step": 649,
|
|
"valid_targets_mean": 16163.2,
|
|
"valid_targets_min": 15488
|
|
},
|
|
{
|
|
"epoch": 2.767590618336887,
|
|
"grad_norm": 0.25150084829078434,
|
|
"learning_rate": 1.985139244347305e-05,
|
|
"loss": 1.0175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28148651123046875,
|
|
"step": 650,
|
|
"valid_targets_mean": 16152.5,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 2.771855010660981,
|
|
"grad_norm": 0.23104681337169866,
|
|
"learning_rate": 1.979195125875072e-05,
|
|
"loss": 1.0532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24804601073265076,
|
|
"step": 651,
|
|
"valid_targets_mean": 14425.4,
|
|
"valid_targets_min": 12081
|
|
},
|
|
{
|
|
"epoch": 2.7761194029850746,
|
|
"grad_norm": 0.27722695150572985,
|
|
"learning_rate": 1.9732511911897353e-05,
|
|
"loss": 1.0534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992546558380127,
|
|
"step": 652,
|
|
"valid_targets_mean": 16109.8,
|
|
"valid_targets_min": 14956
|
|
},
|
|
{
|
|
"epoch": 2.7803837953091683,
|
|
"grad_norm": 0.25971725300390797,
|
|
"learning_rate": 1.9673074927990525e-05,
|
|
"loss": 1.054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553447484970093,
|
|
"step": 653,
|
|
"valid_targets_mean": 12686.2,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 2.7846481876332625,
|
|
"grad_norm": 0.2537874516386417,
|
|
"learning_rate": 1.9613640832086957e-05,
|
|
"loss": 1.0174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649880349636078,
|
|
"step": 654,
|
|
"valid_targets_mean": 16176.4,
|
|
"valid_targets_min": 15720
|
|
},
|
|
{
|
|
"epoch": 2.788912579957356,
|
|
"grad_norm": 0.26893498968806956,
|
|
"learning_rate": 1.9554210149217855e-05,
|
|
"loss": 1.0149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28672242164611816,
|
|
"step": 655,
|
|
"valid_targets_mean": 16022.2,
|
|
"valid_targets_min": 13473
|
|
},
|
|
{
|
|
"epoch": 2.79317697228145,
|
|
"grad_norm": 0.26631615615334103,
|
|
"learning_rate": 1.9494783404384265e-05,
|
|
"loss": 1.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20251776278018951,
|
|
"step": 656,
|
|
"valid_targets_mean": 9653.7,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 2.7974413646055436,
|
|
"grad_norm": 0.23092218311033125,
|
|
"learning_rate": 1.9435361122552437e-05,
|
|
"loss": 0.9846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24132248759269714,
|
|
"step": 657,
|
|
"valid_targets_mean": 16161.2,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 2.8017057569296373,
|
|
"grad_norm": 0.25735963443562043,
|
|
"learning_rate": 1.9375943828649215e-05,
|
|
"loss": 1.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2917920649051666,
|
|
"step": 658,
|
|
"valid_targets_mean": 16125.6,
|
|
"valid_targets_min": 14877
|
|
},
|
|
{
|
|
"epoch": 2.8059701492537314,
|
|
"grad_norm": 0.24947329490805698,
|
|
"learning_rate": 1.9316532047557378e-05,
|
|
"loss": 1.0235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1940438151359558,
|
|
"step": 659,
|
|
"valid_targets_mean": 11347.2,
|
|
"valid_targets_min": 7388
|
|
},
|
|
{
|
|
"epoch": 2.810234541577825,
|
|
"grad_norm": 0.22255033056333007,
|
|
"learning_rate": 1.9257126304110998e-05,
|
|
"loss": 1.031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27259165048599243,
|
|
"step": 660,
|
|
"valid_targets_mean": 16103.5,
|
|
"valid_targets_min": 15242
|
|
},
|
|
{
|
|
"epoch": 2.814498933901919,
|
|
"grad_norm": 0.23951809938555543,
|
|
"learning_rate": 1.919772712309081e-05,
|
|
"loss": 1.0209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29353398084640503,
|
|
"step": 661,
|
|
"valid_targets_mean": 16163.3,
|
|
"valid_targets_min": 15198
|
|
},
|
|
{
|
|
"epoch": 2.818763326226013,
|
|
"grad_norm": 0.21718664313423708,
|
|
"learning_rate": 1.9138335029219572e-05,
|
|
"loss": 1.0536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23224960267543793,
|
|
"step": 662,
|
|
"valid_targets_mean": 13117.1,
|
|
"valid_targets_min": 11573
|
|
},
|
|
{
|
|
"epoch": 2.8230277185501067,
|
|
"grad_norm": 0.2257844184470096,
|
|
"learning_rate": 1.9078950547157458e-05,
|
|
"loss": 1.0369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831473648548126,
|
|
"step": 663,
|
|
"valid_targets_mean": 16066.2,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 2.8272921108742004,
|
|
"grad_norm": 0.22243980626383753,
|
|
"learning_rate": 1.9019574201497387e-05,
|
|
"loss": 1.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3331933617591858,
|
|
"step": 664,
|
|
"valid_targets_mean": 16098.2,
|
|
"valid_targets_min": 14646
|
|
},
|
|
{
|
|
"epoch": 2.831556503198294,
|
|
"grad_norm": 0.21657423948340665,
|
|
"learning_rate": 1.8960206516760396e-05,
|
|
"loss": 1.0492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25344744324684143,
|
|
"step": 665,
|
|
"valid_targets_mean": 15457.3,
|
|
"valid_targets_min": 14161
|
|
},
|
|
{
|
|
"epoch": 2.835820895522388,
|
|
"grad_norm": 0.21439810928341393,
|
|
"learning_rate": 1.890084801739102e-05,
|
|
"loss": 0.9945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717779278755188,
|
|
"step": 666,
|
|
"valid_targets_mean": 16156.0,
|
|
"valid_targets_min": 15128
|
|
},
|
|
{
|
|
"epoch": 2.840085287846482,
|
|
"grad_norm": 0.2130122550921266,
|
|
"learning_rate": 1.884149922775265e-05,
|
|
"loss": 1.0345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21412064135074615,
|
|
"step": 667,
|
|
"valid_targets_mean": 11165.4,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 2.8443496801705757,
|
|
"grad_norm": 0.21128604300748366,
|
|
"learning_rate": 1.878216067212291e-05,
|
|
"loss": 1.0247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540205121040344,
|
|
"step": 668,
|
|
"valid_targets_mean": 15901.6,
|
|
"valid_targets_min": 14526
|
|
},
|
|
{
|
|
"epoch": 2.8486140724946694,
|
|
"grad_norm": 0.2387833449847982,
|
|
"learning_rate": 1.8722832874689007e-05,
|
|
"loss": 1.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28927671909332275,
|
|
"step": 669,
|
|
"valid_targets_mean": 16190.7,
|
|
"valid_targets_min": 15714
|
|
},
|
|
{
|
|
"epoch": 2.8528784648187635,
|
|
"grad_norm": 0.23623343768116514,
|
|
"learning_rate": 1.8663516359543123e-05,
|
|
"loss": 1.0172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471552848815918,
|
|
"step": 670,
|
|
"valid_targets_mean": 8172.2,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.22674027962997848,
|
|
"learning_rate": 1.860421165067775e-05,
|
|
"loss": 1.0239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2460513412952423,
|
|
"step": 671,
|
|
"valid_targets_mean": 16181.2,
|
|
"valid_targets_min": 15380
|
|
},
|
|
{
|
|
"epoch": 2.861407249466951,
|
|
"grad_norm": 0.22114452151323957,
|
|
"learning_rate": 1.8544919271981125e-05,
|
|
"loss": 1.0617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31195616722106934,
|
|
"step": 672,
|
|
"valid_targets_mean": 16093.1,
|
|
"valid_targets_min": 14929
|
|
},
|
|
{
|
|
"epoch": 2.8656716417910446,
|
|
"grad_norm": 0.2136213592765994,
|
|
"learning_rate": 1.8485639747232535e-05,
|
|
"loss": 1.035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22793647646903992,
|
|
"step": 673,
|
|
"valid_targets_mean": 12507.3,
|
|
"valid_targets_min": 10158
|
|
},
|
|
{
|
|
"epoch": 2.8699360341151388,
|
|
"grad_norm": 0.23365397541891172,
|
|
"learning_rate": 1.8426373600097723e-05,
|
|
"loss": 1.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27648717164993286,
|
|
"step": 674,
|
|
"valid_targets_mean": 16078.8,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 2.8742004264392325,
|
|
"grad_norm": 0.22044626779400517,
|
|
"learning_rate": 1.836712135412424e-05,
|
|
"loss": 1.0315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947613298892975,
|
|
"step": 675,
|
|
"valid_targets_mean": 16158.5,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 2.878464818763326,
|
|
"grad_norm": 0.24912478292173995,
|
|
"learning_rate": 1.8307883532736878e-05,
|
|
"loss": 1.0229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23116914927959442,
|
|
"step": 676,
|
|
"valid_targets_mean": 14858.4,
|
|
"valid_targets_min": 12939
|
|
},
|
|
{
|
|
"epoch": 2.88272921108742,
|
|
"grad_norm": 0.2030538010263503,
|
|
"learning_rate": 1.8248660659232964e-05,
|
|
"loss": 1.0148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29867780208587646,
|
|
"step": 677,
|
|
"valid_targets_mean": 16104.2,
|
|
"valid_targets_min": 15187
|
|
},
|
|
{
|
|
"epoch": 2.886993603411514,
|
|
"grad_norm": 0.22455801254559324,
|
|
"learning_rate": 1.8189453256777798e-05,
|
|
"loss": 1.0114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25471770763397217,
|
|
"step": 678,
|
|
"valid_targets_mean": 13330.7,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 2.8912579957356077,
|
|
"grad_norm": 0.22572819203043235,
|
|
"learning_rate": 1.8130261848399996e-05,
|
|
"loss": 1.0466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25024929642677307,
|
|
"step": 679,
|
|
"valid_targets_mean": 15772.3,
|
|
"valid_targets_min": 14001
|
|
},
|
|
{
|
|
"epoch": 2.8955223880597014,
|
|
"grad_norm": 0.2148928953258898,
|
|
"learning_rate": 1.8071086956986916e-05,
|
|
"loss": 1.0073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3032970726490021,
|
|
"step": 680,
|
|
"valid_targets_mean": 16085.4,
|
|
"valid_targets_min": 14479
|
|
},
|
|
{
|
|
"epoch": 2.8997867803837956,
|
|
"grad_norm": 0.23287112459611656,
|
|
"learning_rate": 1.8011929105279967e-05,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1503039300441742,
|
|
"step": 681,
|
|
"valid_targets_mean": 8402.1,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 2.9040511727078893,
|
|
"grad_norm": 0.20159707915220337,
|
|
"learning_rate": 1.795278881587007e-05,
|
|
"loss": 1.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602740526199341,
|
|
"step": 682,
|
|
"valid_targets_mean": 16153.5,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 2.908315565031983,
|
|
"grad_norm": 0.21093567241301997,
|
|
"learning_rate": 1.7893666611192962e-05,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978820204734802,
|
|
"step": 683,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15505
|
|
},
|
|
{
|
|
"epoch": 2.9125799573560767,
|
|
"grad_norm": 0.20731989692928762,
|
|
"learning_rate": 1.783456301352467e-05,
|
|
"loss": 1.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18348245322704315,
|
|
"step": 684,
|
|
"valid_targets_mean": 10106.0,
|
|
"valid_targets_min": 4326
|
|
},
|
|
{
|
|
"epoch": 2.9168443496801704,
|
|
"grad_norm": 0.20726205030517525,
|
|
"learning_rate": 1.7775478544976813e-05,
|
|
"loss": 1.0677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854314148426056,
|
|
"step": 685,
|
|
"valid_targets_mean": 16062.8,
|
|
"valid_targets_min": 14365
|
|
},
|
|
{
|
|
"epoch": 2.9211087420042645,
|
|
"grad_norm": 0.21417839427489976,
|
|
"learning_rate": 1.7716413727492035e-05,
|
|
"loss": 1.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212803900241852,
|
|
"step": 686,
|
|
"valid_targets_mean": 16094.8,
|
|
"valid_targets_min": 14920
|
|
},
|
|
{
|
|
"epoch": 2.925373134328358,
|
|
"grad_norm": 0.2098697663887528,
|
|
"learning_rate": 1.7657369082839392e-05,
|
|
"loss": 1.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2322990596294403,
|
|
"step": 687,
|
|
"valid_targets_mean": 13003.5,
|
|
"valid_targets_min": 10625
|
|
},
|
|
{
|
|
"epoch": 2.929637526652452,
|
|
"grad_norm": 0.22237164566610645,
|
|
"learning_rate": 1.7598345132609747e-05,
|
|
"loss": 1.0324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856200933456421,
|
|
"step": 688,
|
|
"valid_targets_mean": 16220.6,
|
|
"valid_targets_min": 15582
|
|
},
|
|
{
|
|
"epoch": 2.933901918976546,
|
|
"grad_norm": 0.22324289372338676,
|
|
"learning_rate": 1.7539342398211132e-05,
|
|
"loss": 1.0593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3239976167678833,
|
|
"step": 689,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 15595
|
|
},
|
|
{
|
|
"epoch": 2.9381663113006398,
|
|
"grad_norm": 0.2260280034583233,
|
|
"learning_rate": 1.748036140086416e-05,
|
|
"loss": 1.0494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603621482849121,
|
|
"step": 690,
|
|
"valid_targets_mean": 15390.4,
|
|
"valid_targets_min": 14171
|
|
},
|
|
{
|
|
"epoch": 2.9424307036247335,
|
|
"grad_norm": 0.22104793798071715,
|
|
"learning_rate": 1.742140266159744e-05,
|
|
"loss": 1.0286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908846139907837,
|
|
"step": 691,
|
|
"valid_targets_mean": 16097.5,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 2.946695095948827,
|
|
"grad_norm": 0.2316562632020644,
|
|
"learning_rate": 1.7362466701242943e-05,
|
|
"loss": 1.0351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21313974261283875,
|
|
"step": 692,
|
|
"valid_targets_mean": 11418.3,
|
|
"valid_targets_min": 2707
|
|
},
|
|
{
|
|
"epoch": 2.950959488272921,
|
|
"grad_norm": 0.21042378519746668,
|
|
"learning_rate": 1.7303554040431426e-05,
|
|
"loss": 1.0486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517317533493042,
|
|
"step": 693,
|
|
"valid_targets_mean": 16096.1,
|
|
"valid_targets_min": 13929
|
|
},
|
|
{
|
|
"epoch": 2.955223880597015,
|
|
"grad_norm": 0.2544599471399008,
|
|
"learning_rate": 1.7244665199587812e-05,
|
|
"loss": 1.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812119126319885,
|
|
"step": 694,
|
|
"valid_targets_mean": 16215.6,
|
|
"valid_targets_min": 15674
|
|
},
|
|
{
|
|
"epoch": 2.9594882729211087,
|
|
"grad_norm": 0.2095821412385017,
|
|
"learning_rate": 1.7185800698926594e-05,
|
|
"loss": 1.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16977180540561676,
|
|
"step": 695,
|
|
"valid_targets_mean": 9167.3,
|
|
"valid_targets_min": 2370
|
|
},
|
|
{
|
|
"epoch": 2.9637526652452024,
|
|
"grad_norm": 0.20048483730316313,
|
|
"learning_rate": 1.7126961058447276e-05,
|
|
"loss": 1.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27764832973480225,
|
|
"step": 696,
|
|
"valid_targets_mean": 16013.3,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 2.9680170575692966,
|
|
"grad_norm": 0.2322609467170458,
|
|
"learning_rate": 1.706814679792973e-05,
|
|
"loss": 1.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27883732318878174,
|
|
"step": 697,
|
|
"valid_targets_mean": 16093.7,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 2.9722814498933903,
|
|
"grad_norm": 0.2237777591899605,
|
|
"learning_rate": 1.7009358436929632e-05,
|
|
"loss": 1.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22867128252983093,
|
|
"step": 698,
|
|
"valid_targets_mean": 12394.0,
|
|
"valid_targets_min": 9653
|
|
},
|
|
{
|
|
"epoch": 2.976545842217484,
|
|
"grad_norm": 0.22114776777868672,
|
|
"learning_rate": 1.6950596494773855e-05,
|
|
"loss": 1.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3075253367424011,
|
|
"step": 699,
|
|
"valid_targets_mean": 16081.6,
|
|
"valid_targets_min": 14632
|
|
},
|
|
{
|
|
"epoch": 2.9808102345415777,
|
|
"grad_norm": 0.2152688913421649,
|
|
"learning_rate": 1.6891861490555906e-05,
|
|
"loss": 0.9983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29235392808914185,
|
|
"step": 700,
|
|
"valid_targets_mean": 16114.5,
|
|
"valid_targets_min": 15221
|
|
},
|
|
{
|
|
"epoch": 2.9850746268656714,
|
|
"grad_norm": 0.21852433363432214,
|
|
"learning_rate": 1.683315394313132e-05,
|
|
"loss": 1.0639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24894961714744568,
|
|
"step": 701,
|
|
"valid_targets_mean": 14577.7,
|
|
"valid_targets_min": 12869
|
|
},
|
|
{
|
|
"epoch": 2.9893390191897655,
|
|
"grad_norm": 0.24510299803151367,
|
|
"learning_rate": 1.677447437111309e-05,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949763536453247,
|
|
"step": 702,
|
|
"valid_targets_mean": 16146.8,
|
|
"valid_targets_min": 15427
|
|
},
|
|
{
|
|
"epoch": 2.9936034115138592,
|
|
"grad_norm": 0.23612089030624425,
|
|
"learning_rate": 1.671582329286707e-05,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23207339644432068,
|
|
"step": 703,
|
|
"valid_targets_mean": 12671.8,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 2.997867803837953,
|
|
"grad_norm": 0.24541368708513386,
|
|
"learning_rate": 1.66572012265074e-05,
|
|
"loss": 1.0403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25882816314697266,
|
|
"step": 704,
|
|
"valid_targets_mean": 15686.2,
|
|
"valid_targets_min": 14947
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.2988024198484961,
|
|
"learning_rate": 1.6598608689891953e-05,
|
|
"loss": 1.003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47871822118759155,
|
|
"step": 705,
|
|
"valid_targets_mean": 11341.6,
|
|
"valid_targets_min": 3066
|
|
},
|
|
{
|
|
"epoch": 3.0042643923240937,
|
|
"grad_norm": 0.2266855675990286,
|
|
"learning_rate": 1.654004620061773e-05,
|
|
"loss": 1.0221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2473788857460022,
|
|
"step": 706,
|
|
"valid_targets_mean": 16072.4,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 3.008528784648188,
|
|
"grad_norm": 0.24397232042587513,
|
|
"learning_rate": 1.6481514276016297e-05,
|
|
"loss": 1.0196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265364408493042,
|
|
"step": 707,
|
|
"valid_targets_mean": 16044.9,
|
|
"valid_targets_min": 11563
|
|
},
|
|
{
|
|
"epoch": 3.0127931769722816,
|
|
"grad_norm": 0.21774523167351617,
|
|
"learning_rate": 1.6423013433149207e-05,
|
|
"loss": 1.0046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21565969288349152,
|
|
"step": 708,
|
|
"valid_targets_mean": 13379.7,
|
|
"valid_targets_min": 10328
|
|
},
|
|
{
|
|
"epoch": 3.0170575692963753,
|
|
"grad_norm": 0.22723363387040352,
|
|
"learning_rate": 1.636454418880347e-05,
|
|
"loss": 1.0165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604562044143677,
|
|
"step": 709,
|
|
"valid_targets_mean": 16162.3,
|
|
"valid_targets_min": 15122
|
|
},
|
|
{
|
|
"epoch": 3.021321961620469,
|
|
"grad_norm": 0.23087794160849823,
|
|
"learning_rate": 1.630610705948693e-05,
|
|
"loss": 1.0108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28909438848495483,
|
|
"step": 710,
|
|
"valid_targets_mean": 16157.8,
|
|
"valid_targets_min": 15578
|
|
},
|
|
{
|
|
"epoch": 3.025586353944563,
|
|
"grad_norm": 0.2278612049391398,
|
|
"learning_rate": 1.6247702561423753e-05,
|
|
"loss": 1.0195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22586047649383545,
|
|
"step": 711,
|
|
"valid_targets_mean": 13078.1,
|
|
"valid_targets_min": 11469
|
|
},
|
|
{
|
|
"epoch": 3.029850746268657,
|
|
"grad_norm": 0.24001780421330396,
|
|
"learning_rate": 1.6189331210549828e-05,
|
|
"loss": 1.0564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30949312448501587,
|
|
"step": 712,
|
|
"valid_targets_mean": 16113.1,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 3.0341151385927505,
|
|
"grad_norm": 0.2436155708375844,
|
|
"learning_rate": 1.613099352250825e-05,
|
|
"loss": 1.0506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25395599007606506,
|
|
"step": 713,
|
|
"valid_targets_mean": 13016.7,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 3.038379530916844,
|
|
"grad_norm": 0.26118984540755125,
|
|
"learning_rate": 1.6072690012644717e-05,
|
|
"loss": 1.0233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2473236322402954,
|
|
"step": 714,
|
|
"valid_targets_mean": 16104.4,
|
|
"valid_targets_min": 14551
|
|
},
|
|
{
|
|
"epoch": 3.0426439232409384,
|
|
"grad_norm": 0.2224532421613924,
|
|
"learning_rate": 1.6014421196003022e-05,
|
|
"loss": 0.9958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721056044101715,
|
|
"step": 715,
|
|
"valid_targets_mean": 16228.6,
|
|
"valid_targets_min": 15711
|
|
},
|
|
{
|
|
"epoch": 3.046908315565032,
|
|
"grad_norm": 0.23300941273161108,
|
|
"learning_rate": 1.5956187587320468e-05,
|
|
"loss": 1.0163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803487092256546,
|
|
"step": 716,
|
|
"valid_targets_mean": 10309.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 3.0511727078891258,
|
|
"grad_norm": 0.23875698882674556,
|
|
"learning_rate": 1.5897989701023355e-05,
|
|
"loss": 1.0308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667001485824585,
|
|
"step": 717,
|
|
"valid_targets_mean": 16109.7,
|
|
"valid_targets_min": 14769
|
|
},
|
|
{
|
|
"epoch": 3.0554371002132195,
|
|
"grad_norm": 0.22441155544330002,
|
|
"learning_rate": 1.58398280512224e-05,
|
|
"loss": 1.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2941433787345886,
|
|
"step": 718,
|
|
"valid_targets_mean": 15781.9,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 3.0597014925373136,
|
|
"grad_norm": 0.22787547247463202,
|
|
"learning_rate": 1.5781703151708215e-05,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18810631334781647,
|
|
"step": 719,
|
|
"valid_targets_mean": 10639.8,
|
|
"valid_targets_min": 4800
|
|
},
|
|
{
|
|
"epoch": 3.0639658848614073,
|
|
"grad_norm": 0.2347582147376603,
|
|
"learning_rate": 1.5723615515946773e-05,
|
|
"loss": 1.0522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743699550628662,
|
|
"step": 720,
|
|
"valid_targets_mean": 16126.2,
|
|
"valid_targets_min": 15410
|
|
},
|
|
{
|
|
"epoch": 3.068230277185501,
|
|
"grad_norm": 0.2396019554271381,
|
|
"learning_rate": 1.5665565657074874e-05,
|
|
"loss": 1.0562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3132514953613281,
|
|
"step": 721,
|
|
"valid_targets_mean": 16086.5,
|
|
"valid_targets_min": 14307
|
|
},
|
|
{
|
|
"epoch": 3.0724946695095947,
|
|
"grad_norm": 0.23540444130078883,
|
|
"learning_rate": 1.560755408789558e-05,
|
|
"loss": 1.0111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244694083929062,
|
|
"step": 722,
|
|
"valid_targets_mean": 14448.2,
|
|
"valid_targets_min": 12279
|
|
},
|
|
{
|
|
"epoch": 3.076759061833689,
|
|
"grad_norm": 0.21770435242774863,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 1.0388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28377026319503784,
|
|
"step": 723,
|
|
"valid_targets_mean": 16100.5,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 3.0810234541577826,
|
|
"grad_norm": 0.23265730631315767,
|
|
"learning_rate": 1.5491647868131343e-05,
|
|
"loss": 1.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843765616416931,
|
|
"step": 724,
|
|
"valid_targets_mean": 16215.7,
|
|
"valid_targets_min": 15372
|
|
},
|
|
{
|
|
"epoch": 3.0852878464818763,
|
|
"grad_norm": 0.2194538754887101,
|
|
"learning_rate": 1.5433754241443223e-05,
|
|
"loss": 1.0092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23863404989242554,
|
|
"step": 725,
|
|
"valid_targets_mean": 15597.2,
|
|
"valid_targets_min": 13274
|
|
},
|
|
{
|
|
"epoch": 3.08955223880597,
|
|
"grad_norm": 0.23074129167573834,
|
|
"learning_rate": 1.53759009522323e-05,
|
|
"loss": 1.0145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26802095770835876,
|
|
"step": 726,
|
|
"valid_targets_mean": 16214.7,
|
|
"valid_targets_min": 15574
|
|
},
|
|
{
|
|
"epoch": 3.093816631130064,
|
|
"grad_norm": 0.2522402268059096,
|
|
"learning_rate": 1.5318088511565185e-05,
|
|
"loss": 0.9812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2044985443353653,
|
|
"step": 727,
|
|
"valid_targets_mean": 11884.8,
|
|
"valid_targets_min": 1898
|
|
},
|
|
{
|
|
"epoch": 3.098081023454158,
|
|
"grad_norm": 0.23790237363419217,
|
|
"learning_rate": 1.5260317430147627e-05,
|
|
"loss": 0.9949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2379896193742752,
|
|
"step": 728,
|
|
"valid_targets_mean": 15739.9,
|
|
"valid_targets_min": 13414
|
|
},
|
|
{
|
|
"epoch": 3.1023454157782515,
|
|
"grad_norm": 0.24412181927245838,
|
|
"learning_rate": 1.5202588218320024e-05,
|
|
"loss": 1.0149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855202853679657,
|
|
"step": 729,
|
|
"valid_targets_mean": 16185.2,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 3.106609808102345,
|
|
"grad_norm": 0.24245536165586512,
|
|
"learning_rate": 1.5144901386052924e-05,
|
|
"loss": 0.985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14238911867141724,
|
|
"step": 730,
|
|
"valid_targets_mean": 8392.7,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.1108742004264394,
|
|
"grad_norm": 0.22716508602160268,
|
|
"learning_rate": 1.5087257442942467e-05,
|
|
"loss": 1.0369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581910789012909,
|
|
"step": 731,
|
|
"valid_targets_mean": 16164.5,
|
|
"valid_targets_min": 15163
|
|
},
|
|
{
|
|
"epoch": 3.115138592750533,
|
|
"grad_norm": 0.22496683546445154,
|
|
"learning_rate": 1.502965689820593e-05,
|
|
"loss": 1.0127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293232798576355,
|
|
"step": 732,
|
|
"valid_targets_mean": 16121.2,
|
|
"valid_targets_min": 15579
|
|
},
|
|
{
|
|
"epoch": 3.1194029850746268,
|
|
"grad_norm": 0.20609639755204723,
|
|
"learning_rate": 1.4972100260677222e-05,
|
|
"loss": 1.0679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20337709784507751,
|
|
"step": 733,
|
|
"valid_targets_mean": 12018.5,
|
|
"valid_targets_min": 6953
|
|
},
|
|
{
|
|
"epoch": 3.1236673773987205,
|
|
"grad_norm": 0.20993276078800788,
|
|
"learning_rate": 1.4914588038802383e-05,
|
|
"loss": 1.0256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284699022769928,
|
|
"step": 734,
|
|
"valid_targets_mean": 16149.8,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 3.1279317697228146,
|
|
"grad_norm": 0.22073555730795605,
|
|
"learning_rate": 1.4857120740635084e-05,
|
|
"loss": 1.0357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081209063529968,
|
|
"step": 735,
|
|
"valid_targets_mean": 16092.7,
|
|
"valid_targets_min": 14058
|
|
},
|
|
{
|
|
"epoch": 3.1321961620469083,
|
|
"grad_norm": 0.2118749400777254,
|
|
"learning_rate": 1.4799698873832153e-05,
|
|
"loss": 1.0533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25038349628448486,
|
|
"step": 736,
|
|
"valid_targets_mean": 14434.2,
|
|
"valid_targets_min": 12077
|
|
},
|
|
{
|
|
"epoch": 3.136460554371002,
|
|
"grad_norm": 0.21808191893465817,
|
|
"learning_rate": 1.4742322945649073e-05,
|
|
"loss": 0.9979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898986339569092,
|
|
"step": 737,
|
|
"valid_targets_mean": 16217.7,
|
|
"valid_targets_min": 15808
|
|
},
|
|
{
|
|
"epoch": 3.140724946695096,
|
|
"grad_norm": 0.21834155157389645,
|
|
"learning_rate": 1.4684993462935532e-05,
|
|
"loss": 0.9881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285723090171814,
|
|
"step": 738,
|
|
"valid_targets_mean": 13375.1,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 3.14498933901919,
|
|
"grad_norm": 0.21066829367355328,
|
|
"learning_rate": 1.462771093213092e-05,
|
|
"loss": 1.0113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540314197540283,
|
|
"step": 739,
|
|
"valid_targets_mean": 15798.1,
|
|
"valid_targets_min": 13840
|
|
},
|
|
{
|
|
"epoch": 3.1492537313432836,
|
|
"grad_norm": 0.22388010721759177,
|
|
"learning_rate": 1.4570475859259856e-05,
|
|
"loss": 0.9978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815060615539551,
|
|
"step": 740,
|
|
"valid_targets_mean": 16178.0,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 3.1535181236673773,
|
|
"grad_norm": 0.25907589389289265,
|
|
"learning_rate": 1.4513288749927714e-05,
|
|
"loss": 0.9978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1643589735031128,
|
|
"step": 741,
|
|
"valid_targets_mean": 9426.2,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 3.1577825159914714,
|
|
"grad_norm": 0.20641498134376396,
|
|
"learning_rate": 1.4456150109316192e-05,
|
|
"loss": 1.024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26102137565612793,
|
|
"step": 742,
|
|
"valid_targets_mean": 16006.3,
|
|
"valid_targets_min": 14473
|
|
},
|
|
{
|
|
"epoch": 3.162046908315565,
|
|
"grad_norm": 0.240935797241021,
|
|
"learning_rate": 1.4399060442178798e-05,
|
|
"loss": 0.947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2881847620010376,
|
|
"step": 743,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15476
|
|
},
|
|
{
|
|
"epoch": 3.166311300639659,
|
|
"grad_norm": 0.22276458014826844,
|
|
"learning_rate": 1.4342020252836437e-05,
|
|
"loss": 1.0389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19214186072349548,
|
|
"step": 744,
|
|
"valid_targets_mean": 10538.3,
|
|
"valid_targets_min": 6177
|
|
},
|
|
{
|
|
"epoch": 3.1705756929637525,
|
|
"grad_norm": 0.21187980596465947,
|
|
"learning_rate": 1.4285030045172913e-05,
|
|
"loss": 1.0197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26047828793525696,
|
|
"step": 745,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 14124
|
|
},
|
|
{
|
|
"epoch": 3.1748400852878467,
|
|
"grad_norm": 0.21632618709500964,
|
|
"learning_rate": 1.422809032263052e-05,
|
|
"loss": 1.0468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817734181880951,
|
|
"step": 746,
|
|
"valid_targets_mean": 16165.9,
|
|
"valid_targets_min": 15366
|
|
},
|
|
{
|
|
"epoch": 3.1791044776119404,
|
|
"grad_norm": 0.19992786613248376,
|
|
"learning_rate": 1.4171201588205566e-05,
|
|
"loss": 1.0547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24404045939445496,
|
|
"step": 747,
|
|
"valid_targets_mean": 13604.9,
|
|
"valid_targets_min": 10519
|
|
},
|
|
{
|
|
"epoch": 3.183368869936034,
|
|
"grad_norm": 0.20249665386988036,
|
|
"learning_rate": 1.4114364344443935e-05,
|
|
"loss": 0.9756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264335572719574,
|
|
"step": 748,
|
|
"valid_targets_mean": 16148.0,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 3.1876332622601278,
|
|
"grad_norm": 0.2125162350655941,
|
|
"learning_rate": 1.4057579093436653e-05,
|
|
"loss": 1.0189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.296421617269516,
|
|
"step": 749,
|
|
"valid_targets_mean": 16185.6,
|
|
"valid_targets_min": 15616
|
|
},
|
|
{
|
|
"epoch": 3.191897654584222,
|
|
"grad_norm": 0.20187139404961882,
|
|
"learning_rate": 1.400084633681546e-05,
|
|
"loss": 1.037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25970757007598877,
|
|
"step": 750,
|
|
"valid_targets_mean": 15437.9,
|
|
"valid_targets_min": 13886
|
|
},
|
|
{
|
|
"epoch": 3.1961620469083156,
|
|
"grad_norm": 0.1933929537100215,
|
|
"learning_rate": 1.3944166575748355e-05,
|
|
"loss": 0.9745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2798892855644226,
|
|
"step": 751,
|
|
"valid_targets_mean": 16108.0,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 3.2004264392324093,
|
|
"grad_norm": 0.24288889012927542,
|
|
"learning_rate": 1.3887540310935187e-05,
|
|
"loss": 1.0267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1941322237253189,
|
|
"step": 752,
|
|
"valid_targets_mean": 10666.6,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 3.204690831556503,
|
|
"grad_norm": 0.19913062025676398,
|
|
"learning_rate": 1.3830968042603226e-05,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26480650901794434,
|
|
"step": 753,
|
|
"valid_targets_mean": 15740.0,
|
|
"valid_targets_min": 14420
|
|
},
|
|
{
|
|
"epoch": 3.208955223880597,
|
|
"grad_norm": 0.2455636710309093,
|
|
"learning_rate": 1.3774450270502762e-05,
|
|
"loss": 0.9949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31609392166137695,
|
|
"step": 754,
|
|
"valid_targets_mean": 16115.1,
|
|
"valid_targets_min": 14966
|
|
},
|
|
{
|
|
"epoch": 3.213219616204691,
|
|
"grad_norm": 0.2118354645940954,
|
|
"learning_rate": 1.3717987493902656e-05,
|
|
"loss": 1.0205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14785653352737427,
|
|
"step": 755,
|
|
"valid_targets_mean": 8256.4,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 3.2174840085287846,
|
|
"grad_norm": 0.2430534376385067,
|
|
"learning_rate": 1.3661580211585947e-05,
|
|
"loss": 1.0203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25054389238357544,
|
|
"step": 756,
|
|
"valid_targets_mean": 16184.0,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 3.2217484008528783,
|
|
"grad_norm": 0.24162088545510896,
|
|
"learning_rate": 1.3605228921845457e-05,
|
|
"loss": 1.0206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30578795075416565,
|
|
"step": 757,
|
|
"valid_targets_mean": 16165.3,
|
|
"valid_targets_min": 15239
|
|
},
|
|
{
|
|
"epoch": 3.2260127931769724,
|
|
"grad_norm": 0.2240326703391185,
|
|
"learning_rate": 1.3548934122479373e-05,
|
|
"loss": 1.0558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20352651178836823,
|
|
"step": 758,
|
|
"valid_targets_mean": 11775.9,
|
|
"valid_targets_min": 8991
|
|
},
|
|
{
|
|
"epoch": 3.230277185501066,
|
|
"grad_norm": 0.2410022141296356,
|
|
"learning_rate": 1.349269631078686e-05,
|
|
"loss": 1.0173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741716504096985,
|
|
"step": 759,
|
|
"valid_targets_mean": 16214.9,
|
|
"valid_targets_min": 15357
|
|
},
|
|
{
|
|
"epoch": 3.23454157782516,
|
|
"grad_norm": 0.22791667668633342,
|
|
"learning_rate": 1.3436515983563659e-05,
|
|
"loss": 1.0685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30892500281333923,
|
|
"step": 760,
|
|
"valid_targets_mean": 16046.1,
|
|
"valid_targets_min": 15157
|
|
},
|
|
{
|
|
"epoch": 3.2388059701492535,
|
|
"grad_norm": 0.22418104434533087,
|
|
"learning_rate": 1.3380393637097692e-05,
|
|
"loss": 0.9868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24890297651290894,
|
|
"step": 761,
|
|
"valid_targets_mean": 14222.7,
|
|
"valid_targets_min": 12022
|
|
},
|
|
{
|
|
"epoch": 3.2430703624733477,
|
|
"grad_norm": 0.22364469895511957,
|
|
"learning_rate": 1.3324329767164708e-05,
|
|
"loss": 1.0181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2839733958244324,
|
|
"step": 762,
|
|
"valid_targets_mean": 16097.0,
|
|
"valid_targets_min": 13888
|
|
},
|
|
{
|
|
"epoch": 3.2473347547974414,
|
|
"grad_norm": 0.2138675118579394,
|
|
"learning_rate": 1.3268324869023878e-05,
|
|
"loss": 0.9988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2439764440059662,
|
|
"step": 763,
|
|
"valid_targets_mean": 13022.2,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 3.251599147121535,
|
|
"grad_norm": 0.20025910558984245,
|
|
"learning_rate": 1.3212379437413421e-05,
|
|
"loss": 1.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266014039516449,
|
|
"step": 764,
|
|
"valid_targets_mean": 15528.1,
|
|
"valid_targets_min": 14452
|
|
},
|
|
{
|
|
"epoch": 3.2558635394456292,
|
|
"grad_norm": 0.21667010718081733,
|
|
"learning_rate": 1.3156493966546236e-05,
|
|
"loss": 1.0167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28856518864631653,
|
|
"step": 765,
|
|
"valid_targets_mean": 16141.7,
|
|
"valid_targets_min": 14987
|
|
},
|
|
{
|
|
"epoch": 3.260127931769723,
|
|
"grad_norm": 0.2185113869050033,
|
|
"learning_rate": 1.3100668950105534e-05,
|
|
"loss": 1.0809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19959990680217743,
|
|
"step": 766,
|
|
"valid_targets_mean": 10493.4,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 3.2643923240938166,
|
|
"grad_norm": 0.19663806577012186,
|
|
"learning_rate": 1.3044904881240507e-05,
|
|
"loss": 1.0014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25376349687576294,
|
|
"step": 767,
|
|
"valid_targets_mean": 16092.9,
|
|
"valid_targets_min": 15480
|
|
},
|
|
{
|
|
"epoch": 3.2686567164179103,
|
|
"grad_norm": 0.2194771759902532,
|
|
"learning_rate": 1.2989202252561926e-05,
|
|
"loss": 1.0207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31395870447158813,
|
|
"step": 768,
|
|
"valid_targets_mean": 16164.0,
|
|
"valid_targets_min": 15618
|
|
},
|
|
{
|
|
"epoch": 3.272921108742004,
|
|
"grad_norm": 0.21227372654419208,
|
|
"learning_rate": 1.2933561556137806e-05,
|
|
"loss": 0.9937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20684891939163208,
|
|
"step": 769,
|
|
"valid_targets_mean": 11801.2,
|
|
"valid_targets_min": 9058
|
|
},
|
|
{
|
|
"epoch": 3.277185501066098,
|
|
"grad_norm": 0.2304324486379456,
|
|
"learning_rate": 1.2877983283489062e-05,
|
|
"loss": 1.0106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633814513683319,
|
|
"step": 770,
|
|
"valid_targets_mean": 16097.4,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 3.281449893390192,
|
|
"grad_norm": 0.20527170635041583,
|
|
"learning_rate": 1.2822467925585186e-05,
|
|
"loss": 1.0257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899390757083893,
|
|
"step": 771,
|
|
"valid_targets_mean": 16153.3,
|
|
"valid_targets_min": 15217
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.20913612528231754,
|
|
"learning_rate": 1.2767015972839879e-05,
|
|
"loss": 0.9893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21578413248062134,
|
|
"step": 772,
|
|
"valid_targets_mean": 13491.8,
|
|
"valid_targets_min": 11383
|
|
},
|
|
{
|
|
"epoch": 3.2899786780383797,
|
|
"grad_norm": 0.19422581490469495,
|
|
"learning_rate": 1.2711627915106728e-05,
|
|
"loss": 1.0052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2850065231323242,
|
|
"step": 773,
|
|
"valid_targets_mean": 16005.6,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 3.2942430703624734,
|
|
"grad_norm": 0.2108346113520723,
|
|
"learning_rate": 1.2656304241674877e-05,
|
|
"loss": 0.9973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30305132269859314,
|
|
"step": 774,
|
|
"valid_targets_mean": 16154.3,
|
|
"valid_targets_min": 15623
|
|
},
|
|
{
|
|
"epoch": 3.298507462686567,
|
|
"grad_norm": 0.18490596176842927,
|
|
"learning_rate": 1.2601045441264734e-05,
|
|
"loss": 0.9799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24768772721290588,
|
|
"step": 775,
|
|
"valid_targets_mean": 15966.1,
|
|
"valid_targets_min": 14649
|
|
},
|
|
{
|
|
"epoch": 3.302771855010661,
|
|
"grad_norm": 0.2065317895797339,
|
|
"learning_rate": 1.2545852002023599e-05,
|
|
"loss": 0.9942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30079466104507446,
|
|
"step": 776,
|
|
"valid_targets_mean": 16077.0,
|
|
"valid_targets_min": 14795
|
|
},
|
|
{
|
|
"epoch": 3.307036247334755,
|
|
"grad_norm": 0.23824529055789273,
|
|
"learning_rate": 1.2490724411521406e-05,
|
|
"loss": 1.0184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20452925562858582,
|
|
"step": 777,
|
|
"valid_targets_mean": 11349.3,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 3.3113006396588487,
|
|
"grad_norm": 0.21648887677200584,
|
|
"learning_rate": 1.243566315674637e-05,
|
|
"loss": 1.0384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684295177459717,
|
|
"step": 778,
|
|
"valid_targets_mean": 15895.2,
|
|
"valid_targets_min": 14853
|
|
},
|
|
{
|
|
"epoch": 3.3155650319829424,
|
|
"grad_norm": 0.21679968258683346,
|
|
"learning_rate": 1.238066872410073e-05,
|
|
"loss": 0.9662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29563671350479126,
|
|
"step": 779,
|
|
"valid_targets_mean": 16154.4,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 3.319829424307036,
|
|
"grad_norm": 0.22116485254505697,
|
|
"learning_rate": 1.2325741599396418e-05,
|
|
"loss": 0.9854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13217319548130035,
|
|
"step": 780,
|
|
"valid_targets_mean": 7354.0,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 3.3240938166311302,
|
|
"grad_norm": 0.20648000330195984,
|
|
"learning_rate": 1.2270882267850765e-05,
|
|
"loss": 1.0674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683570981025696,
|
|
"step": 781,
|
|
"valid_targets_mean": 16123.9,
|
|
"valid_targets_min": 15331
|
|
},
|
|
{
|
|
"epoch": 3.328358208955224,
|
|
"grad_norm": 0.21007057028989912,
|
|
"learning_rate": 1.2216091214082248e-05,
|
|
"loss": 1.0288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28703442215919495,
|
|
"step": 782,
|
|
"valid_targets_mean": 16087.6,
|
|
"valid_targets_min": 13835
|
|
},
|
|
{
|
|
"epoch": 3.3326226012793176,
|
|
"grad_norm": 0.2157248680996307,
|
|
"learning_rate": 1.2161368922106192e-05,
|
|
"loss": 1.0276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1911306381225586,
|
|
"step": 783,
|
|
"valid_targets_mean": 11460.7,
|
|
"valid_targets_min": 8049
|
|
},
|
|
{
|
|
"epoch": 3.3368869936034113,
|
|
"grad_norm": 0.21780108169899068,
|
|
"learning_rate": 1.2106715875330475e-05,
|
|
"loss": 1.0501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27310866117477417,
|
|
"step": 784,
|
|
"valid_targets_mean": 16209.4,
|
|
"valid_targets_min": 15350
|
|
},
|
|
{
|
|
"epoch": 3.3411513859275055,
|
|
"grad_norm": 0.21280209729977745,
|
|
"learning_rate": 1.2052132556551275e-05,
|
|
"loss": 1.0367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992352247238159,
|
|
"step": 785,
|
|
"valid_targets_mean": 16167.2,
|
|
"valid_targets_min": 15730
|
|
},
|
|
{
|
|
"epoch": 3.345415778251599,
|
|
"grad_norm": 0.21333591743615385,
|
|
"learning_rate": 1.1997619447948814e-05,
|
|
"loss": 1.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24031849205493927,
|
|
"step": 786,
|
|
"valid_targets_mean": 13789.7,
|
|
"valid_targets_min": 11708
|
|
},
|
|
{
|
|
"epoch": 3.349680170575693,
|
|
"grad_norm": 0.20205112261373354,
|
|
"learning_rate": 1.1943177031083094e-05,
|
|
"loss": 1.0053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29222190380096436,
|
|
"step": 787,
|
|
"valid_targets_mean": 16186.4,
|
|
"valid_targets_min": 14810
|
|
},
|
|
{
|
|
"epoch": 3.3539445628997866,
|
|
"grad_norm": 0.20941481525377442,
|
|
"learning_rate": 1.1888805786889621e-05,
|
|
"loss": 1.0411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2621380388736725,
|
|
"step": 788,
|
|
"valid_targets_mean": 13417.4,
|
|
"valid_targets_min": 3667
|
|
},
|
|
{
|
|
"epoch": 3.3582089552238807,
|
|
"grad_norm": 0.2004826920475616,
|
|
"learning_rate": 1.183450619567518e-05,
|
|
"loss": 1.0038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25708091259002686,
|
|
"step": 789,
|
|
"valid_targets_mean": 15748.0,
|
|
"valid_targets_min": 13975
|
|
},
|
|
{
|
|
"epoch": 3.3624733475479744,
|
|
"grad_norm": 0.18353874472333903,
|
|
"learning_rate": 1.1780278737113581e-05,
|
|
"loss": 1.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851468622684479,
|
|
"step": 790,
|
|
"valid_targets_mean": 15791.8,
|
|
"valid_targets_min": 5994
|
|
},
|
|
{
|
|
"epoch": 3.366737739872068,
|
|
"grad_norm": 0.1936685158973221,
|
|
"learning_rate": 1.1726123890241439e-05,
|
|
"loss": 0.9975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17022772133350372,
|
|
"step": 791,
|
|
"valid_targets_mean": 9803.9,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 3.3710021321961623,
|
|
"grad_norm": 0.1836632139660297,
|
|
"learning_rate": 1.1672042133453925e-05,
|
|
"loss": 1.0138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24327097833156586,
|
|
"step": 792,
|
|
"valid_targets_mean": 16135.9,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 3.375266524520256,
|
|
"grad_norm": 0.19606375449703972,
|
|
"learning_rate": 1.1618033944500527e-05,
|
|
"loss": 0.9982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32109493017196655,
|
|
"step": 793,
|
|
"valid_targets_mean": 16139.2,
|
|
"valid_targets_min": 15361
|
|
},
|
|
{
|
|
"epoch": 3.3795309168443497,
|
|
"grad_norm": 0.20067880131394983,
|
|
"learning_rate": 1.1564099800480864e-05,
|
|
"loss": 1.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19777941703796387,
|
|
"step": 794,
|
|
"valid_targets_mean": 11481.5,
|
|
"valid_targets_min": 6068
|
|
},
|
|
{
|
|
"epoch": 3.3837953091684434,
|
|
"grad_norm": 0.20515621052947725,
|
|
"learning_rate": 1.151024017784045e-05,
|
|
"loss": 1.0054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24627156555652618,
|
|
"step": 795,
|
|
"valid_targets_mean": 16202.7,
|
|
"valid_targets_min": 15135
|
|
},
|
|
{
|
|
"epoch": 3.388059701492537,
|
|
"grad_norm": 0.2178803359427102,
|
|
"learning_rate": 1.1456455552366488e-05,
|
|
"loss": 1.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31557852029800415,
|
|
"step": 796,
|
|
"valid_targets_mean": 16132.7,
|
|
"valid_targets_min": 14783
|
|
},
|
|
{
|
|
"epoch": 3.3923240938166312,
|
|
"grad_norm": 0.18350168282278012,
|
|
"learning_rate": 1.1402746399183671e-05,
|
|
"loss": 1.0137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24853818118572235,
|
|
"step": 797,
|
|
"valid_targets_mean": 13810.4,
|
|
"valid_targets_min": 11194
|
|
},
|
|
{
|
|
"epoch": 3.396588486140725,
|
|
"grad_norm": 0.19600293746108755,
|
|
"learning_rate": 1.1349113192749986e-05,
|
|
"loss": 1.0325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772599160671234,
|
|
"step": 798,
|
|
"valid_targets_mean": 16136.4,
|
|
"valid_targets_min": 13927
|
|
},
|
|
{
|
|
"epoch": 3.4008528784648187,
|
|
"grad_norm": 0.20306513004117993,
|
|
"learning_rate": 1.1295556406852488e-05,
|
|
"loss": 1.0258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29193854331970215,
|
|
"step": 799,
|
|
"valid_targets_mean": 16118.9,
|
|
"valid_targets_min": 15147
|
|
},
|
|
{
|
|
"epoch": 3.405117270788913,
|
|
"grad_norm": 0.1789680337786877,
|
|
"learning_rate": 1.1242076514603201e-05,
|
|
"loss": 1.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25641292333602905,
|
|
"step": 800,
|
|
"valid_targets_mean": 14175.8,
|
|
"valid_targets_min": 12875
|
|
},
|
|
{
|
|
"epoch": 3.4093816631130065,
|
|
"grad_norm": 0.21327911409893333,
|
|
"learning_rate": 1.1188673988434831e-05,
|
|
"loss": 1.04,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3180328607559204,
|
|
"step": 801,
|
|
"valid_targets_mean": 16137.8,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 3.4136460554371,
|
|
"grad_norm": 0.18702688074220328,
|
|
"learning_rate": 1.1135349300096667e-05,
|
|
"loss": 1.0053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20022016763687134,
|
|
"step": 802,
|
|
"valid_targets_mean": 10829.2,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 3.417910447761194,
|
|
"grad_norm": 0.20021405336513426,
|
|
"learning_rate": 1.1082102920650397e-05,
|
|
"loss": 1.0103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536824345588684,
|
|
"step": 803,
|
|
"valid_targets_mean": 16158.3,
|
|
"valid_targets_min": 15109
|
|
},
|
|
{
|
|
"epoch": 3.4221748400852876,
|
|
"grad_norm": 0.2343082045165571,
|
|
"learning_rate": 1.102893532046593e-05,
|
|
"loss": 1.039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28357917070388794,
|
|
"step": 804,
|
|
"valid_targets_mean": 16059.1,
|
|
"valid_targets_min": 14384
|
|
},
|
|
{
|
|
"epoch": 3.4264392324093818,
|
|
"grad_norm": 0.18774598172060927,
|
|
"learning_rate": 1.0975846969217258e-05,
|
|
"loss": 1.0246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15860168635845184,
|
|
"step": 805,
|
|
"valid_targets_mean": 8455.3,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.4307036247334755,
|
|
"grad_norm": 0.19433251187804543,
|
|
"learning_rate": 1.092283833587829e-05,
|
|
"loss": 1.0397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533392608165741,
|
|
"step": 806,
|
|
"valid_targets_mean": 16085.5,
|
|
"valid_targets_min": 14929
|
|
},
|
|
{
|
|
"epoch": 3.434968017057569,
|
|
"grad_norm": 0.21801910284939063,
|
|
"learning_rate": 1.086990988871873e-05,
|
|
"loss": 1.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913307249546051,
|
|
"step": 807,
|
|
"valid_targets_mean": 16047.4,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 3.4392324093816633,
|
|
"grad_norm": 0.19502316630234423,
|
|
"learning_rate": 1.0817062095299929e-05,
|
|
"loss": 1.0441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18524152040481567,
|
|
"step": 808,
|
|
"valid_targets_mean": 11042.4,
|
|
"valid_targets_min": 7440
|
|
},
|
|
{
|
|
"epoch": 3.443496801705757,
|
|
"grad_norm": 0.20864485170866784,
|
|
"learning_rate": 1.0764295422470755e-05,
|
|
"loss": 1.0013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2895178496837616,
|
|
"step": 809,
|
|
"valid_targets_mean": 16087.7,
|
|
"valid_targets_min": 13211
|
|
},
|
|
{
|
|
"epoch": 3.4477611940298507,
|
|
"grad_norm": 0.18863795458851085,
|
|
"learning_rate": 1.0711610336363477e-05,
|
|
"loss": 1.0257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027087152004242,
|
|
"step": 810,
|
|
"valid_targets_mean": 16093.6,
|
|
"valid_targets_min": 14556
|
|
},
|
|
{
|
|
"epoch": 3.4520255863539444,
|
|
"grad_norm": 0.18456814498100765,
|
|
"learning_rate": 1.065900730238961e-05,
|
|
"loss": 1.025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23894429206848145,
|
|
"step": 811,
|
|
"valid_targets_mean": 14490.8,
|
|
"valid_targets_min": 12326
|
|
},
|
|
{
|
|
"epoch": 3.4562899786780386,
|
|
"grad_norm": 0.1902760939156765,
|
|
"learning_rate": 1.0606486785235879e-05,
|
|
"loss": 0.9798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806459665298462,
|
|
"step": 812,
|
|
"valid_targets_mean": 16143.8,
|
|
"valid_targets_min": 14907
|
|
},
|
|
{
|
|
"epoch": 3.4605543710021323,
|
|
"grad_norm": 0.19157785153299178,
|
|
"learning_rate": 1.0554049248860045e-05,
|
|
"loss": 1.0569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25310009717941284,
|
|
"step": 813,
|
|
"valid_targets_mean": 12887.4,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 3.464818763326226,
|
|
"grad_norm": 0.18985238273898838,
|
|
"learning_rate": 1.0501695156486819e-05,
|
|
"loss": 1.0567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25601091980934143,
|
|
"step": 814,
|
|
"valid_targets_mean": 15759.6,
|
|
"valid_targets_min": 11287
|
|
},
|
|
{
|
|
"epoch": 3.4690831556503197,
|
|
"grad_norm": 0.19855924467029767,
|
|
"learning_rate": 1.0449424970603796e-05,
|
|
"loss": 0.9965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26552850008010864,
|
|
"step": 815,
|
|
"valid_targets_mean": 16230.9,
|
|
"valid_targets_min": 15773
|
|
},
|
|
{
|
|
"epoch": 3.473347547974414,
|
|
"grad_norm": 0.1974055282587953,
|
|
"learning_rate": 1.0397239152957356e-05,
|
|
"loss": 1.0171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16817912459373474,
|
|
"step": 816,
|
|
"valid_targets_mean": 9027.7,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 3.4776119402985075,
|
|
"grad_norm": 0.20712085130917862,
|
|
"learning_rate": 1.034513816454858e-05,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2572016716003418,
|
|
"step": 817,
|
|
"valid_targets_mean": 16111.5,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 3.481876332622601,
|
|
"grad_norm": 0.19575625777873262,
|
|
"learning_rate": 1.0293122465629186e-05,
|
|
"loss": 1.0049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861330509185791,
|
|
"step": 818,
|
|
"valid_targets_mean": 16084.9,
|
|
"valid_targets_min": 13788
|
|
},
|
|
{
|
|
"epoch": 3.486140724946695,
|
|
"grad_norm": 0.22385303113206428,
|
|
"learning_rate": 1.0241192515697432e-05,
|
|
"loss": 1.011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20530882477760315,
|
|
"step": 819,
|
|
"valid_targets_mean": 12038.9,
|
|
"valid_targets_min": 7752
|
|
},
|
|
{
|
|
"epoch": 3.490405117270789,
|
|
"grad_norm": 0.1946858165374974,
|
|
"learning_rate": 1.0189348773494135e-05,
|
|
"loss": 1.0241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773318290710449,
|
|
"step": 820,
|
|
"valid_targets_mean": 16122.8,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 3.4946695095948828,
|
|
"grad_norm": 0.19537526220319537,
|
|
"learning_rate": 1.0137591696998514e-05,
|
|
"loss": 1.0268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31383180618286133,
|
|
"step": 821,
|
|
"valid_targets_mean": 16039.4,
|
|
"valid_targets_min": 15167
|
|
},
|
|
{
|
|
"epoch": 3.4989339019189765,
|
|
"grad_norm": 0.24280617354394693,
|
|
"learning_rate": 1.0085921743424225e-05,
|
|
"loss": 1.0271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22470004856586456,
|
|
"step": 822,
|
|
"valid_targets_mean": 13571.1,
|
|
"valid_targets_min": 11707
|
|
},
|
|
{
|
|
"epoch": 3.50319829424307,
|
|
"grad_norm": 0.18304494041520544,
|
|
"learning_rate": 1.0034339369215288e-05,
|
|
"loss": 1.0324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28197503089904785,
|
|
"step": 823,
|
|
"valid_targets_mean": 16043.7,
|
|
"valid_targets_min": 13934
|
|
},
|
|
{
|
|
"epoch": 3.5074626865671643,
|
|
"grad_norm": 0.19420082892046947,
|
|
"learning_rate": 9.982845030042068e-06,
|
|
"loss": 1.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.337486207485199,
|
|
"step": 824,
|
|
"valid_targets_mean": 16131.1,
|
|
"valid_targets_min": 15258
|
|
},
|
|
{
|
|
"epoch": 3.511727078891258,
|
|
"grad_norm": 0.20605511973553206,
|
|
"learning_rate": 9.931439180797237e-06,
|
|
"loss": 1.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22845080494880676,
|
|
"step": 825,
|
|
"valid_targets_mean": 14083.7,
|
|
"valid_targets_min": 12058
|
|
},
|
|
{
|
|
"epoch": 3.5159914712153517,
|
|
"grad_norm": 0.18888891400658922,
|
|
"learning_rate": 9.880122275591752e-06,
|
|
"loss": 1.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947491407394409,
|
|
"step": 826,
|
|
"valid_targets_mean": 16235.3,
|
|
"valid_targets_min": 15869
|
|
},
|
|
{
|
|
"epoch": 3.520255863539446,
|
|
"grad_norm": 0.2063578630426139,
|
|
"learning_rate": 9.828894767750865e-06,
|
|
"loss": 1.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2067328840494156,
|
|
"step": 827,
|
|
"valid_targets_mean": 10532.3,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 3.5245202558635396,
|
|
"grad_norm": 0.19912988426708253,
|
|
"learning_rate": 9.777757109810102e-06,
|
|
"loss": 1.0312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553637623786926,
|
|
"step": 828,
|
|
"valid_targets_mean": 16077.7,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 3.5287846481876333,
|
|
"grad_norm": 0.18925687906293026,
|
|
"learning_rate": 9.726709753511275e-06,
|
|
"loss": 1.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2934688329696655,
|
|
"step": 829,
|
|
"valid_targets_mean": 16147.0,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 3.533049040511727,
|
|
"grad_norm": 0.2094808172783584,
|
|
"learning_rate": 9.675753149798474e-06,
|
|
"loss": 1.0262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16240262985229492,
|
|
"step": 830,
|
|
"valid_targets_mean": 9455.6,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 3.5373134328358207,
|
|
"grad_norm": 0.18576715219917836,
|
|
"learning_rate": 9.624887748814118e-06,
|
|
"loss": 1.0308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24754361808300018,
|
|
"step": 831,
|
|
"valid_targets_mean": 16233.1,
|
|
"valid_targets_min": 15409
|
|
},
|
|
{
|
|
"epoch": 3.541577825159915,
|
|
"grad_norm": 0.2064767838511618,
|
|
"learning_rate": 9.574113999894909e-06,
|
|
"loss": 1.0296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274272620677948,
|
|
"step": 832,
|
|
"valid_targets_mean": 16230.2,
|
|
"valid_targets_min": 14917
|
|
},
|
|
{
|
|
"epoch": 3.5458422174840085,
|
|
"grad_norm": 0.20402400381489313,
|
|
"learning_rate": 9.523432351567979e-06,
|
|
"loss": 1.0223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.202209010720253,
|
|
"step": 833,
|
|
"valid_targets_mean": 12193.7,
|
|
"valid_targets_min": 9042
|
|
},
|
|
{
|
|
"epoch": 3.550106609808102,
|
|
"grad_norm": 0.19935362434216167,
|
|
"learning_rate": 9.472843251546792e-06,
|
|
"loss": 1.0421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31874680519104004,
|
|
"step": 834,
|
|
"valid_targets_mean": 16074.3,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 3.5543710021321964,
|
|
"grad_norm": 0.21569338244238667,
|
|
"learning_rate": 9.422347146727294e-06,
|
|
"loss": 1.0171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925390601158142,
|
|
"step": 835,
|
|
"valid_targets_mean": 16097.2,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 3.55863539445629,
|
|
"grad_norm": 0.19039616467467887,
|
|
"learning_rate": 9.371944483183912e-06,
|
|
"loss": 1.0199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24568170309066772,
|
|
"step": 836,
|
|
"valid_targets_mean": 13546.5,
|
|
"valid_targets_min": 11766
|
|
},
|
|
{
|
|
"epoch": 3.5628997867803838,
|
|
"grad_norm": 0.19359612793383688,
|
|
"learning_rate": 9.321635706165635e-06,
|
|
"loss": 1.0503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2961440980434418,
|
|
"step": 837,
|
|
"valid_targets_mean": 16135.5,
|
|
"valid_targets_min": 15091
|
|
},
|
|
{
|
|
"epoch": 3.5671641791044775,
|
|
"grad_norm": 0.2212993937532291,
|
|
"learning_rate": 9.271421260092075e-06,
|
|
"loss": 1.0581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26963526010513306,
|
|
"step": 838,
|
|
"valid_targets_mean": 12912.3,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.17477649578500942,
|
|
"learning_rate": 9.221301588549519e-06,
|
|
"loss": 1.0187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25952205061912537,
|
|
"step": 839,
|
|
"valid_targets_mean": 15765.0,
|
|
"valid_targets_min": 14646
|
|
},
|
|
{
|
|
"epoch": 3.5756929637526653,
|
|
"grad_norm": 0.19564495166764467,
|
|
"learning_rate": 9.171277134287057e-06,
|
|
"loss": 1.0423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28726887702941895,
|
|
"step": 840,
|
|
"valid_targets_mean": 16137.4,
|
|
"valid_targets_min": 14841
|
|
},
|
|
{
|
|
"epoch": 3.579957356076759,
|
|
"grad_norm": 0.22310022257348242,
|
|
"learning_rate": 9.121348339212634e-06,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19209259748458862,
|
|
"step": 841,
|
|
"valid_targets_mean": 11219.8,
|
|
"valid_targets_min": 3007
|
|
},
|
|
{
|
|
"epoch": 3.5842217484008527,
|
|
"grad_norm": 0.18790974314852715,
|
|
"learning_rate": 9.07151564438916e-06,
|
|
"loss": 1.0272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26113003492355347,
|
|
"step": 842,
|
|
"valid_targets_mean": 15881.8,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 3.588486140724947,
|
|
"grad_norm": 0.19344700899715275,
|
|
"learning_rate": 9.021779490030611e-06,
|
|
"loss": 1.0069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29760706424713135,
|
|
"step": 843,
|
|
"valid_targets_mean": 16168.9,
|
|
"valid_targets_min": 15515
|
|
},
|
|
{
|
|
"epoch": 3.5927505330490406,
|
|
"grad_norm": 0.19844246180745356,
|
|
"learning_rate": 8.972140315498119e-06,
|
|
"loss": 0.9891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16281500458717346,
|
|
"step": 844,
|
|
"valid_targets_mean": 9614.0,
|
|
"valid_targets_min": 3748
|
|
},
|
|
{
|
|
"epoch": 3.5970149253731343,
|
|
"grad_norm": 0.17974321723597542,
|
|
"learning_rate": 8.922598559296154e-06,
|
|
"loss": 0.9994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26607078313827515,
|
|
"step": 845,
|
|
"valid_targets_mean": 16162.2,
|
|
"valid_targets_min": 15506
|
|
},
|
|
{
|
|
"epoch": 3.6012793176972284,
|
|
"grad_norm": 0.20341146934383053,
|
|
"learning_rate": 8.873154659068582e-06,
|
|
"loss": 0.9778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741701602935791,
|
|
"step": 846,
|
|
"valid_targets_mean": 16113.0,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 3.605543710021322,
|
|
"grad_norm": 0.18731768881578453,
|
|
"learning_rate": 8.823809051594816e-06,
|
|
"loss": 1.0405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21835577487945557,
|
|
"step": 847,
|
|
"valid_targets_mean": 13214.4,
|
|
"valid_targets_min": 11479
|
|
},
|
|
{
|
|
"epoch": 3.609808102345416,
|
|
"grad_norm": 0.1900110640471695,
|
|
"learning_rate": 8.774562172785988e-06,
|
|
"loss": 1.0198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27935534715652466,
|
|
"step": 848,
|
|
"valid_targets_mean": 16196.8,
|
|
"valid_targets_min": 15650
|
|
},
|
|
{
|
|
"epoch": 3.6140724946695095,
|
|
"grad_norm": 0.19643877531258055,
|
|
"learning_rate": 8.725414457681063e-06,
|
|
"loss": 1.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31650349497795105,
|
|
"step": 849,
|
|
"valid_targets_mean": 16141.3,
|
|
"valid_targets_min": 15560
|
|
},
|
|
{
|
|
"epoch": 3.6183368869936032,
|
|
"grad_norm": 0.18159277841303603,
|
|
"learning_rate": 8.676366340443017e-06,
|
|
"loss": 1.0258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23279966413974762,
|
|
"step": 850,
|
|
"valid_targets_mean": 15714.5,
|
|
"valid_targets_min": 13969
|
|
},
|
|
{
|
|
"epoch": 3.6226012793176974,
|
|
"grad_norm": 0.18914174971991118,
|
|
"learning_rate": 8.627418254355e-06,
|
|
"loss": 0.9954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29254311323165894,
|
|
"step": 851,
|
|
"valid_targets_mean": 16100.1,
|
|
"valid_targets_min": 14702
|
|
},
|
|
{
|
|
"epoch": 3.626865671641791,
|
|
"grad_norm": 0.18851106922705965,
|
|
"learning_rate": 8.578570631816474e-06,
|
|
"loss": 0.9885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1963026523590088,
|
|
"step": 852,
|
|
"valid_targets_mean": 10805.0,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 3.631130063965885,
|
|
"grad_norm": 0.18553123818491868,
|
|
"learning_rate": 8.529823904339472e-06,
|
|
"loss": 1.0004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26472026109695435,
|
|
"step": 853,
|
|
"valid_targets_mean": 16080.2,
|
|
"valid_targets_min": 14810
|
|
},
|
|
{
|
|
"epoch": 3.635394456289979,
|
|
"grad_norm": 0.18921337437319277,
|
|
"learning_rate": 8.481178502544684e-06,
|
|
"loss": 0.9801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843508720397949,
|
|
"step": 854,
|
|
"valid_targets_mean": 16059.2,
|
|
"valid_targets_min": 13226
|
|
},
|
|
{
|
|
"epoch": 3.6396588486140726,
|
|
"grad_norm": 0.19440573786676135,
|
|
"learning_rate": 8.43263485615774e-06,
|
|
"loss": 1.0438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16292321681976318,
|
|
"step": 855,
|
|
"valid_targets_mean": 8378.9,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 3.6439232409381663,
|
|
"grad_norm": 0.18601232249517927,
|
|
"learning_rate": 8.384193394005372e-06,
|
|
"loss": 1.0378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532096803188324,
|
|
"step": 856,
|
|
"valid_targets_mean": 16128.1,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 3.64818763326226,
|
|
"grad_norm": 0.20527307938374237,
|
|
"learning_rate": 8.33585454401161e-06,
|
|
"loss": 1.0523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32182398438453674,
|
|
"step": 857,
|
|
"valid_targets_mean": 16055.6,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 3.6524520255863537,
|
|
"grad_norm": 0.1873639985796243,
|
|
"learning_rate": 8.287618733194073e-06,
|
|
"loss": 1.029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21054118871688843,
|
|
"step": 858,
|
|
"valid_targets_mean": 12071.9,
|
|
"valid_targets_min": 9257
|
|
},
|
|
{
|
|
"epoch": 3.656716417910448,
|
|
"grad_norm": 0.19285049569075394,
|
|
"learning_rate": 8.239486387660096e-06,
|
|
"loss": 0.9993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29161518812179565,
|
|
"step": 859,
|
|
"valid_targets_mean": 16002.4,
|
|
"valid_targets_min": 14544
|
|
},
|
|
{
|
|
"epoch": 3.6609808102345416,
|
|
"grad_norm": 0.206953502913472,
|
|
"learning_rate": 8.191457932603052e-06,
|
|
"loss": 0.9978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028186857700348,
|
|
"step": 860,
|
|
"valid_targets_mean": 16125.6,
|
|
"valid_targets_min": 14765
|
|
},
|
|
{
|
|
"epoch": 3.6652452025586353,
|
|
"grad_norm": 0.2021671638526298,
|
|
"learning_rate": 8.143533792298545e-06,
|
|
"loss": 1.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25631392002105713,
|
|
"step": 861,
|
|
"valid_targets_mean": 15072.8,
|
|
"valid_targets_min": 13354
|
|
},
|
|
{
|
|
"epoch": 3.6695095948827294,
|
|
"grad_norm": 0.18705163656391824,
|
|
"learning_rate": 8.095714390100698e-06,
|
|
"loss": 1.0125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564089298248291,
|
|
"step": 862,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 13888
|
|
},
|
|
{
|
|
"epoch": 3.673773987206823,
|
|
"grad_norm": 0.20229185100536531,
|
|
"learning_rate": 8.048000148438375e-06,
|
|
"loss": 1.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2647925019264221,
|
|
"step": 863,
|
|
"valid_targets_mean": 13058.5,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 3.678038379530917,
|
|
"grad_norm": 0.19478431134981805,
|
|
"learning_rate": 8.000391488811485e-06,
|
|
"loss": 1.015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527012228965759,
|
|
"step": 864,
|
|
"valid_targets_mean": 15782.9,
|
|
"valid_targets_min": 15070
|
|
},
|
|
{
|
|
"epoch": 3.6823027718550105,
|
|
"grad_norm": 0.19431581425484498,
|
|
"learning_rate": 7.952888831787215e-06,
|
|
"loss": 1.0346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29282718896865845,
|
|
"step": 865,
|
|
"valid_targets_mean": 16152.8,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 3.6865671641791042,
|
|
"grad_norm": 0.195257986285235,
|
|
"learning_rate": 7.905492596996391e-06,
|
|
"loss": 0.9991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1756191849708557,
|
|
"step": 866,
|
|
"valid_targets_mean": 9315.6,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 3.6908315565031984,
|
|
"grad_norm": 0.2013478651180254,
|
|
"learning_rate": 7.858203203129668e-06,
|
|
"loss": 1.0574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624044418334961,
|
|
"step": 867,
|
|
"valid_targets_mean": 16006.4,
|
|
"valid_targets_min": 14911
|
|
},
|
|
{
|
|
"epoch": 3.695095948827292,
|
|
"grad_norm": 0.20042757097195638,
|
|
"learning_rate": 7.811021067933919e-06,
|
|
"loss": 1.0214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275198757648468,
|
|
"step": 868,
|
|
"valid_targets_mean": 16138.8,
|
|
"valid_targets_min": 14951
|
|
},
|
|
{
|
|
"epoch": 3.699360341151386,
|
|
"grad_norm": 0.2086779710908981,
|
|
"learning_rate": 7.763946608208504e-06,
|
|
"loss": 1.0115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20529082417488098,
|
|
"step": 869,
|
|
"valid_targets_mean": 11099.1,
|
|
"valid_targets_min": 7003
|
|
},
|
|
{
|
|
"epoch": 3.70362473347548,
|
|
"grad_norm": 0.18841632626418162,
|
|
"learning_rate": 7.716980239801588e-06,
|
|
"loss": 1.0112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28387880325317383,
|
|
"step": 870,
|
|
"valid_targets_mean": 16065.7,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 3.7078891257995736,
|
|
"grad_norm": 0.2009942429824906,
|
|
"learning_rate": 7.670122377606495e-06,
|
|
"loss": 1.0628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31606560945510864,
|
|
"step": 871,
|
|
"valid_targets_mean": 16118.9,
|
|
"valid_targets_min": 15290
|
|
},
|
|
{
|
|
"epoch": 3.7121535181236673,
|
|
"grad_norm": 0.18713924668332327,
|
|
"learning_rate": 7.623373435557988e-06,
|
|
"loss": 1.0437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24377189576625824,
|
|
"step": 872,
|
|
"valid_targets_mean": 13757.4,
|
|
"valid_targets_min": 11218
|
|
},
|
|
{
|
|
"epoch": 3.716417910447761,
|
|
"grad_norm": 0.19627794573308893,
|
|
"learning_rate": 7.5767338266286775e-06,
|
|
"loss": 1.0006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28263744711875916,
|
|
"step": 873,
|
|
"valid_targets_mean": 16168.4,
|
|
"valid_targets_min": 14830
|
|
},
|
|
{
|
|
"epoch": 3.7206823027718547,
|
|
"grad_norm": 0.20272887024473196,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 1.0316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3026537001132965,
|
|
"step": 874,
|
|
"valid_targets_mean": 16155.6,
|
|
"valid_targets_min": 15464
|
|
},
|
|
{
|
|
"epoch": 3.724946695095949,
|
|
"grad_norm": 0.18496492208485346,
|
|
"learning_rate": 7.483784255185249e-06,
|
|
"loss": 1.0431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23528644442558289,
|
|
"step": 875,
|
|
"valid_targets_mean": 14721.8,
|
|
"valid_targets_min": 12209
|
|
},
|
|
{
|
|
"epoch": 3.7292110874200426,
|
|
"grad_norm": 0.1938003198560578,
|
|
"learning_rate": 7.437475113772632e-06,
|
|
"loss": 1.0017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2918860912322998,
|
|
"step": 876,
|
|
"valid_targets_mean": 16160.6,
|
|
"valid_targets_min": 15234
|
|
},
|
|
{
|
|
"epoch": 3.7334754797441363,
|
|
"grad_norm": 0.1857564354208292,
|
|
"learning_rate": 7.391276947674932e-06,
|
|
"loss": 1.0165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2113693207502365,
|
|
"step": 877,
|
|
"valid_targets_mean": 11079.2,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 3.7377398720682304,
|
|
"grad_norm": 0.20687492345356723,
|
|
"learning_rate": 7.345190164999307e-06,
|
|
"loss": 1.007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24111422896385193,
|
|
"step": 878,
|
|
"valid_targets_mean": 15988.7,
|
|
"valid_targets_min": 14889
|
|
},
|
|
{
|
|
"epoch": 3.742004264392324,
|
|
"grad_norm": 0.18975274436616069,
|
|
"learning_rate": 7.299215172868947e-06,
|
|
"loss": 1.0029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3014640510082245,
|
|
"step": 879,
|
|
"valid_targets_mean": 16100.1,
|
|
"valid_targets_min": 15198
|
|
},
|
|
{
|
|
"epoch": 3.746268656716418,
|
|
"grad_norm": 0.17178565152484693,
|
|
"learning_rate": 7.2533523774194865e-06,
|
|
"loss": 0.9928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15241798758506775,
|
|
"step": 880,
|
|
"valid_targets_mean": 8749.8,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 3.750533049040512,
|
|
"grad_norm": 0.16823338937422472,
|
|
"learning_rate": 7.2076021837954616e-06,
|
|
"loss": 0.9577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24019664525985718,
|
|
"step": 881,
|
|
"valid_targets_mean": 16235.8,
|
|
"valid_targets_min": 15737
|
|
},
|
|
{
|
|
"epoch": 3.7547974413646057,
|
|
"grad_norm": 0.19457652381144655,
|
|
"learning_rate": 7.161964996146689e-06,
|
|
"loss": 0.9948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28556281328201294,
|
|
"step": 882,
|
|
"valid_targets_mean": 16172.3,
|
|
"valid_targets_min": 15386
|
|
},
|
|
{
|
|
"epoch": 3.7590618336886994,
|
|
"grad_norm": 0.17320229188333314,
|
|
"learning_rate": 7.116441217624708e-06,
|
|
"loss": 1.0278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2017824500799179,
|
|
"step": 883,
|
|
"valid_targets_mean": 11656.7,
|
|
"valid_targets_min": 7621
|
|
},
|
|
{
|
|
"epoch": 3.763326226012793,
|
|
"grad_norm": 0.17868285444807047,
|
|
"learning_rate": 7.071031250379228e-06,
|
|
"loss": 0.998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27688509225845337,
|
|
"step": 884,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 3.767590618336887,
|
|
"grad_norm": 0.18116858646029893,
|
|
"learning_rate": 7.0257354955545466e-06,
|
|
"loss": 1.0266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2880440354347229,
|
|
"step": 885,
|
|
"valid_targets_mean": 16203.8,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 3.771855010660981,
|
|
"grad_norm": 0.17238054940141237,
|
|
"learning_rate": 6.980554353286066e-06,
|
|
"loss": 0.9933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24837496876716614,
|
|
"step": 886,
|
|
"valid_targets_mean": 14070.1,
|
|
"valid_targets_min": 12079
|
|
},
|
|
{
|
|
"epoch": 3.7761194029850746,
|
|
"grad_norm": 0.19830502471733977,
|
|
"learning_rate": 6.935488222696676e-06,
|
|
"loss": 1.0337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2940099239349365,
|
|
"step": 887,
|
|
"valid_targets_mean": 16149.6,
|
|
"valid_targets_min": 15401
|
|
},
|
|
{
|
|
"epoch": 3.7803837953091683,
|
|
"grad_norm": 0.17875551605968748,
|
|
"learning_rate": 6.890537501893302e-06,
|
|
"loss": 1.0153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2383759617805481,
|
|
"step": 888,
|
|
"valid_targets_mean": 12633.7,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 3.7846481876332625,
|
|
"grad_norm": 0.1717516988375723,
|
|
"learning_rate": 6.845702587963352e-06,
|
|
"loss": 1.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500758469104767,
|
|
"step": 889,
|
|
"valid_targets_mean": 15490.9,
|
|
"valid_targets_min": 13898
|
|
},
|
|
{
|
|
"epoch": 3.788912579957356,
|
|
"grad_norm": 0.17536669547483766,
|
|
"learning_rate": 6.800983876971192e-06,
|
|
"loss": 0.967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27792251110076904,
|
|
"step": 890,
|
|
"valid_targets_mean": 16048.4,
|
|
"valid_targets_min": 13573
|
|
},
|
|
{
|
|
"epoch": 3.79317697228145,
|
|
"grad_norm": 0.18991827312707868,
|
|
"learning_rate": 6.756381763954718e-06,
|
|
"loss": 1.0392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1701337993144989,
|
|
"step": 891,
|
|
"valid_targets_mean": 9984.5,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 3.7974413646055436,
|
|
"grad_norm": 0.167736113405825,
|
|
"learning_rate": 6.7118966429217645e-06,
|
|
"loss": 1.0304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255683958530426,
|
|
"step": 892,
|
|
"valid_targets_mean": 15897.7,
|
|
"valid_targets_min": 15188
|
|
},
|
|
{
|
|
"epoch": 3.8017057569296373,
|
|
"grad_norm": 0.19724106669757815,
|
|
"learning_rate": 6.667528906846714e-06,
|
|
"loss": 1.0152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30840742588043213,
|
|
"step": 893,
|
|
"valid_targets_mean": 16174.7,
|
|
"valid_targets_min": 15665
|
|
},
|
|
{
|
|
"epoch": 3.8059701492537314,
|
|
"grad_norm": 0.17662184585660445,
|
|
"learning_rate": 6.623278947666974e-06,
|
|
"loss": 1.0597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19059161841869354,
|
|
"step": 894,
|
|
"valid_targets_mean": 10780.8,
|
|
"valid_targets_min": 6697
|
|
},
|
|
{
|
|
"epoch": 3.810234541577825,
|
|
"grad_norm": 0.17658779693706902,
|
|
"learning_rate": 6.579147156279538e-06,
|
|
"loss": 1.017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2489801049232483,
|
|
"step": 895,
|
|
"valid_targets_mean": 16181.2,
|
|
"valid_targets_min": 14287
|
|
},
|
|
{
|
|
"epoch": 3.814498933901919,
|
|
"grad_norm": 0.1860395690959855,
|
|
"learning_rate": 6.535133922537513e-06,
|
|
"loss": 1.0184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27477774024009705,
|
|
"step": 896,
|
|
"valid_targets_mean": 16171.2,
|
|
"valid_targets_min": 15437
|
|
},
|
|
{
|
|
"epoch": 3.818763326226013,
|
|
"grad_norm": 0.19224230910304432,
|
|
"learning_rate": 6.491239635246709e-06,
|
|
"loss": 1.029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.204367995262146,
|
|
"step": 897,
|
|
"valid_targets_mean": 13337.2,
|
|
"valid_targets_min": 10600
|
|
},
|
|
{
|
|
"epoch": 3.8230277185501067,
|
|
"grad_norm": 0.18093082818791587,
|
|
"learning_rate": 6.447464682162143e-06,
|
|
"loss": 1.0278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28919845819473267,
|
|
"step": 898,
|
|
"valid_targets_mean": 16177.3,
|
|
"valid_targets_min": 15404
|
|
},
|
|
{
|
|
"epoch": 3.8272921108742004,
|
|
"grad_norm": 0.19298455083094485,
|
|
"learning_rate": 6.403809449984704e-06,
|
|
"loss": 1.0163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29189974069595337,
|
|
"step": 899,
|
|
"valid_targets_mean": 16179.9,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 3.831556503198294,
|
|
"grad_norm": 0.1767420179649191,
|
|
"learning_rate": 6.3602743243576405e-06,
|
|
"loss": 1.0556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26589658856391907,
|
|
"step": 900,
|
|
"valid_targets_mean": 15050.4,
|
|
"valid_targets_min": 12744
|
|
},
|
|
{
|
|
"epoch": 3.835820895522388,
|
|
"grad_norm": 0.18011064807367771,
|
|
"learning_rate": 6.316859689863222e-06,
|
|
"loss": 1.0028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27655208110809326,
|
|
"step": 901,
|
|
"valid_targets_mean": 16103.3,
|
|
"valid_targets_min": 14365
|
|
},
|
|
{
|
|
"epoch": 3.840085287846482,
|
|
"grad_norm": 0.17395673981038276,
|
|
"learning_rate": 6.273565930019316e-06,
|
|
"loss": 1.0172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19080668687820435,
|
|
"step": 902,
|
|
"valid_targets_mean": 9851.0,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 3.8443496801705757,
|
|
"grad_norm": 0.16555740578076278,
|
|
"learning_rate": 6.230393427276e-06,
|
|
"loss": 0.9984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510816752910614,
|
|
"step": 903,
|
|
"valid_targets_mean": 16164.2,
|
|
"valid_targets_min": 15371
|
|
},
|
|
{
|
|
"epoch": 3.8486140724946694,
|
|
"grad_norm": 0.1821501550607803,
|
|
"learning_rate": 6.187342563012198e-06,
|
|
"loss": 0.9932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29555076360702515,
|
|
"step": 904,
|
|
"valid_targets_mean": 16153.7,
|
|
"valid_targets_min": 15258
|
|
},
|
|
{
|
|
"epoch": 3.8528784648187635,
|
|
"grad_norm": 0.1746269251661234,
|
|
"learning_rate": 6.144413717532269e-06,
|
|
"loss": 0.985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13593673706054688,
|
|
"step": 905,
|
|
"valid_targets_mean": 7781.8,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.16465614397089992,
|
|
"learning_rate": 6.1016072700627106e-06,
|
|
"loss": 1.0492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25289154052734375,
|
|
"step": 906,
|
|
"valid_targets_mean": 16108.3,
|
|
"valid_targets_min": 14821
|
|
},
|
|
{
|
|
"epoch": 3.861407249466951,
|
|
"grad_norm": 0.16890711782852966,
|
|
"learning_rate": 6.058923598748756e-06,
|
|
"loss": 1.0049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27441728115081787,
|
|
"step": 907,
|
|
"valid_targets_mean": 16135.9,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 3.8656716417910446,
|
|
"grad_norm": 0.18238285888271216,
|
|
"learning_rate": 6.016363080651066e-06,
|
|
"loss": 1.0129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22377993166446686,
|
|
"step": 908,
|
|
"valid_targets_mean": 12611.5,
|
|
"valid_targets_min": 9636
|
|
},
|
|
{
|
|
"epoch": 3.8699360341151388,
|
|
"grad_norm": 0.18454798647982595,
|
|
"learning_rate": 5.973926091742386e-06,
|
|
"loss": 1.0333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28381484746932983,
|
|
"step": 909,
|
|
"valid_targets_mean": 15943.4,
|
|
"valid_targets_min": 11336
|
|
},
|
|
{
|
|
"epoch": 3.8742004264392325,
|
|
"grad_norm": 0.18673125131825194,
|
|
"learning_rate": 5.931613006904196e-06,
|
|
"loss": 0.9666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27381324768066406,
|
|
"step": 910,
|
|
"valid_targets_mean": 16215.7,
|
|
"valid_targets_min": 15773
|
|
},
|
|
{
|
|
"epoch": 3.878464818763326,
|
|
"grad_norm": 0.16458547922432532,
|
|
"learning_rate": 5.889424199923473e-06,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533954381942749,
|
|
"step": 911,
|
|
"valid_targets_mean": 15084.9,
|
|
"valid_targets_min": 13687
|
|
},
|
|
{
|
|
"epoch": 3.88272921108742,
|
|
"grad_norm": 0.17898454832383665,
|
|
"learning_rate": 5.847360043489318e-06,
|
|
"loss": 1.0415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27176639437675476,
|
|
"step": 912,
|
|
"valid_targets_mean": 16055.0,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 3.886993603411514,
|
|
"grad_norm": 0.1948402691787504,
|
|
"learning_rate": 5.805420909189683e-06,
|
|
"loss": 0.9968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23655077815055847,
|
|
"step": 913,
|
|
"valid_targets_mean": 12997.0,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 3.8912579957356077,
|
|
"grad_norm": 0.19024798333407084,
|
|
"learning_rate": 5.7636071675081076e-06,
|
|
"loss": 1.0155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24532374739646912,
|
|
"step": 914,
|
|
"valid_targets_mean": 15195.4,
|
|
"valid_targets_min": 13288
|
|
},
|
|
{
|
|
"epoch": 3.8955223880597014,
|
|
"grad_norm": 0.17422448747153263,
|
|
"learning_rate": 5.721919187820431e-06,
|
|
"loss": 1.0186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30523931980133057,
|
|
"step": 915,
|
|
"valid_targets_mean": 16162.2,
|
|
"valid_targets_min": 14786
|
|
},
|
|
{
|
|
"epoch": 3.8997867803837956,
|
|
"grad_norm": 0.18834891235846302,
|
|
"learning_rate": 5.6803573383915265e-06,
|
|
"loss": 1.0157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16399307548999786,
|
|
"step": 916,
|
|
"valid_targets_mean": 8992.8,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 3.9040511727078893,
|
|
"grad_norm": 0.1867944799039997,
|
|
"learning_rate": 5.638921986372064e-06,
|
|
"loss": 1.051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25738126039505005,
|
|
"step": 917,
|
|
"valid_targets_mean": 16127.8,
|
|
"valid_targets_min": 15305
|
|
},
|
|
{
|
|
"epoch": 3.908315565031983,
|
|
"grad_norm": 0.17361563033877148,
|
|
"learning_rate": 5.5976134977952315e-06,
|
|
"loss": 1.0119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28804343938827515,
|
|
"step": 918,
|
|
"valid_targets_mean": 16123.2,
|
|
"valid_targets_min": 15121
|
|
},
|
|
{
|
|
"epoch": 3.9125799573560767,
|
|
"grad_norm": 0.16825648583825015,
|
|
"learning_rate": 5.556432237573564e-06,
|
|
"loss": 1.0279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20955432951450348,
|
|
"step": 919,
|
|
"valid_targets_mean": 11376.9,
|
|
"valid_targets_min": 7337
|
|
},
|
|
{
|
|
"epoch": 3.9168443496801704,
|
|
"grad_norm": 0.16553801574522675,
|
|
"learning_rate": 5.5153785694956416e-06,
|
|
"loss": 0.9933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24325954914093018,
|
|
"step": 920,
|
|
"valid_targets_mean": 16196.0,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 3.9211087420042645,
|
|
"grad_norm": 0.18329983316187032,
|
|
"learning_rate": 5.474452856222942e-06,
|
|
"loss": 1.0192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3223472833633423,
|
|
"step": 921,
|
|
"valid_targets_mean": 15823.3,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 3.925373134328358,
|
|
"grad_norm": 0.1636455592002287,
|
|
"learning_rate": 5.433655459286611e-06,
|
|
"loss": 1.0356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23511505126953125,
|
|
"step": 922,
|
|
"valid_targets_mean": 13433.8,
|
|
"valid_targets_min": 9965
|
|
},
|
|
{
|
|
"epoch": 3.929637526652452,
|
|
"grad_norm": 0.17299159273541392,
|
|
"learning_rate": 5.392986739084238e-06,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2826547622680664,
|
|
"step": 923,
|
|
"valid_targets_mean": 16122.8,
|
|
"valid_targets_min": 15294
|
|
},
|
|
{
|
|
"epoch": 3.933901918976546,
|
|
"grad_norm": 0.18542024973383645,
|
|
"learning_rate": 5.352447054876755e-06,
|
|
"loss": 1.0217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2850426137447357,
|
|
"step": 924,
|
|
"valid_targets_mean": 16221.2,
|
|
"valid_targets_min": 15261
|
|
},
|
|
{
|
|
"epoch": 3.9381663113006398,
|
|
"grad_norm": 0.17584366091314624,
|
|
"learning_rate": 5.31203676478516e-06,
|
|
"loss": 1.0178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24125772714614868,
|
|
"step": 925,
|
|
"valid_targets_mean": 14959.6,
|
|
"valid_targets_min": 12523
|
|
},
|
|
{
|
|
"epoch": 3.9424307036247335,
|
|
"grad_norm": 0.178384382105139,
|
|
"learning_rate": 5.271756225787434e-06,
|
|
"loss": 1.0219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30257290601730347,
|
|
"step": 926,
|
|
"valid_targets_mean": 16134.2,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 3.946695095948827,
|
|
"grad_norm": 0.18212083604118592,
|
|
"learning_rate": 5.231605793715348e-06,
|
|
"loss": 1.0402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20673322677612305,
|
|
"step": 927,
|
|
"valid_targets_mean": 10423.7,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.950959488272921,
|
|
"grad_norm": 0.17544505960661458,
|
|
"learning_rate": 5.191585823251335e-06,
|
|
"loss": 1.0086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677339017391205,
|
|
"step": 928,
|
|
"valid_targets_mean": 15745.4,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 3.955223880597015,
|
|
"grad_norm": 0.17946206809277102,
|
|
"learning_rate": 5.151696667925348e-06,
|
|
"loss": 1.0196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30542314052581787,
|
|
"step": 929,
|
|
"valid_targets_mean": 15883.8,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 3.9594882729211087,
|
|
"grad_norm": 0.17024673912893915,
|
|
"learning_rate": 5.111938680111732e-06,
|
|
"loss": 1.0081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15605497360229492,
|
|
"step": 930,
|
|
"valid_targets_mean": 8194.7,
|
|
"valid_targets_min": 2514
|
|
},
|
|
{
|
|
"epoch": 3.9637526652452024,
|
|
"grad_norm": 0.1603880817840821,
|
|
"learning_rate": 5.072312211026125e-06,
|
|
"loss": 0.9481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23745033144950867,
|
|
"step": 931,
|
|
"valid_targets_mean": 16183.0,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 3.9680170575692966,
|
|
"grad_norm": 0.18716221209545653,
|
|
"learning_rate": 5.032817610722369e-06,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806117534637451,
|
|
"step": 932,
|
|
"valid_targets_mean": 16226.6,
|
|
"valid_targets_min": 15460
|
|
},
|
|
{
|
|
"epoch": 3.9722814498933903,
|
|
"grad_norm": 0.17056746416208973,
|
|
"learning_rate": 4.993455228089366e-06,
|
|
"loss": 0.98,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19527921080589294,
|
|
"step": 933,
|
|
"valid_targets_mean": 12267.8,
|
|
"valid_targets_min": 9004
|
|
},
|
|
{
|
|
"epoch": 3.976545842217484,
|
|
"grad_norm": 0.1655198301817832,
|
|
"learning_rate": 4.954225410848048e-06,
|
|
"loss": 1.027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29158180952072144,
|
|
"step": 934,
|
|
"valid_targets_mean": 16006.9,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 3.9808102345415777,
|
|
"grad_norm": 0.1835251340588986,
|
|
"learning_rate": 4.915128505548284e-06,
|
|
"loss": 1.0526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2927752733230591,
|
|
"step": 935,
|
|
"valid_targets_mean": 16135.8,
|
|
"valid_targets_min": 14460
|
|
},
|
|
{
|
|
"epoch": 3.9850746268656714,
|
|
"grad_norm": 0.17772379148672862,
|
|
"learning_rate": 4.8761648575658145e-06,
|
|
"loss": 0.9824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2293371558189392,
|
|
"step": 936,
|
|
"valid_targets_mean": 14490.9,
|
|
"valid_targets_min": 12290
|
|
},
|
|
{
|
|
"epoch": 3.9893390191897655,
|
|
"grad_norm": 0.17677193480971368,
|
|
"learning_rate": 4.837334811099217e-06,
|
|
"loss": 1.0409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3174881637096405,
|
|
"step": 937,
|
|
"valid_targets_mean": 16084.7,
|
|
"valid_targets_min": 14795
|
|
},
|
|
{
|
|
"epoch": 3.9936034115138592,
|
|
"grad_norm": 0.16691715810349897,
|
|
"learning_rate": 4.7986387091668365e-06,
|
|
"loss": 1.0494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2444138526916504,
|
|
"step": 938,
|
|
"valid_targets_mean": 13311.0,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 3.997867803837953,
|
|
"grad_norm": 0.1722769767004743,
|
|
"learning_rate": 4.760076893603791e-06,
|
|
"loss": 0.988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24182796478271484,
|
|
"step": 939,
|
|
"valid_targets_mean": 16049.2,
|
|
"valid_targets_min": 15191
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.2504244198165015,
|
|
"learning_rate": 4.721649705058926e-06,
|
|
"loss": 0.9825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4427710175514221,
|
|
"step": 940,
|
|
"valid_targets_mean": 11319.7,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.004264392324094,
|
|
"grad_norm": 0.17246692751377352,
|
|
"learning_rate": 4.683357482991819e-06,
|
|
"loss": 0.9659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2455691248178482,
|
|
"step": 941,
|
|
"valid_targets_mean": 16066.7,
|
|
"valid_targets_min": 14483
|
|
},
|
|
{
|
|
"epoch": 4.008528784648187,
|
|
"grad_norm": 0.1798024598736799,
|
|
"learning_rate": 4.645200565669776e-06,
|
|
"loss": 1.0174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3127375841140747,
|
|
"step": 942,
|
|
"valid_targets_mean": 16023.8,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 4.0127931769722816,
|
|
"grad_norm": 0.17460584931760878,
|
|
"learning_rate": 4.607179290164823e-06,
|
|
"loss": 1.0181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2128245085477829,
|
|
"step": 943,
|
|
"valid_targets_mean": 13279.5,
|
|
"valid_targets_min": 10443
|
|
},
|
|
{
|
|
"epoch": 4.017057569296376,
|
|
"grad_norm": 0.17515223987813858,
|
|
"learning_rate": 4.569293992350783e-06,
|
|
"loss": 0.9927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609207034111023,
|
|
"step": 944,
|
|
"valid_targets_mean": 16190.7,
|
|
"valid_targets_min": 14904
|
|
},
|
|
{
|
|
"epoch": 4.021321961620469,
|
|
"grad_norm": 0.18214997012512624,
|
|
"learning_rate": 4.531545006900244e-06,
|
|
"loss": 1.0005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.296354740858078,
|
|
"step": 945,
|
|
"valid_targets_mean": 16119.8,
|
|
"valid_targets_min": 15234
|
|
},
|
|
{
|
|
"epoch": 4.025586353944563,
|
|
"grad_norm": 0.1616773227415402,
|
|
"learning_rate": 4.493932667281646e-06,
|
|
"loss": 1.0045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23627632856369019,
|
|
"step": 946,
|
|
"valid_targets_mean": 14883.1,
|
|
"valid_targets_min": 12676
|
|
},
|
|
{
|
|
"epoch": 4.029850746268656,
|
|
"grad_norm": 0.17564863386214266,
|
|
"learning_rate": 4.456457305756321e-06,
|
|
"loss": 1.0169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919178009033203,
|
|
"step": 947,
|
|
"valid_targets_mean": 16098.3,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 4.0341151385927505,
|
|
"grad_norm": 0.17370772393234352,
|
|
"learning_rate": 4.419119253375557e-06,
|
|
"loss": 1.0203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674356698989868,
|
|
"step": 948,
|
|
"valid_targets_mean": 12999.3,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 4.038379530916845,
|
|
"grad_norm": 0.1829213891838492,
|
|
"learning_rate": 4.381918839977675e-06,
|
|
"loss": 1.0165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25065314769744873,
|
|
"step": 949,
|
|
"valid_targets_mean": 15620.4,
|
|
"valid_targets_min": 13538
|
|
},
|
|
{
|
|
"epoch": 4.042643923240938,
|
|
"grad_norm": 0.18813205257563564,
|
|
"learning_rate": 4.344856394185122e-06,
|
|
"loss": 0.9905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3093401789665222,
|
|
"step": 950,
|
|
"valid_targets_mean": 16151.6,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 4.046908315565032,
|
|
"grad_norm": 0.17425678705418207,
|
|
"learning_rate": 4.307932243401538e-06,
|
|
"loss": 1.0561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17597821354866028,
|
|
"step": 951,
|
|
"valid_targets_mean": 9453.4,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 4.051172707889126,
|
|
"grad_norm": 0.16299499509558893,
|
|
"learning_rate": 4.271146713808927e-06,
|
|
"loss": 0.9752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24879279732704163,
|
|
"step": 952,
|
|
"valid_targets_mean": 16083.6,
|
|
"valid_targets_min": 13226
|
|
},
|
|
{
|
|
"epoch": 4.0554371002132195,
|
|
"grad_norm": 0.1791656232253041,
|
|
"learning_rate": 4.234500130364698e-06,
|
|
"loss": 1.0566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3101106584072113,
|
|
"step": 953,
|
|
"valid_targets_mean": 16129.1,
|
|
"valid_targets_min": 15302
|
|
},
|
|
{
|
|
"epoch": 4.059701492537314,
|
|
"grad_norm": 0.16465711266648786,
|
|
"learning_rate": 4.197992816798851e-06,
|
|
"loss": 0.9986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662733256816864,
|
|
"step": 954,
|
|
"valid_targets_mean": 10084.4,
|
|
"valid_targets_min": 5271
|
|
},
|
|
{
|
|
"epoch": 4.063965884861407,
|
|
"grad_norm": 0.17209047962238969,
|
|
"learning_rate": 4.161625095611101e-06,
|
|
"loss": 1.0102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812535762786865,
|
|
"step": 955,
|
|
"valid_targets_mean": 16028.5,
|
|
"valid_targets_min": 14444
|
|
},
|
|
{
|
|
"epoch": 4.068230277185501,
|
|
"grad_norm": 0.18920002579566672,
|
|
"learning_rate": 4.125397288068007e-06,
|
|
"loss": 1.0075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295272558927536,
|
|
"step": 956,
|
|
"valid_targets_mean": 16037.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 4.072494669509595,
|
|
"grad_norm": 0.16631336871093422,
|
|
"learning_rate": 4.089309714200187e-06,
|
|
"loss": 1.0173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21547776460647583,
|
|
"step": 957,
|
|
"valid_targets_mean": 12814.3,
|
|
"valid_targets_min": 10793
|
|
},
|
|
{
|
|
"epoch": 4.076759061833688,
|
|
"grad_norm": 0.17195252518997706,
|
|
"learning_rate": 4.0533626927994185e-06,
|
|
"loss": 1.0294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27610135078430176,
|
|
"step": 958,
|
|
"valid_targets_mean": 16213.8,
|
|
"valid_targets_min": 15779
|
|
},
|
|
{
|
|
"epoch": 4.081023454157783,
|
|
"grad_norm": 0.17746150202592195,
|
|
"learning_rate": 4.017556541415888e-06,
|
|
"loss": 1.0215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968849539756775,
|
|
"step": 959,
|
|
"valid_targets_mean": 16128.7,
|
|
"valid_targets_min": 15225
|
|
},
|
|
{
|
|
"epoch": 4.085287846481877,
|
|
"grad_norm": 0.15732737671341956,
|
|
"learning_rate": 3.981891576355352e-06,
|
|
"loss": 1.0329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23210272192955017,
|
|
"step": 960,
|
|
"valid_targets_mean": 14446.7,
|
|
"valid_targets_min": 12362
|
|
},
|
|
{
|
|
"epoch": 4.08955223880597,
|
|
"grad_norm": 0.1687050929318872,
|
|
"learning_rate": 3.946368112676346e-06,
|
|
"loss": 0.9614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920669913291931,
|
|
"step": 961,
|
|
"valid_targets_mean": 16109.6,
|
|
"valid_targets_min": 15314
|
|
},
|
|
{
|
|
"epoch": 4.093816631130064,
|
|
"grad_norm": 0.1809155819031798,
|
|
"learning_rate": 3.9109864641874166e-06,
|
|
"loss": 0.9769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19918814301490784,
|
|
"step": 962,
|
|
"valid_targets_mean": 11302.1,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 4.098081023454157,
|
|
"grad_norm": 0.1637091957585024,
|
|
"learning_rate": 3.875746943444316e-06,
|
|
"loss": 1.0004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26292359828948975,
|
|
"step": 963,
|
|
"valid_targets_mean": 14906.4,
|
|
"valid_targets_min": 13182
|
|
},
|
|
{
|
|
"epoch": 4.1023454157782515,
|
|
"grad_norm": 0.17110911413629215,
|
|
"learning_rate": 3.840649861747278e-06,
|
|
"loss": 1.0001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29952409863471985,
|
|
"step": 964,
|
|
"valid_targets_mean": 16189.0,
|
|
"valid_targets_min": 15541
|
|
},
|
|
{
|
|
"epoch": 4.106609808102346,
|
|
"grad_norm": 0.17203665064856394,
|
|
"learning_rate": 3.8056955291382667e-06,
|
|
"loss": 1.0416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15105456113815308,
|
|
"step": 965,
|
|
"valid_targets_mean": 8370.9,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 4.110874200426439,
|
|
"grad_norm": 0.16543690889160476,
|
|
"learning_rate": 3.7708842543981928e-06,
|
|
"loss": 1.0102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592537999153137,
|
|
"step": 966,
|
|
"valid_targets_mean": 16096.9,
|
|
"valid_targets_min": 15058
|
|
},
|
|
{
|
|
"epoch": 4.115138592750533,
|
|
"grad_norm": 0.16640196738551852,
|
|
"learning_rate": 3.736216345044237e-06,
|
|
"loss": 0.9855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3046841323375702,
|
|
"step": 967,
|
|
"valid_targets_mean": 16148.6,
|
|
"valid_targets_min": 15180
|
|
},
|
|
{
|
|
"epoch": 4.119402985074627,
|
|
"grad_norm": 0.17170009093669797,
|
|
"learning_rate": 3.7016921073271084e-06,
|
|
"loss": 1.0408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2107902467250824,
|
|
"step": 968,
|
|
"valid_targets_mean": 12434.7,
|
|
"valid_targets_min": 9731
|
|
},
|
|
{
|
|
"epoch": 4.1236673773987205,
|
|
"grad_norm": 0.16001787781461205,
|
|
"learning_rate": 3.6673118462283453e-06,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566899359226227,
|
|
"step": 969,
|
|
"valid_targets_mean": 16190.2,
|
|
"valid_targets_min": 15606
|
|
},
|
|
{
|
|
"epoch": 4.127931769722815,
|
|
"grad_norm": 0.16967689729626861,
|
|
"learning_rate": 3.6330758654576227e-06,
|
|
"loss": 0.9784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849799692630768,
|
|
"step": 970,
|
|
"valid_targets_mean": 16164.5,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 4.132196162046908,
|
|
"grad_norm": 0.15176319024151982,
|
|
"learning_rate": 3.598984467450055e-06,
|
|
"loss": 1.041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24832749366760254,
|
|
"step": 971,
|
|
"valid_targets_mean": 14369.9,
|
|
"valid_targets_min": 12092
|
|
},
|
|
{
|
|
"epoch": 4.136460554371002,
|
|
"grad_norm": 0.16531344462110711,
|
|
"learning_rate": 3.565037953363546e-06,
|
|
"loss": 1.0143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875286936759949,
|
|
"step": 972,
|
|
"valid_targets_mean": 16104.4,
|
|
"valid_targets_min": 15148
|
|
},
|
|
{
|
|
"epoch": 4.140724946695096,
|
|
"grad_norm": 0.17350623076171356,
|
|
"learning_rate": 3.5312366230761154e-06,
|
|
"loss": 1.0355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23286902904510498,
|
|
"step": 973,
|
|
"valid_targets_mean": 13172.6,
|
|
"valid_targets_min": 3256
|
|
},
|
|
{
|
|
"epoch": 4.144989339019189,
|
|
"grad_norm": 0.15340678177707007,
|
|
"learning_rate": 3.497580775183258e-06,
|
|
"loss": 0.9921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518599033355713,
|
|
"step": 974,
|
|
"valid_targets_mean": 15601.2,
|
|
"valid_targets_min": 14121
|
|
},
|
|
{
|
|
"epoch": 4.149253731343284,
|
|
"grad_norm": 0.1626701032938541,
|
|
"learning_rate": 3.464070706995295e-06,
|
|
"loss": 1.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3254699409008026,
|
|
"step": 975,
|
|
"valid_targets_mean": 16042.2,
|
|
"valid_targets_min": 14897
|
|
},
|
|
{
|
|
"epoch": 4.153518123667378,
|
|
"grad_norm": 0.17186135125890936,
|
|
"learning_rate": 3.4307067145347417e-06,
|
|
"loss": 1.038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800900399684906,
|
|
"step": 976,
|
|
"valid_targets_mean": 8586.8,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 4.157782515991471,
|
|
"grad_norm": 0.1529083398755195,
|
|
"learning_rate": 3.397489092533739e-06,
|
|
"loss": 1.0341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551673650741577,
|
|
"step": 977,
|
|
"valid_targets_mean": 16131.9,
|
|
"valid_targets_min": 15537
|
|
},
|
|
{
|
|
"epoch": 4.162046908315565,
|
|
"grad_norm": 0.17395760784964165,
|
|
"learning_rate": 3.364418134431371e-06,
|
|
"loss": 1.0481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3169260025024414,
|
|
"step": 978,
|
|
"valid_targets_mean": 16057.0,
|
|
"valid_targets_min": 15080
|
|
},
|
|
{
|
|
"epoch": 4.166311300639659,
|
|
"grad_norm": 0.16875699439080014,
|
|
"learning_rate": 3.331494132371149e-06,
|
|
"loss": 0.9846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18680010735988617,
|
|
"step": 979,
|
|
"valid_targets_mean": 10399.8,
|
|
"valid_targets_min": 5144
|
|
},
|
|
{
|
|
"epoch": 4.1705756929637525,
|
|
"grad_norm": 0.16679936398998382,
|
|
"learning_rate": 3.2987173771983816e-06,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26042672991752625,
|
|
"step": 980,
|
|
"valid_targets_mean": 16078.6,
|
|
"valid_targets_min": 14547
|
|
},
|
|
{
|
|
"epoch": 4.174840085287847,
|
|
"grad_norm": 0.17405132531861456,
|
|
"learning_rate": 3.266088158457634e-06,
|
|
"loss": 0.9868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3239675760269165,
|
|
"step": 981,
|
|
"valid_targets_mean": 16150.1,
|
|
"valid_targets_min": 15327
|
|
},
|
|
{
|
|
"epoch": 4.17910447761194,
|
|
"grad_norm": 0.15822002672622312,
|
|
"learning_rate": 3.233606764390147e-06,
|
|
"loss": 1.0079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24741286039352417,
|
|
"step": 982,
|
|
"valid_targets_mean": 14295.7,
|
|
"valid_targets_min": 11896
|
|
},
|
|
{
|
|
"epoch": 4.183368869936034,
|
|
"grad_norm": 0.1579526767136909,
|
|
"learning_rate": 3.2012734819313127e-06,
|
|
"loss": 0.9713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27433016896247864,
|
|
"step": 983,
|
|
"valid_targets_mean": 16193.6,
|
|
"valid_targets_min": 15576
|
|
},
|
|
{
|
|
"epoch": 4.187633262260128,
|
|
"grad_norm": 0.15783691778025702,
|
|
"learning_rate": 3.1690885967081187e-06,
|
|
"loss": 1.0139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027735948562622,
|
|
"step": 984,
|
|
"valid_targets_mean": 16187.3,
|
|
"valid_targets_min": 15688
|
|
},
|
|
{
|
|
"epoch": 4.1918976545842215,
|
|
"grad_norm": 0.15058203511071058,
|
|
"learning_rate": 3.1370523930366393e-06,
|
|
"loss": 1.0096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23257461190223694,
|
|
"step": 985,
|
|
"valid_targets_mean": 15108.0,
|
|
"valid_targets_min": 13057
|
|
},
|
|
{
|
|
"epoch": 4.196162046908316,
|
|
"grad_norm": 0.17297443496300652,
|
|
"learning_rate": 3.105165153919525e-06,
|
|
"loss": 1.0544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32339954376220703,
|
|
"step": 986,
|
|
"valid_targets_mean": 16057.4,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 4.20042643923241,
|
|
"grad_norm": 0.17096423504833785,
|
|
"learning_rate": 3.073427161043492e-06,
|
|
"loss": 1.0446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19717785716056824,
|
|
"step": 987,
|
|
"valid_targets_mean": 10671.0,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 4.204690831556503,
|
|
"grad_norm": 0.15811864850862392,
|
|
"learning_rate": 3.0418386947768463e-06,
|
|
"loss": 1.0348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25622496008872986,
|
|
"step": 988,
|
|
"valid_targets_mean": 15663.9,
|
|
"valid_targets_min": 13837
|
|
},
|
|
{
|
|
"epoch": 4.208955223880597,
|
|
"grad_norm": 0.1704318941483858,
|
|
"learning_rate": 3.01040003416698e-06,
|
|
"loss": 1.0535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3200272023677826,
|
|
"step": 989,
|
|
"valid_targets_mean": 16049.9,
|
|
"valid_targets_min": 14594
|
|
},
|
|
{
|
|
"epoch": 4.21321961620469,
|
|
"grad_norm": 0.16854343341015596,
|
|
"learning_rate": 2.97911145693796e-06,
|
|
"loss": 0.9835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15381957590579987,
|
|
"step": 990,
|
|
"valid_targets_mean": 8766.8,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 4.217484008528785,
|
|
"grad_norm": 0.15484972848704895,
|
|
"learning_rate": 2.947973239488009e-06,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25665929913520813,
|
|
"step": 991,
|
|
"valid_targets_mean": 16073.6,
|
|
"valid_targets_min": 13538
|
|
},
|
|
{
|
|
"epoch": 4.221748400852879,
|
|
"grad_norm": 0.17861405921548443,
|
|
"learning_rate": 2.91698565688711e-06,
|
|
"loss": 1.0424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063114285469055,
|
|
"step": 992,
|
|
"valid_targets_mean": 16132.2,
|
|
"valid_targets_min": 15545
|
|
},
|
|
{
|
|
"epoch": 4.226012793176972,
|
|
"grad_norm": 0.16031459231248926,
|
|
"learning_rate": 2.886148982874566e-06,
|
|
"loss": 0.9808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.185684472322464,
|
|
"step": 993,
|
|
"valid_targets_mean": 12287.9,
|
|
"valid_targets_min": 10321
|
|
},
|
|
{
|
|
"epoch": 4.230277185501066,
|
|
"grad_norm": 0.16045974590643913,
|
|
"learning_rate": 2.8554634898565668e-06,
|
|
"loss": 1.0018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779512405395508,
|
|
"step": 994,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15636
|
|
},
|
|
{
|
|
"epoch": 4.23454157782516,
|
|
"grad_norm": 0.1778944082901531,
|
|
"learning_rate": 2.824929448903806e-06,
|
|
"loss": 0.9823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869250774383545,
|
|
"step": 995,
|
|
"valid_targets_mean": 16156.2,
|
|
"valid_targets_min": 14699
|
|
},
|
|
{
|
|
"epoch": 4.2388059701492535,
|
|
"grad_norm": 0.1493336206027735,
|
|
"learning_rate": 2.794547129749059e-06,
|
|
"loss": 0.9895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23386715352535248,
|
|
"step": 996,
|
|
"valid_targets_mean": 14443.9,
|
|
"valid_targets_min": 12790
|
|
},
|
|
{
|
|
"epoch": 4.243070362473348,
|
|
"grad_norm": 0.16220313233911257,
|
|
"learning_rate": 2.7643168007848255e-06,
|
|
"loss": 0.9708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724795937538147,
|
|
"step": 997,
|
|
"valid_targets_mean": 16089.3,
|
|
"valid_targets_min": 15042
|
|
},
|
|
{
|
|
"epoch": 4.247334754797441,
|
|
"grad_norm": 0.16170772060137895,
|
|
"learning_rate": 2.734238729060956e-06,
|
|
"loss": 1.0114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20901989936828613,
|
|
"step": 998,
|
|
"valid_targets_mean": 13288.2,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 4.251599147121535,
|
|
"grad_norm": 0.15830204817167906,
|
|
"learning_rate": 2.7043131802822653e-06,
|
|
"loss": 1.0119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2576751112937927,
|
|
"step": 999,
|
|
"valid_targets_mean": 15930.2,
|
|
"valid_targets_min": 14795
|
|
},
|
|
{
|
|
"epoch": 4.255863539445629,
|
|
"grad_norm": 0.1867642856332476,
|
|
"learning_rate": 2.674540418806222e-06,
|
|
"loss": 1.0501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30706915259361267,
|
|
"step": 1000,
|
|
"valid_targets_mean": 16080.7,
|
|
"valid_targets_min": 14917
|
|
},
|
|
{
|
|
"epoch": 4.2601279317697225,
|
|
"grad_norm": 0.17186096561203892,
|
|
"learning_rate": 2.6449207076405857e-06,
|
|
"loss": 1.0333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18668824434280396,
|
|
"step": 1001,
|
|
"valid_targets_mean": 9100.4,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.264392324093817,
|
|
"grad_norm": 0.15607267361455907,
|
|
"learning_rate": 2.6154543084411035e-06,
|
|
"loss": 0.9947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23665571212768555,
|
|
"step": 1002,
|
|
"valid_targets_mean": 16176.8,
|
|
"valid_targets_min": 15348
|
|
},
|
|
{
|
|
"epoch": 4.268656716417911,
|
|
"grad_norm": 0.16745332192918422,
|
|
"learning_rate": 2.5861414815091834e-06,
|
|
"loss": 1.0021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2971222400665283,
|
|
"step": 1003,
|
|
"valid_targets_mean": 16140.2,
|
|
"valid_targets_min": 14907
|
|
},
|
|
{
|
|
"epoch": 4.272921108742004,
|
|
"grad_norm": 0.1536915120933892,
|
|
"learning_rate": 2.5569824857895987e-06,
|
|
"loss": 0.9776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15721696615219116,
|
|
"step": 1004,
|
|
"valid_targets_mean": 10518.3,
|
|
"valid_targets_min": 5601
|
|
},
|
|
{
|
|
"epoch": 4.277185501066098,
|
|
"grad_norm": 0.15329230166214117,
|
|
"learning_rate": 2.5279775788682083e-06,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25782108306884766,
|
|
"step": 1005,
|
|
"valid_targets_mean": 16179.2,
|
|
"valid_targets_min": 15505
|
|
},
|
|
{
|
|
"epoch": 4.281449893390192,
|
|
"grad_norm": 0.16258821898989245,
|
|
"learning_rate": 2.499127016969671e-06,
|
|
"loss": 1.0034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31064146757125854,
|
|
"step": 1006,
|
|
"valid_targets_mean": 16071.4,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.15808292683731548,
|
|
"learning_rate": 2.4704310549551934e-06,
|
|
"loss": 1.0443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2495027780532837,
|
|
"step": 1007,
|
|
"valid_targets_mean": 15003.7,
|
|
"valid_targets_min": 12787
|
|
},
|
|
{
|
|
"epoch": 4.28997867803838,
|
|
"grad_norm": 0.1605732731969054,
|
|
"learning_rate": 2.441889946320266e-06,
|
|
"loss": 0.9922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703011929988861,
|
|
"step": 1008,
|
|
"valid_targets_mean": 16140.7,
|
|
"valid_targets_min": 15389
|
|
},
|
|
{
|
|
"epoch": 4.294243070362473,
|
|
"grad_norm": 0.16814631204029493,
|
|
"learning_rate": 2.4135039431924233e-06,
|
|
"loss": 1.0159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3285123109817505,
|
|
"step": 1009,
|
|
"valid_targets_mean": 16154.0,
|
|
"valid_targets_min": 15556
|
|
},
|
|
{
|
|
"epoch": 4.298507462686567,
|
|
"grad_norm": 0.15002691688599254,
|
|
"learning_rate": 2.3852732963290426e-06,
|
|
"loss": 0.9767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269066572189331,
|
|
"step": 1010,
|
|
"valid_targets_mean": 15455.0,
|
|
"valid_targets_min": 13975
|
|
},
|
|
{
|
|
"epoch": 4.302771855010661,
|
|
"grad_norm": 0.17298389903055245,
|
|
"learning_rate": 2.3571982551150853e-06,
|
|
"loss": 1.0086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29187387228012085,
|
|
"step": 1011,
|
|
"valid_targets_mean": 16222.7,
|
|
"valid_targets_min": 15806
|
|
},
|
|
{
|
|
"epoch": 4.3070362473347545,
|
|
"grad_norm": 0.16594472638041377,
|
|
"learning_rate": 2.329279067560937e-06,
|
|
"loss": 1.018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19952982664108276,
|
|
"step": 1012,
|
|
"valid_targets_mean": 11263.8,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 4.311300639658849,
|
|
"grad_norm": 0.14878057763580033,
|
|
"learning_rate": 2.301515980300182e-06,
|
|
"loss": 0.9703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24623236060142517,
|
|
"step": 1013,
|
|
"valid_targets_mean": 15904.6,
|
|
"valid_targets_min": 14269
|
|
},
|
|
{
|
|
"epoch": 4.315565031982943,
|
|
"grad_norm": 0.1664120989672971,
|
|
"learning_rate": 2.2739092385874527e-06,
|
|
"loss": 1.0225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3133246898651123,
|
|
"step": 1014,
|
|
"valid_targets_mean": 15890.2,
|
|
"valid_targets_min": 13080
|
|
},
|
|
{
|
|
"epoch": 4.319829424307036,
|
|
"grad_norm": 0.1679412224909653,
|
|
"learning_rate": 2.2464590862962443e-06,
|
|
"loss": 1.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157842218875885,
|
|
"step": 1015,
|
|
"valid_targets_mean": 9874.5,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 4.32409381663113,
|
|
"grad_norm": 0.15170182866254592,
|
|
"learning_rate": 2.219165765916769e-06,
|
|
"loss": 0.9763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510037124156952,
|
|
"step": 1016,
|
|
"valid_targets_mean": 16141.7,
|
|
"valid_targets_min": 15225
|
|
},
|
|
{
|
|
"epoch": 4.3283582089552235,
|
|
"grad_norm": 0.1566256747824191,
|
|
"learning_rate": 2.192029518553798e-06,
|
|
"loss": 1.0175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27299946546554565,
|
|
"step": 1017,
|
|
"valid_targets_mean": 16163.5,
|
|
"valid_targets_min": 15023
|
|
},
|
|
{
|
|
"epoch": 4.332622601279318,
|
|
"grad_norm": 0.16085658004618664,
|
|
"learning_rate": 2.165050583924566e-06,
|
|
"loss": 1.0073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17534524202346802,
|
|
"step": 1018,
|
|
"valid_targets_mean": 10685.0,
|
|
"valid_targets_min": 8089
|
|
},
|
|
{
|
|
"epoch": 4.336886993603412,
|
|
"grad_norm": 0.15338849547135697,
|
|
"learning_rate": 2.1382292003566163e-06,
|
|
"loss": 0.9786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618844509124756,
|
|
"step": 1019,
|
|
"valid_targets_mean": 16124.3,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 4.341151385927505,
|
|
"grad_norm": 0.17116869671671522,
|
|
"learning_rate": 2.1115656047857213e-06,
|
|
"loss": 1.0341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3112441301345825,
|
|
"step": 1020,
|
|
"valid_targets_mean": 16112.7,
|
|
"valid_targets_min": 14259
|
|
},
|
|
{
|
|
"epoch": 4.345415778251599,
|
|
"grad_norm": 0.150789433772488,
|
|
"learning_rate": 2.0850600327537806e-06,
|
|
"loss": 1.0104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22541023790836334,
|
|
"step": 1021,
|
|
"valid_targets_mean": 14805.6,
|
|
"valid_targets_min": 12847
|
|
},
|
|
{
|
|
"epoch": 4.349680170575693,
|
|
"grad_norm": 0.16306860494702535,
|
|
"learning_rate": 2.058712718406719e-06,
|
|
"loss": 0.9824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656344473361969,
|
|
"step": 1022,
|
|
"valid_targets_mean": 16220.5,
|
|
"valid_targets_min": 15492
|
|
},
|
|
{
|
|
"epoch": 4.353944562899787,
|
|
"grad_norm": 0.15423342533630213,
|
|
"learning_rate": 2.032523894492471e-06,
|
|
"loss": 0.9938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22228878736495972,
|
|
"step": 1023,
|
|
"valid_targets_mean": 13809.1,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 4.358208955223881,
|
|
"grad_norm": 0.14751574771293288,
|
|
"learning_rate": 2.0064937923588634e-06,
|
|
"loss": 0.9891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24881970882415771,
|
|
"step": 1024,
|
|
"valid_targets_mean": 15233.9,
|
|
"valid_targets_min": 13301
|
|
},
|
|
{
|
|
"epoch": 4.362473347547974,
|
|
"grad_norm": 0.16974394234227128,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 1.0345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3334333002567291,
|
|
"step": 1025,
|
|
"valid_targets_mean": 16119.5,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 4.366737739872068,
|
|
"grad_norm": 0.15804196601559314,
|
|
"learning_rate": 1.954910671812298e-06,
|
|
"loss": 0.9604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16841614246368408,
|
|
"step": 1026,
|
|
"valid_targets_mean": 10671.9,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 4.371002132196162,
|
|
"grad_norm": 0.15478484273028714,
|
|
"learning_rate": 1.9293581090762894e-06,
|
|
"loss": 0.9773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25780200958251953,
|
|
"step": 1027,
|
|
"valid_targets_mean": 16111.6,
|
|
"valid_targets_min": 15024
|
|
},
|
|
{
|
|
"epoch": 4.3752665245202556,
|
|
"grad_norm": 0.16077698496779474,
|
|
"learning_rate": 1.9039651794708058e-06,
|
|
"loss": 1.0045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041727542877197,
|
|
"step": 1028,
|
|
"valid_targets_mean": 16166.4,
|
|
"valid_targets_min": 15153
|
|
},
|
|
{
|
|
"epoch": 4.37953091684435,
|
|
"grad_norm": 0.15765458616190584,
|
|
"learning_rate": 1.8787321073128817e-06,
|
|
"loss": 1.0002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16751885414123535,
|
|
"step": 1029,
|
|
"valid_targets_mean": 10813.5,
|
|
"valid_targets_min": 7229
|
|
},
|
|
{
|
|
"epoch": 4.383795309168444,
|
|
"grad_norm": 0.15327668842291345,
|
|
"learning_rate": 1.8536591155073958e-06,
|
|
"loss": 1.035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26276057958602905,
|
|
"step": 1030,
|
|
"valid_targets_mean": 16132.2,
|
|
"valid_targets_min": 13998
|
|
},
|
|
{
|
|
"epoch": 4.388059701492537,
|
|
"grad_norm": 0.16348802349145783,
|
|
"learning_rate": 1.8287464255451181e-06,
|
|
"loss": 1.0365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31952422857284546,
|
|
"step": 1031,
|
|
"valid_targets_mean": 16103.5,
|
|
"valid_targets_min": 15221
|
|
},
|
|
{
|
|
"epoch": 4.392324093816631,
|
|
"grad_norm": 0.15242318786548045,
|
|
"learning_rate": 1.803994257500714e-06,
|
|
"loss": 1.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22637370228767395,
|
|
"step": 1032,
|
|
"valid_targets_mean": 12657.4,
|
|
"valid_targets_min": 10184
|
|
},
|
|
{
|
|
"epoch": 4.396588486140725,
|
|
"grad_norm": 0.1570367933427492,
|
|
"learning_rate": 1.7794028300308474e-06,
|
|
"loss": 0.97,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758348286151886,
|
|
"step": 1033,
|
|
"valid_targets_mean": 16116.2,
|
|
"valid_targets_min": 14473
|
|
},
|
|
{
|
|
"epoch": 4.400852878464819,
|
|
"grad_norm": 0.15577642436453135,
|
|
"learning_rate": 1.7549723603722003e-06,
|
|
"loss": 0.9842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26771968603134155,
|
|
"step": 1034,
|
|
"valid_targets_mean": 16112.7,
|
|
"valid_targets_min": 13108
|
|
},
|
|
{
|
|
"epoch": 4.405117270788913,
|
|
"grad_norm": 0.14800215530279717,
|
|
"learning_rate": 1.730703064339605e-06,
|
|
"loss": 1.0185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370331585407257,
|
|
"step": 1035,
|
|
"valid_targets_mean": 15234.6,
|
|
"valid_targets_min": 13039
|
|
},
|
|
{
|
|
"epoch": 4.409381663113006,
|
|
"grad_norm": 0.16303545524154645,
|
|
"learning_rate": 1.7065951563241022e-06,
|
|
"loss": 1.0129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689518332481384,
|
|
"step": 1036,
|
|
"valid_targets_mean": 16181.4,
|
|
"valid_targets_min": 15559
|
|
},
|
|
{
|
|
"epoch": 4.4136460554371,
|
|
"grad_norm": 0.15966327249920928,
|
|
"learning_rate": 1.682648849291051e-06,
|
|
"loss": 1.0179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20377540588378906,
|
|
"step": 1037,
|
|
"valid_targets_mean": 11859.0,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 4.417910447761194,
|
|
"grad_norm": 0.14769832114251558,
|
|
"learning_rate": 1.6588643547782579e-06,
|
|
"loss": 0.9838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632647454738617,
|
|
"step": 1038,
|
|
"valid_targets_mean": 16072.0,
|
|
"valid_targets_min": 14287
|
|
},
|
|
{
|
|
"epoch": 4.422174840085288,
|
|
"grad_norm": 0.15809775822892966,
|
|
"learning_rate": 1.6352418828941052e-06,
|
|
"loss": 1.021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000580370426178,
|
|
"step": 1039,
|
|
"valid_targets_mean": 16157.0,
|
|
"valid_targets_min": 15147
|
|
},
|
|
{
|
|
"epoch": 4.426439232409382,
|
|
"grad_norm": 0.16399847192577693,
|
|
"learning_rate": 1.6117816423156952e-06,
|
|
"loss": 1.0063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15796786546707153,
|
|
"step": 1040,
|
|
"valid_targets_mean": 8889.2,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 4.430703624733475,
|
|
"grad_norm": 0.14611658166081745,
|
|
"learning_rate": 1.5884838402870029e-06,
|
|
"loss": 0.9856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24859371781349182,
|
|
"step": 1041,
|
|
"valid_targets_mean": 15932.7,
|
|
"valid_targets_min": 9286
|
|
},
|
|
{
|
|
"epoch": 4.434968017057569,
|
|
"grad_norm": 0.16102392580702096,
|
|
"learning_rate": 1.5653486826170384e-06,
|
|
"loss": 1.0101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845401465892792,
|
|
"step": 1042,
|
|
"valid_targets_mean": 16150.2,
|
|
"valid_targets_min": 14556
|
|
},
|
|
{
|
|
"epoch": 4.439232409381663,
|
|
"grad_norm": 0.1477293053720464,
|
|
"learning_rate": 1.5423763736780583e-06,
|
|
"loss": 0.9726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19538019597530365,
|
|
"step": 1043,
|
|
"valid_targets_mean": 11303.3,
|
|
"valid_targets_min": 8757
|
|
},
|
|
{
|
|
"epoch": 4.443496801705757,
|
|
"grad_norm": 0.16104094252331272,
|
|
"learning_rate": 1.5195671164037173e-06,
|
|
"loss": 1.0075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26239079236984253,
|
|
"step": 1044,
|
|
"valid_targets_mean": 16184.0,
|
|
"valid_targets_min": 15290
|
|
},
|
|
{
|
|
"epoch": 4.447761194029851,
|
|
"grad_norm": 0.1576181987064525,
|
|
"learning_rate": 1.496921112287315e-06,
|
|
"loss": 1.0156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28871822357177734,
|
|
"step": 1045,
|
|
"valid_targets_mean": 16116.8,
|
|
"valid_targets_min": 14479
|
|
},
|
|
{
|
|
"epoch": 4.452025586353945,
|
|
"grad_norm": 0.1514559329091864,
|
|
"learning_rate": 1.4744385613799894e-06,
|
|
"loss": 0.9911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23139512538909912,
|
|
"step": 1046,
|
|
"valid_targets_mean": 15366.7,
|
|
"valid_targets_min": 14043
|
|
},
|
|
{
|
|
"epoch": 4.456289978678038,
|
|
"grad_norm": 0.1544327160620694,
|
|
"learning_rate": 1.4521196622889644e-06,
|
|
"loss": 1.0174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28524869680404663,
|
|
"step": 1047,
|
|
"valid_targets_mean": 16143.6,
|
|
"valid_targets_min": 15408
|
|
},
|
|
{
|
|
"epoch": 4.460554371002132,
|
|
"grad_norm": 0.1539747406488716,
|
|
"learning_rate": 1.4299646121757892e-06,
|
|
"loss": 0.9533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21494226157665253,
|
|
"step": 1048,
|
|
"valid_targets_mean": 12775.2,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 4.464818763326226,
|
|
"grad_norm": 0.14883048075606736,
|
|
"learning_rate": 1.4079736067545912e-06,
|
|
"loss": 1.0303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26429611444473267,
|
|
"step": 1049,
|
|
"valid_targets_mean": 15916.8,
|
|
"valid_targets_min": 13208
|
|
},
|
|
{
|
|
"epoch": 4.46908315565032,
|
|
"grad_norm": 0.15749999395713188,
|
|
"learning_rate": 1.3861468402903634e-06,
|
|
"loss": 1.0528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3019416332244873,
|
|
"step": 1050,
|
|
"valid_targets_mean": 15767.2,
|
|
"valid_targets_min": 7641
|
|
},
|
|
{
|
|
"epoch": 4.473347547974414,
|
|
"grad_norm": 0.15992625620668152,
|
|
"learning_rate": 1.3644845055972322e-06,
|
|
"loss": 1.0067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1712096482515335,
|
|
"step": 1051,
|
|
"valid_targets_mean": 9511.8,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 4.477611940298507,
|
|
"grad_norm": 0.14959621631803036,
|
|
"learning_rate": 1.3429867940367626e-06,
|
|
"loss": 1.0094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25053784251213074,
|
|
"step": 1052,
|
|
"valid_targets_mean": 16088.7,
|
|
"valid_targets_min": 14698
|
|
},
|
|
{
|
|
"epoch": 4.481876332622601,
|
|
"grad_norm": 0.15615095263392972,
|
|
"learning_rate": 1.321653895516264e-06,
|
|
"loss": 1.0581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3083847761154175,
|
|
"step": 1053,
|
|
"valid_targets_mean": 16089.3,
|
|
"valid_targets_min": 14313
|
|
},
|
|
{
|
|
"epoch": 4.486140724946695,
|
|
"grad_norm": 0.15470075468336017,
|
|
"learning_rate": 1.3004859984871199e-06,
|
|
"loss": 1.0584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1911090612411499,
|
|
"step": 1054,
|
|
"valid_targets_mean": 10396.3,
|
|
"valid_targets_min": 6721
|
|
},
|
|
{
|
|
"epoch": 4.490405117270789,
|
|
"grad_norm": 0.15536045395181527,
|
|
"learning_rate": 1.279483289943102e-06,
|
|
"loss": 1.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26203101873397827,
|
|
"step": 1055,
|
|
"valid_targets_mean": 16159.7,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 4.494669509594883,
|
|
"grad_norm": 0.1601967574271805,
|
|
"learning_rate": 1.2586459554187558e-06,
|
|
"loss": 0.9644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.287068247795105,
|
|
"step": 1056,
|
|
"valid_targets_mean": 16055.6,
|
|
"valid_targets_min": 11563
|
|
},
|
|
{
|
|
"epoch": 4.498933901918977,
|
|
"grad_norm": 0.1441967479889594,
|
|
"learning_rate": 1.2379741789877175e-06,
|
|
"loss": 0.9872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.211162269115448,
|
|
"step": 1057,
|
|
"valid_targets_mean": 13391.1,
|
|
"valid_targets_min": 11725
|
|
},
|
|
{
|
|
"epoch": 4.50319829424307,
|
|
"grad_norm": 0.15205725282962812,
|
|
"learning_rate": 1.2174681432611245e-06,
|
|
"loss": 1.0221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103165030479431,
|
|
"step": 1058,
|
|
"valid_targets_mean": 16118.4,
|
|
"valid_targets_min": 15359
|
|
},
|
|
{
|
|
"epoch": 4.507462686567164,
|
|
"grad_norm": 0.16376572486838228,
|
|
"learning_rate": 1.1971280293859811e-06,
|
|
"loss": 1.0478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29465800523757935,
|
|
"step": 1059,
|
|
"valid_targets_mean": 16146.3,
|
|
"valid_targets_min": 15305
|
|
},
|
|
{
|
|
"epoch": 4.5117270788912585,
|
|
"grad_norm": 0.14402907076972832,
|
|
"learning_rate": 1.17695401704357e-06,
|
|
"loss": 1.0072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25192785263061523,
|
|
"step": 1060,
|
|
"valid_targets_mean": 15245.5,
|
|
"valid_targets_min": 13728
|
|
},
|
|
{
|
|
"epoch": 4.515991471215352,
|
|
"grad_norm": 0.16432331284789337,
|
|
"learning_rate": 1.1569462844478552e-06,
|
|
"loss": 0.9562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26456835865974426,
|
|
"step": 1061,
|
|
"valid_targets_mean": 16195.4,
|
|
"valid_targets_min": 15305
|
|
},
|
|
{
|
|
"epoch": 4.520255863539446,
|
|
"grad_norm": 0.16065398401619552,
|
|
"learning_rate": 1.1371050083439107e-06,
|
|
"loss": 1.0315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19990354776382446,
|
|
"step": 1062,
|
|
"valid_targets_mean": 10561.4,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 4.524520255863539,
|
|
"grad_norm": 0.14646982443053014,
|
|
"learning_rate": 1.1174303640063622e-06,
|
|
"loss": 1.0269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594957947731018,
|
|
"step": 1063,
|
|
"valid_targets_mean": 16008.1,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 4.528784648187633,
|
|
"grad_norm": 0.15701583127972654,
|
|
"learning_rate": 1.097922525237849e-06,
|
|
"loss": 0.9584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27362123131752014,
|
|
"step": 1064,
|
|
"valid_targets_mean": 16250.7,
|
|
"valid_targets_min": 15657
|
|
},
|
|
{
|
|
"epoch": 4.533049040511727,
|
|
"grad_norm": 0.15381529886819967,
|
|
"learning_rate": 1.078581664367455e-06,
|
|
"loss": 1.0058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17687851190567017,
|
|
"step": 1065,
|
|
"valid_targets_mean": 9856.1,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 4.537313432835821,
|
|
"grad_norm": 0.14443424313899902,
|
|
"learning_rate": 1.0594079522492274e-06,
|
|
"loss": 0.9305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510538101196289,
|
|
"step": 1066,
|
|
"valid_targets_mean": 16141.2,
|
|
"valid_targets_min": 14841
|
|
},
|
|
{
|
|
"epoch": 4.541577825159915,
|
|
"grad_norm": 0.15747376640028649,
|
|
"learning_rate": 1.040401558260633e-06,
|
|
"loss": 1.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29428815841674805,
|
|
"step": 1067,
|
|
"valid_targets_mean": 16152.6,
|
|
"valid_targets_min": 15628
|
|
},
|
|
{
|
|
"epoch": 4.545842217484008,
|
|
"grad_norm": 0.1613989326737328,
|
|
"learning_rate": 1.0215626503010911e-06,
|
|
"loss": 1.032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21291378140449524,
|
|
"step": 1068,
|
|
"valid_targets_mean": 12159.6,
|
|
"valid_targets_min": 10098
|
|
},
|
|
{
|
|
"epoch": 4.550106609808102,
|
|
"grad_norm": 0.14544194922176812,
|
|
"learning_rate": 1.002891394790475e-06,
|
|
"loss": 1.0014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27118825912475586,
|
|
"step": 1069,
|
|
"valid_targets_mean": 16114.3,
|
|
"valid_targets_min": 15154
|
|
},
|
|
{
|
|
"epoch": 4.554371002132196,
|
|
"grad_norm": 0.15391142328365776,
|
|
"learning_rate": 9.843879566676273e-07,
|
|
"loss": 1.005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847220003604889,
|
|
"step": 1070,
|
|
"valid_targets_mean": 16146.2,
|
|
"valid_targets_min": 15081
|
|
},
|
|
{
|
|
"epoch": 4.55863539445629,
|
|
"grad_norm": 0.14876989839842653,
|
|
"learning_rate": 9.660524993889386e-07,
|
|
"loss": 1.0106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22060608863830566,
|
|
"step": 1071,
|
|
"valid_targets_mean": 14322.9,
|
|
"valid_targets_min": 12453
|
|
},
|
|
{
|
|
"epoch": 4.562899786780384,
|
|
"grad_norm": 0.15028057509574336,
|
|
"learning_rate": 9.478851849268733e-07,
|
|
"loss": 0.9695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26452404260635376,
|
|
"step": 1072,
|
|
"valid_targets_mean": 16183.2,
|
|
"valid_targets_min": 15361
|
|
},
|
|
{
|
|
"epoch": 4.567164179104478,
|
|
"grad_norm": 0.15781811933848672,
|
|
"learning_rate": 9.298861737685527e-07,
|
|
"loss": 0.9916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241704761981964,
|
|
"step": 1073,
|
|
"valid_targets_mean": 13273.8,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.14766354955730807,
|
|
"learning_rate": 9.120556249143341e-07,
|
|
"loss": 1.0185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25623926520347595,
|
|
"step": 1074,
|
|
"valid_targets_mean": 15460.6,
|
|
"valid_targets_min": 13041
|
|
},
|
|
{
|
|
"epoch": 4.575692963752665,
|
|
"grad_norm": 0.15939438874130157,
|
|
"learning_rate": 8.943936958763988e-07,
|
|
"loss": 1.0483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3335661292076111,
|
|
"step": 1075,
|
|
"valid_targets_mean": 16153.5,
|
|
"valid_targets_min": 15474
|
|
},
|
|
{
|
|
"epoch": 4.5799573560767595,
|
|
"grad_norm": 0.16581608055696434,
|
|
"learning_rate": 8.769005426773836e-07,
|
|
"loss": 1.0092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1556980311870575,
|
|
"step": 1076,
|
|
"valid_targets_mean": 8296.6,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 4.584221748400853,
|
|
"grad_norm": 0.14417940194847872,
|
|
"learning_rate": 8.595763198489714e-07,
|
|
"loss": 0.9909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521516680717468,
|
|
"step": 1077,
|
|
"valid_targets_mean": 15818.3,
|
|
"valid_targets_min": 7637
|
|
},
|
|
{
|
|
"epoch": 4.588486140724947,
|
|
"grad_norm": 0.15714196727427426,
|
|
"learning_rate": 8.42421180430546e-07,
|
|
"loss": 1.0011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29390835762023926,
|
|
"step": 1078,
|
|
"valid_targets_mean": 16082.0,
|
|
"valid_targets_min": 14558
|
|
},
|
|
{
|
|
"epoch": 4.59275053304904,
|
|
"grad_norm": 0.15621193374665054,
|
|
"learning_rate": 8.254352759678386e-07,
|
|
"loss": 1.0095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19507339596748352,
|
|
"step": 1079,
|
|
"valid_targets_mean": 10988.8,
|
|
"valid_targets_min": 7524
|
|
},
|
|
{
|
|
"epoch": 4.597014925373134,
|
|
"grad_norm": 0.15080103999037953,
|
|
"learning_rate": 8.086187565115877e-07,
|
|
"loss": 1.0579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27117210626602173,
|
|
"step": 1080,
|
|
"valid_targets_mean": 16105.0,
|
|
"valid_targets_min": 14248
|
|
},
|
|
{
|
|
"epoch": 4.601279317697228,
|
|
"grad_norm": 0.15637589604321644,
|
|
"learning_rate": 7.919717706162067e-07,
|
|
"loss": 1.0249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28904783725738525,
|
|
"step": 1081,
|
|
"valid_targets_mean": 16170.6,
|
|
"valid_targets_min": 15348
|
|
},
|
|
{
|
|
"epoch": 4.605543710021322,
|
|
"grad_norm": 0.1497637307594254,
|
|
"learning_rate": 7.754944653384777e-07,
|
|
"loss": 0.9961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2104974389076233,
|
|
"step": 1082,
|
|
"valid_targets_mean": 13349.8,
|
|
"valid_targets_min": 10099
|
|
},
|
|
{
|
|
"epoch": 4.609808102345416,
|
|
"grad_norm": 0.15800987460369717,
|
|
"learning_rate": 7.591869862362534e-07,
|
|
"loss": 1.0077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26623108983039856,
|
|
"step": 1083,
|
|
"valid_targets_mean": 16033.9,
|
|
"valid_targets_min": 13427
|
|
},
|
|
{
|
|
"epoch": 4.61407249466951,
|
|
"grad_norm": 0.1531642828562778,
|
|
"learning_rate": 7.430494773671682e-07,
|
|
"loss": 0.9676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077933192253113,
|
|
"step": 1084,
|
|
"valid_targets_mean": 16172.6,
|
|
"valid_targets_min": 15109
|
|
},
|
|
{
|
|
"epoch": 4.618336886993603,
|
|
"grad_norm": 0.16404055431288472,
|
|
"learning_rate": 7.270820812873714e-07,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25060850381851196,
|
|
"step": 1085,
|
|
"valid_targets_mean": 14830.2,
|
|
"valid_targets_min": 12981
|
|
},
|
|
{
|
|
"epoch": 4.622601279317697,
|
|
"grad_norm": 0.15289769966609285,
|
|
"learning_rate": 7.112849390502563e-07,
|
|
"loss": 0.9816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679944932460785,
|
|
"step": 1086,
|
|
"valid_targets_mean": 16209.5,
|
|
"valid_targets_min": 15350
|
|
},
|
|
{
|
|
"epoch": 4.6268656716417915,
|
|
"grad_norm": 0.15369614819749217,
|
|
"learning_rate": 6.956581902052306e-07,
|
|
"loss": 1.0225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20471858978271484,
|
|
"step": 1087,
|
|
"valid_targets_mean": 11744.4,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 4.631130063965885,
|
|
"grad_norm": 0.14586590402152805,
|
|
"learning_rate": 6.802019727964593e-07,
|
|
"loss": 0.9888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23928365111351013,
|
|
"step": 1088,
|
|
"valid_targets_mean": 16153.6,
|
|
"valid_targets_min": 15511
|
|
},
|
|
{
|
|
"epoch": 4.635394456289979,
|
|
"grad_norm": 0.16034697491320948,
|
|
"learning_rate": 6.64916423361679e-07,
|
|
"loss": 1.018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293688029050827,
|
|
"step": 1089,
|
|
"valid_targets_mean": 16096.2,
|
|
"valid_targets_min": 15122
|
|
},
|
|
{
|
|
"epoch": 4.639658848614072,
|
|
"grad_norm": 0.16092679284373484,
|
|
"learning_rate": 6.498016769309567e-07,
|
|
"loss": 1.0082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1474006474018097,
|
|
"step": 1090,
|
|
"valid_targets_mean": 8071.3,
|
|
"valid_targets_min": 2493
|
|
},
|
|
{
|
|
"epoch": 4.643923240938166,
|
|
"grad_norm": 0.1374452974138826,
|
|
"learning_rate": 6.348578670255224e-07,
|
|
"loss": 1.0004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24160240590572357,
|
|
"step": 1091,
|
|
"valid_targets_mean": 16186.2,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 4.6481876332622605,
|
|
"grad_norm": 0.14990534345753154,
|
|
"learning_rate": 6.200851256565799e-07,
|
|
"loss": 1.0026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2906924784183502,
|
|
"step": 1092,
|
|
"valid_targets_mean": 15866.9,
|
|
"valid_targets_min": 5993
|
|
},
|
|
{
|
|
"epoch": 4.652452025586354,
|
|
"grad_norm": 0.15052030368326985,
|
|
"learning_rate": 6.054835833241357e-07,
|
|
"loss": 0.9658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2022162675857544,
|
|
"step": 1093,
|
|
"valid_targets_mean": 13206.6,
|
|
"valid_targets_min": 10096
|
|
},
|
|
{
|
|
"epoch": 4.656716417910448,
|
|
"grad_norm": 0.14661811747825992,
|
|
"learning_rate": 5.910533690158593e-07,
|
|
"loss": 1.0125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569063901901245,
|
|
"step": 1094,
|
|
"valid_targets_mean": 16232.6,
|
|
"valid_targets_min": 15734
|
|
},
|
|
{
|
|
"epoch": 4.660980810234541,
|
|
"grad_norm": 0.14959521605839796,
|
|
"learning_rate": 5.767946102059307e-07,
|
|
"loss": 0.9861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763957977294922,
|
|
"step": 1095,
|
|
"valid_targets_mean": 16205.2,
|
|
"valid_targets_min": 15637
|
|
},
|
|
{
|
|
"epoch": 4.665245202558635,
|
|
"grad_norm": 0.1517174769008749,
|
|
"learning_rate": 5.627074328539173e-07,
|
|
"loss": 1.0086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22489699721336365,
|
|
"step": 1096,
|
|
"valid_targets_mean": 13320.2,
|
|
"valid_targets_min": 11667
|
|
},
|
|
{
|
|
"epoch": 4.669509594882729,
|
|
"grad_norm": 0.1540607434871499,
|
|
"learning_rate": 5.487919614036741e-07,
|
|
"loss": 1.0345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2853516936302185,
|
|
"step": 1097,
|
|
"valid_targets_mean": 16139.1,
|
|
"valid_targets_min": 14753
|
|
},
|
|
{
|
|
"epoch": 4.673773987206823,
|
|
"grad_norm": 0.15418499967552973,
|
|
"learning_rate": 5.350483187822231e-07,
|
|
"loss": 1.03,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25601962208747864,
|
|
"step": 1098,
|
|
"valid_targets_mean": 12700.3,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 4.678038379530917,
|
|
"grad_norm": 0.14002577075034336,
|
|
"learning_rate": 5.214766263986848e-07,
|
|
"loss": 1.0247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26440098881721497,
|
|
"step": 1099,
|
|
"valid_targets_mean": 15921.6,
|
|
"valid_targets_min": 15264
|
|
},
|
|
{
|
|
"epoch": 4.682302771855011,
|
|
"grad_norm": 0.15633515780597726,
|
|
"learning_rate": 5.080770041431926e-07,
|
|
"loss": 0.9869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28674036264419556,
|
|
"step": 1100,
|
|
"valid_targets_mean": 16070.0,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 4.686567164179104,
|
|
"grad_norm": 0.15843619704035772,
|
|
"learning_rate": 4.948495703858492e-07,
|
|
"loss": 0.9743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14524246752262115,
|
|
"step": 1101,
|
|
"valid_targets_mean": 8999.8,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 4.690831556503198,
|
|
"grad_norm": 0.14453182191447111,
|
|
"learning_rate": 4.81794441975667e-07,
|
|
"loss": 1.0162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600679099559784,
|
|
"step": 1102,
|
|
"valid_targets_mean": 16092.8,
|
|
"valid_targets_min": 14994
|
|
},
|
|
{
|
|
"epoch": 4.6950959488272925,
|
|
"grad_norm": 0.15880734371219649,
|
|
"learning_rate": 4.689117342395388e-07,
|
|
"loss": 0.9894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726406455039978,
|
|
"step": 1103,
|
|
"valid_targets_mean": 16196.6,
|
|
"valid_targets_min": 15475
|
|
},
|
|
{
|
|
"epoch": 4.699360341151386,
|
|
"grad_norm": 0.1570016874540462,
|
|
"learning_rate": 4.5620156098122204e-07,
|
|
"loss": 1.0145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19400817155838013,
|
|
"step": 1104,
|
|
"valid_targets_mean": 10582.8,
|
|
"valid_targets_min": 5279
|
|
},
|
|
{
|
|
"epoch": 4.70362473347548,
|
|
"grad_norm": 0.1477680049899446,
|
|
"learning_rate": 4.4366403448033334e-07,
|
|
"loss": 1.0239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575407922267914,
|
|
"step": 1105,
|
|
"valid_targets_mean": 16152.5,
|
|
"valid_targets_min": 15082
|
|
},
|
|
{
|
|
"epoch": 4.707889125799573,
|
|
"grad_norm": 0.15557190332540394,
|
|
"learning_rate": 4.3129926549136057e-07,
|
|
"loss": 1.0442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28639519214630127,
|
|
"step": 1106,
|
|
"valid_targets_mean": 16147.2,
|
|
"valid_targets_min": 15112
|
|
},
|
|
{
|
|
"epoch": 4.712153518123667,
|
|
"grad_norm": 0.143537767091724,
|
|
"learning_rate": 4.191073632426701e-07,
|
|
"loss": 0.9751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21357901394367218,
|
|
"step": 1107,
|
|
"valid_targets_mean": 13378.2,
|
|
"valid_targets_min": 10566
|
|
},
|
|
{
|
|
"epoch": 4.7164179104477615,
|
|
"grad_norm": 0.15879123382097343,
|
|
"learning_rate": 4.0708843543555643e-07,
|
|
"loss": 0.9998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2900780439376831,
|
|
"step": 1108,
|
|
"valid_targets_mean": 15993.5,
|
|
"valid_targets_min": 10506
|
|
},
|
|
{
|
|
"epoch": 4.720682302771855,
|
|
"grad_norm": 0.15035207990047497,
|
|
"learning_rate": 3.95242588243292e-07,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901480793952942,
|
|
"step": 1109,
|
|
"valid_targets_mean": 16143.4,
|
|
"valid_targets_min": 15280
|
|
},
|
|
{
|
|
"epoch": 4.724946695095949,
|
|
"grad_norm": 0.1460776092827468,
|
|
"learning_rate": 3.8356992631017e-07,
|
|
"loss": 1.0324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2313530147075653,
|
|
"step": 1110,
|
|
"valid_targets_mean": 15238.5,
|
|
"valid_targets_min": 13159
|
|
},
|
|
{
|
|
"epoch": 4.729211087420042,
|
|
"grad_norm": 0.15277335121013408,
|
|
"learning_rate": 3.720705527506008e-07,
|
|
"loss": 1.0195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3014744520187378,
|
|
"step": 1111,
|
|
"valid_targets_mean": 16091.4,
|
|
"valid_targets_min": 14723
|
|
},
|
|
{
|
|
"epoch": 4.733475479744136,
|
|
"grad_norm": 0.16052988590923026,
|
|
"learning_rate": 3.60744569148197e-07,
|
|
"loss": 1.066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2035495489835739,
|
|
"step": 1112,
|
|
"valid_targets_mean": 11411.5,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 4.73773987206823,
|
|
"grad_norm": 0.14426694374247717,
|
|
"learning_rate": 3.4959207555485873e-07,
|
|
"loss": 1.0372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27664193511009216,
|
|
"step": 1113,
|
|
"valid_targets_mean": 15943.1,
|
|
"valid_targets_min": 14159
|
|
},
|
|
{
|
|
"epoch": 4.742004264392325,
|
|
"grad_norm": 0.1679484644587711,
|
|
"learning_rate": 3.3861317048992317e-07,
|
|
"loss": 1.04,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30087628960609436,
|
|
"step": 1114,
|
|
"valid_targets_mean": 16178.0,
|
|
"valid_targets_min": 15181
|
|
},
|
|
{
|
|
"epoch": 4.746268656716418,
|
|
"grad_norm": 0.14362575674510863,
|
|
"learning_rate": 3.278079509392562e-07,
|
|
"loss": 0.97,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1749127209186554,
|
|
"step": 1115,
|
|
"valid_targets_mean": 9657.3,
|
|
"valid_targets_min": 3062
|
|
},
|
|
{
|
|
"epoch": 4.750533049040512,
|
|
"grad_norm": 0.1414962354318647,
|
|
"learning_rate": 3.171765123544224e-07,
|
|
"loss": 1.0159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2550608813762665,
|
|
"step": 1116,
|
|
"valid_targets_mean": 16089.0,
|
|
"valid_targets_min": 13436
|
|
},
|
|
{
|
|
"epoch": 4.754797441364605,
|
|
"grad_norm": 0.15812986008972324,
|
|
"learning_rate": 3.06718948651834e-07,
|
|
"loss": 1.0007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847060263156891,
|
|
"step": 1117,
|
|
"valid_targets_mean": 16174.9,
|
|
"valid_targets_min": 15128
|
|
},
|
|
{
|
|
"epoch": 4.759061833688699,
|
|
"grad_norm": 0.1440472048445097,
|
|
"learning_rate": 2.964353522119168e-07,
|
|
"loss": 1.0132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22414368391036987,
|
|
"step": 1118,
|
|
"valid_targets_mean": 13306.7,
|
|
"valid_targets_min": 11186
|
|
},
|
|
{
|
|
"epoch": 4.7633262260127935,
|
|
"grad_norm": 0.15230908494890952,
|
|
"learning_rate": 2.863258138783032e-07,
|
|
"loss": 1.0327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935028672218323,
|
|
"step": 1119,
|
|
"valid_targets_mean": 16107.2,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 4.767590618336887,
|
|
"grad_norm": 0.14417473132171305,
|
|
"learning_rate": 2.7639042295702245e-07,
|
|
"loss": 0.993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819124162197113,
|
|
"step": 1120,
|
|
"valid_targets_mean": 16163.9,
|
|
"valid_targets_min": 15717
|
|
},
|
|
{
|
|
"epoch": 4.771855010660981,
|
|
"grad_norm": 0.14151471802233015,
|
|
"learning_rate": 2.666292672157056e-07,
|
|
"loss": 0.9906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2340664118528366,
|
|
"step": 1121,
|
|
"valid_targets_mean": 15291.8,
|
|
"valid_targets_min": 13903
|
|
},
|
|
{
|
|
"epoch": 4.776119402985074,
|
|
"grad_norm": 0.15851416758379985,
|
|
"learning_rate": 2.570424328828325e-07,
|
|
"loss": 1.01,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588421106338501,
|
|
"step": 1122,
|
|
"valid_targets_mean": 16229.8,
|
|
"valid_targets_min": 15610
|
|
},
|
|
{
|
|
"epoch": 4.780383795309168,
|
|
"grad_norm": 0.15652233802051627,
|
|
"learning_rate": 2.4763000464694377e-07,
|
|
"loss": 1.0034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23127931356430054,
|
|
"step": 1123,
|
|
"valid_targets_mean": 12867.4,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 4.7846481876332625,
|
|
"grad_norm": 0.142736914108249,
|
|
"learning_rate": 2.383920656559102e-07,
|
|
"loss": 1.0252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757580876350403,
|
|
"step": 1124,
|
|
"valid_targets_mean": 15862.7,
|
|
"valid_targets_min": 14853
|
|
},
|
|
{
|
|
"epoch": 4.788912579957356,
|
|
"grad_norm": 0.15005101246689867,
|
|
"learning_rate": 2.2932869751619568e-07,
|
|
"loss": 0.983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27966541051864624,
|
|
"step": 1125,
|
|
"valid_targets_mean": 16139.7,
|
|
"valid_targets_min": 14793
|
|
},
|
|
{
|
|
"epoch": 4.79317697228145,
|
|
"grad_norm": 0.15459377610384537,
|
|
"learning_rate": 2.2043998029212643e-07,
|
|
"loss": 1.0425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19476796686649323,
|
|
"step": 1126,
|
|
"valid_targets_mean": 10342.5,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 4.797441364605544,
|
|
"grad_norm": 0.14312547549339766,
|
|
"learning_rate": 2.1172599250519398e-07,
|
|
"loss": 0.9981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23207619786262512,
|
|
"step": 1127,
|
|
"valid_targets_mean": 16088.4,
|
|
"valid_targets_min": 14763
|
|
},
|
|
{
|
|
"epoch": 4.801705756929637,
|
|
"grad_norm": 0.15523298033362853,
|
|
"learning_rate": 2.0318681113336013e-07,
|
|
"loss": 1.0299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897603511810303,
|
|
"step": 1128,
|
|
"valid_targets_mean": 16119.7,
|
|
"valid_targets_min": 14736
|
|
},
|
|
{
|
|
"epoch": 4.8059701492537314,
|
|
"grad_norm": 0.1453677379546031,
|
|
"learning_rate": 1.9482251161037302e-07,
|
|
"loss": 0.9977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18096661567687988,
|
|
"step": 1129,
|
|
"valid_targets_mean": 9719.4,
|
|
"valid_targets_min": 5689
|
|
},
|
|
{
|
|
"epoch": 4.810234541577826,
|
|
"grad_norm": 0.1488645762470813,
|
|
"learning_rate": 1.866331678251032e-07,
|
|
"loss": 1.0251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804568111896515,
|
|
"step": 1130,
|
|
"valid_targets_mean": 16003.1,
|
|
"valid_targets_min": 13828
|
|
},
|
|
{
|
|
"epoch": 4.814498933901919,
|
|
"grad_norm": 0.14846824743421222,
|
|
"learning_rate": 1.7861885212088869e-07,
|
|
"loss": 0.9969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831680178642273,
|
|
"step": 1131,
|
|
"valid_targets_mean": 15943.6,
|
|
"valid_targets_min": 13414
|
|
},
|
|
{
|
|
"epoch": 4.818763326226013,
|
|
"grad_norm": 0.14311100391724843,
|
|
"learning_rate": 1.7077963529490204e-07,
|
|
"loss": 0.9993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21057608723640442,
|
|
"step": 1132,
|
|
"valid_targets_mean": 13237.8,
|
|
"valid_targets_min": 10666
|
|
},
|
|
{
|
|
"epoch": 4.823027718550106,
|
|
"grad_norm": 0.15024873088610943,
|
|
"learning_rate": 1.6311558659751535e-07,
|
|
"loss": 0.9778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25852662324905396,
|
|
"step": 1133,
|
|
"valid_targets_mean": 16197.3,
|
|
"valid_targets_min": 14919
|
|
},
|
|
{
|
|
"epoch": 4.8272921108742,
|
|
"grad_norm": 0.15476097816328543,
|
|
"learning_rate": 1.5562677373169855e-07,
|
|
"loss": 1.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31582337617874146,
|
|
"step": 1134,
|
|
"valid_targets_mean": 15890.8,
|
|
"valid_targets_min": 11287
|
|
},
|
|
{
|
|
"epoch": 4.8315565031982945,
|
|
"grad_norm": 0.14880901722650003,
|
|
"learning_rate": 1.483132628524131e-07,
|
|
"loss": 1.0291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24785952270030975,
|
|
"step": 1135,
|
|
"valid_targets_mean": 14563.6,
|
|
"valid_targets_min": 12694
|
|
},
|
|
{
|
|
"epoch": 4.835820895522388,
|
|
"grad_norm": 0.1563087580779486,
|
|
"learning_rate": 1.4117511856603262e-07,
|
|
"loss": 1.0482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007662892341614,
|
|
"step": 1136,
|
|
"valid_targets_mean": 16095.4,
|
|
"valid_targets_min": 15282
|
|
},
|
|
{
|
|
"epoch": 4.840085287846482,
|
|
"grad_norm": 0.15273216710371285,
|
|
"learning_rate": 1.342124039297721e-07,
|
|
"loss": 1.0314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19717839360237122,
|
|
"step": 1137,
|
|
"valid_targets_mean": 10712.7,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 4.844349680170575,
|
|
"grad_norm": 0.1397819137495005,
|
|
"learning_rate": 1.2742518045112396e-07,
|
|
"loss": 1.0354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605479061603546,
|
|
"step": 1138,
|
|
"valid_targets_mean": 15546.0,
|
|
"valid_targets_min": 14178
|
|
},
|
|
{
|
|
"epoch": 4.848614072494669,
|
|
"grad_norm": 0.1565654947664924,
|
|
"learning_rate": 1.2081350808732518e-07,
|
|
"loss": 1.0282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3161640465259552,
|
|
"step": 1139,
|
|
"valid_targets_mean": 16119.0,
|
|
"valid_targets_min": 15332
|
|
},
|
|
{
|
|
"epoch": 4.8528784648187635,
|
|
"grad_norm": 0.15247671081293684,
|
|
"learning_rate": 1.143774452448243e-07,
|
|
"loss": 1.0316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16707585752010345,
|
|
"step": 1140,
|
|
"valid_targets_mean": 9525.8,
|
|
"valid_targets_min": 4937
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.1437034489057456,
|
|
"learning_rate": 1.0811704877875528e-07,
|
|
"loss": 0.9732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2450575828552246,
|
|
"step": 1141,
|
|
"valid_targets_mean": 16191.3,
|
|
"valid_targets_min": 15163
|
|
},
|
|
{
|
|
"epoch": 4.861407249466951,
|
|
"grad_norm": 0.1612664036029572,
|
|
"learning_rate": 1.0203237399245336e-07,
|
|
"loss": 1.0181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.282533198595047,
|
|
"step": 1142,
|
|
"valid_targets_mean": 16124.7,
|
|
"valid_targets_min": 15328
|
|
},
|
|
{
|
|
"epoch": 4.865671641791045,
|
|
"grad_norm": 0.14816548673203833,
|
|
"learning_rate": 9.612347463694882e-08,
|
|
"loss": 1.0314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19812676310539246,
|
|
"step": 1143,
|
|
"valid_targets_mean": 12115.9,
|
|
"valid_targets_min": 9191
|
|
},
|
|
{
|
|
"epoch": 4.869936034115138,
|
|
"grad_norm": 0.14866024981202283,
|
|
"learning_rate": 9.039040291050738e-08,
|
|
"loss": 0.9815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674241364002228,
|
|
"step": 1144,
|
|
"valid_targets_mean": 16236.7,
|
|
"valid_targets_min": 15773
|
|
},
|
|
{
|
|
"epoch": 4.8742004264392325,
|
|
"grad_norm": 0.15880645774537222,
|
|
"learning_rate": 8.483320945815499e-08,
|
|
"loss": 1.0056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802906036376953,
|
|
"step": 1145,
|
|
"valid_targets_mean": 16165.2,
|
|
"valid_targets_min": 14819
|
|
},
|
|
{
|
|
"epoch": 4.878464818763327,
|
|
"grad_norm": 0.14088755569433822,
|
|
"learning_rate": 7.945194337124262e-08,
|
|
"loss": 1.0089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21992269158363342,
|
|
"step": 1146,
|
|
"valid_targets_mean": 14201.6,
|
|
"valid_targets_min": 11573
|
|
},
|
|
{
|
|
"epoch": 4.88272921108742,
|
|
"grad_norm": 0.14899913298241538,
|
|
"learning_rate": 7.424665218700444e-08,
|
|
"loss": 1.0122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29248669743537903,
|
|
"step": 1147,
|
|
"valid_targets_mean": 16140.8,
|
|
"valid_targets_min": 15359
|
|
},
|
|
{
|
|
"epoch": 4.886993603411514,
|
|
"grad_norm": 0.1499355802037987,
|
|
"learning_rate": 6.921738188814254e-08,
|
|
"loss": 0.9767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2214260995388031,
|
|
"step": 1148,
|
|
"valid_targets_mean": 13035.8,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 4.891257995735607,
|
|
"grad_norm": 0.13887284659092788,
|
|
"learning_rate": 6.436417690241614e-08,
|
|
"loss": 0.9932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761473059654236,
|
|
"step": 1149,
|
|
"valid_targets_mean": 15234.4,
|
|
"valid_targets_min": 13564
|
|
},
|
|
{
|
|
"epoch": 4.895522388059701,
|
|
"grad_norm": 0.15423876595711647,
|
|
"learning_rate": 5.968708010225532e-08,
|
|
"loss": 0.949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720834016799927,
|
|
"step": 1150,
|
|
"valid_targets_mean": 16216.5,
|
|
"valid_targets_min": 15713
|
|
},
|
|
{
|
|
"epoch": 4.899786780383796,
|
|
"grad_norm": 0.15339007326318832,
|
|
"learning_rate": 5.518613280437901e-08,
|
|
"loss": 1.0331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17710110545158386,
|
|
"step": 1151,
|
|
"valid_targets_mean": 9263.9,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 4.904051172707889,
|
|
"grad_norm": 0.14130979523452628,
|
|
"learning_rate": 5.0861374769426433e-08,
|
|
"loss": 1.0092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627219557762146,
|
|
"step": 1152,
|
|
"valid_targets_mean": 15946.3,
|
|
"valid_targets_min": 14850
|
|
},
|
|
{
|
|
"epoch": 4.908315565031983,
|
|
"grad_norm": 0.15928432460307718,
|
|
"learning_rate": 4.671284420161071e-08,
|
|
"loss": 1.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32579365372657776,
|
|
"step": 1153,
|
|
"valid_targets_mean": 15997.7,
|
|
"valid_targets_min": 14732
|
|
},
|
|
{
|
|
"epoch": 4.912579957356077,
|
|
"grad_norm": 0.156236965321274,
|
|
"learning_rate": 4.274057774838136e-08,
|
|
"loss": 1.0205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.180985689163208,
|
|
"step": 1154,
|
|
"valid_targets_mean": 10754.5,
|
|
"valid_targets_min": 7226
|
|
},
|
|
{
|
|
"epoch": 4.91684434968017,
|
|
"grad_norm": 0.14288484599599216,
|
|
"learning_rate": 3.894461050010012e-08,
|
|
"loss": 1.0046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27671968936920166,
|
|
"step": 1155,
|
|
"valid_targets_mean": 16053.7,
|
|
"valid_targets_min": 15223
|
|
},
|
|
{
|
|
"epoch": 4.9211087420042645,
|
|
"grad_norm": 0.15468225628884527,
|
|
"learning_rate": 3.5324975989725615e-08,
|
|
"loss": 0.9843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30150264501571655,
|
|
"step": 1156,
|
|
"valid_targets_mean": 16156.8,
|
|
"valid_targets_min": 15596
|
|
},
|
|
{
|
|
"epoch": 4.925373134328359,
|
|
"grad_norm": 0.14012913428926552,
|
|
"learning_rate": 3.188170619252473e-08,
|
|
"loss": 0.9552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2062998265028,
|
|
"step": 1157,
|
|
"valid_targets_mean": 12721.6,
|
|
"valid_targets_min": 10683
|
|
},
|
|
{
|
|
"epoch": 4.929637526652452,
|
|
"grad_norm": 0.15135915977649797,
|
|
"learning_rate": 2.8614831525786147e-08,
|
|
"loss": 0.9997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682117223739624,
|
|
"step": 1158,
|
|
"valid_targets_mean": 16143.8,
|
|
"valid_targets_min": 14124
|
|
},
|
|
{
|
|
"epoch": 4.933901918976546,
|
|
"grad_norm": 0.15491333934114618,
|
|
"learning_rate": 2.552438084855613e-08,
|
|
"loss": 1.0172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2907482385635376,
|
|
"step": 1159,
|
|
"valid_targets_mean": 16117.7,
|
|
"valid_targets_min": 14424
|
|
},
|
|
{
|
|
"epoch": 4.938166311300639,
|
|
"grad_norm": 0.14336988174613013,
|
|
"learning_rate": 2.2610381461372068e-08,
|
|
"loss": 1.0428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761475145816803,
|
|
"step": 1160,
|
|
"valid_targets_mean": 15388.0,
|
|
"valid_targets_min": 13731
|
|
},
|
|
{
|
|
"epoch": 4.9424307036247335,
|
|
"grad_norm": 0.1472721332240846,
|
|
"learning_rate": 1.987285910603598e-08,
|
|
"loss": 0.9986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29423782229423523,
|
|
"step": 1161,
|
|
"valid_targets_mean": 16165.4,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 4.946695095948828,
|
|
"grad_norm": 0.15451112925589527,
|
|
"learning_rate": 1.7311837965379164e-08,
|
|
"loss": 0.9956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19189612567424774,
|
|
"step": 1162,
|
|
"valid_targets_mean": 10865.3,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 4.950959488272921,
|
|
"grad_norm": 0.14775971878449218,
|
|
"learning_rate": 1.4927340663046798e-08,
|
|
"loss": 1.0422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26835548877716064,
|
|
"step": 1163,
|
|
"valid_targets_mean": 16131.4,
|
|
"valid_targets_min": 14555
|
|
},
|
|
{
|
|
"epoch": 4.955223880597015,
|
|
"grad_norm": 0.15223179497796113,
|
|
"learning_rate": 1.2719388263300325e-08,
|
|
"loss": 1.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813838720321655,
|
|
"step": 1164,
|
|
"valid_targets_mean": 16143.4,
|
|
"valid_targets_min": 15234
|
|
},
|
|
{
|
|
"epoch": 4.959488272921108,
|
|
"grad_norm": 0.14919085924589962,
|
|
"learning_rate": 1.0688000270839827e-08,
|
|
"loss": 1.0049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17233330011367798,
|
|
"step": 1165,
|
|
"valid_targets_mean": 9892.4,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 4.963752665245202,
|
|
"grad_norm": 0.13999997154232136,
|
|
"learning_rate": 8.833194630615271e-09,
|
|
"loss": 1.0093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25140058994293213,
|
|
"step": 1166,
|
|
"valid_targets_mean": 16117.2,
|
|
"valid_targets_min": 14248
|
|
},
|
|
{
|
|
"epoch": 4.968017057569297,
|
|
"grad_norm": 0.14650839142109814,
|
|
"learning_rate": 7.154987727682194e-09,
|
|
"loss": 0.9356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747792601585388,
|
|
"step": 1167,
|
|
"valid_targets_mean": 16177.2,
|
|
"valid_targets_min": 15498
|
|
},
|
|
{
|
|
"epoch": 4.97228144989339,
|
|
"grad_norm": 0.14470146907773984,
|
|
"learning_rate": 5.6533943870462625e-09,
|
|
"loss": 1.0106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20723219215869904,
|
|
"step": 1168,
|
|
"valid_targets_mean": 12527.4,
|
|
"valid_targets_min": 9734
|
|
},
|
|
{
|
|
"epoch": 4.976545842217484,
|
|
"grad_norm": 0.1449205618855386,
|
|
"learning_rate": 4.328427873541152e-09,
|
|
"loss": 1.0014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24739845097064972,
|
|
"step": 1169,
|
|
"valid_targets_mean": 16187.4,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 4.980810234541578,
|
|
"grad_norm": 0.15399492154680527,
|
|
"learning_rate": 3.1800998917086432e-09,
|
|
"loss": 1.0297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2794463634490967,
|
|
"step": 1170,
|
|
"valid_targets_mean": 16146.8,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 4.985074626865671,
|
|
"grad_norm": 0.1460871738721153,
|
|
"learning_rate": 2.2084205856920393e-09,
|
|
"loss": 1.02,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22483548521995544,
|
|
"step": 1171,
|
|
"valid_targets_mean": 14174.9,
|
|
"valid_targets_min": 11882
|
|
},
|
|
{
|
|
"epoch": 4.9893390191897655,
|
|
"grad_norm": 0.14791868420274756,
|
|
"learning_rate": 1.4133985391473482e-09,
|
|
"loss": 1.0201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28491806983947754,
|
|
"step": 1172,
|
|
"valid_targets_mean": 16147.8,
|
|
"valid_targets_min": 14958
|
|
},
|
|
{
|
|
"epoch": 4.99360341151386,
|
|
"grad_norm": 0.15535541216631937,
|
|
"learning_rate": 7.950407751722288e-10,
|
|
"loss": 1.0033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25910621881484985,
|
|
"step": 1173,
|
|
"valid_targets_mean": 12998.6,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 4.997867803837953,
|
|
"grad_norm": 0.14322205584202816,
|
|
"learning_rate": 3.5335275624159835e-10,
|
|
"loss": 1.0496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2482285052537918,
|
|
"step": 1174,
|
|
"valid_targets_mean": 15772.4,
|
|
"valid_targets_min": 14853
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.2299317160898379,
|
|
"learning_rate": 8.833838415212014e-11,
|
|
"loss": 1.0092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48496419191360474,
|
|
"step": 1175,
|
|
"valid_targets_mean": 11353.3,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48496419191360474,
|
|
"step": 1175,
|
|
"total_flos": 1940656491724800.0,
|
|
"train_loss": 1.0710605895772893,
|
|
"train_runtime": 5191.8253,
|
|
"train_samples_per_second": 28.892,
|
|
"train_steps_per_second": 0.226,
|
|
"valid_targets_mean": 11353.3,
|
|
"valid_targets_min": 2084
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1175,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1940656491724800.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|