Model: ali-elganzory/sft_ot30k_1.7b-MixtureVitae-300BT-v1-decontaminated-16k-SFT-Tulu3-decontaminated_v0 Source: Original Platform
12973 lines
363 KiB
JSON
12973 lines
363 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1175,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0042643923240938165,
|
|
"grad_norm": 0.096886924619637,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.9796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.241354838013649,
|
|
"step": 1,
|
|
"valid_targets_mean": 16103.3,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 0.008528784648187633,
|
|
"grad_norm": 0.09563404320514839,
|
|
"learning_rate": 3.3898305084745766e-07,
|
|
"loss": 0.9847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28922921419143677,
|
|
"step": 2,
|
|
"valid_targets_mean": 16185.5,
|
|
"valid_targets_min": 15076
|
|
},
|
|
{
|
|
"epoch": 0.01279317697228145,
|
|
"grad_norm": 0.09434573973776735,
|
|
"learning_rate": 6.779661016949153e-07,
|
|
"loss": 1.001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22117456793785095,
|
|
"step": 3,
|
|
"valid_targets_mean": 13391.5,
|
|
"valid_targets_min": 9805
|
|
},
|
|
{
|
|
"epoch": 0.017057569296375266,
|
|
"grad_norm": 0.09502045292996218,
|
|
"learning_rate": 1.016949152542373e-06,
|
|
"loss": 1.0371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24571175873279572,
|
|
"step": 4,
|
|
"valid_targets_mean": 16229.8,
|
|
"valid_targets_min": 15577
|
|
},
|
|
{
|
|
"epoch": 0.021321961620469083,
|
|
"grad_norm": 0.096569956534327,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 1.0126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2807372808456421,
|
|
"step": 5,
|
|
"valid_targets_mean": 16155.2,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 0.0255863539445629,
|
|
"grad_norm": 0.09409064865223535,
|
|
"learning_rate": 1.6949152542372882e-06,
|
|
"loss": 0.9931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2307095229625702,
|
|
"step": 6,
|
|
"valid_targets_mean": 14326.3,
|
|
"valid_targets_min": 12589
|
|
},
|
|
{
|
|
"epoch": 0.029850746268656716,
|
|
"grad_norm": 0.09578384515577106,
|
|
"learning_rate": 2.033898305084746e-06,
|
|
"loss": 0.9716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293038547039032,
|
|
"step": 7,
|
|
"valid_targets_mean": 16075.6,
|
|
"valid_targets_min": 15192
|
|
},
|
|
{
|
|
"epoch": 0.03411513859275053,
|
|
"grad_norm": 0.09506436760787393,
|
|
"learning_rate": 2.372881355932204e-06,
|
|
"loss": 0.994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22461552917957306,
|
|
"step": 8,
|
|
"valid_targets_mean": 12872.3,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 0.03837953091684435,
|
|
"grad_norm": 0.09519325273508478,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 0.9601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23488740622997284,
|
|
"step": 9,
|
|
"valid_targets_mean": 15709.8,
|
|
"valid_targets_min": 14482
|
|
},
|
|
{
|
|
"epoch": 0.042643923240938165,
|
|
"grad_norm": 0.09132980732441298,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 0.9797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2698548138141632,
|
|
"step": 10,
|
|
"valid_targets_mean": 16164.9,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 0.046908315565031986,
|
|
"grad_norm": 0.09178522761323936,
|
|
"learning_rate": 3.3898305084745763e-06,
|
|
"loss": 0.9851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16811712086200714,
|
|
"step": 11,
|
|
"valid_targets_mean": 9219.8,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 0.0511727078891258,
|
|
"grad_norm": 0.09112748999032022,
|
|
"learning_rate": 3.7288135593220342e-06,
|
|
"loss": 0.9903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24219410121440887,
|
|
"step": 12,
|
|
"valid_targets_mean": 16130.4,
|
|
"valid_targets_min": 15447
|
|
},
|
|
{
|
|
"epoch": 0.05543710021321962,
|
|
"grad_norm": 0.09077632299045191,
|
|
"learning_rate": 4.067796610169492e-06,
|
|
"loss": 0.9813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26429295539855957,
|
|
"step": 13,
|
|
"valid_targets_mean": 16154.5,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 0.05970149253731343,
|
|
"grad_norm": 0.09067358699424162,
|
|
"learning_rate": 4.40677966101695e-06,
|
|
"loss": 0.974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18924854695796967,
|
|
"step": 14,
|
|
"valid_targets_mean": 10164.9,
|
|
"valid_targets_min": 6105
|
|
},
|
|
{
|
|
"epoch": 0.06396588486140725,
|
|
"grad_norm": 0.08973992760465696,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 1.0145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25748246908187866,
|
|
"step": 15,
|
|
"valid_targets_mean": 16154.9,
|
|
"valid_targets_min": 15765
|
|
},
|
|
{
|
|
"epoch": 0.06823027718550106,
|
|
"grad_norm": 0.08634559797122227,
|
|
"learning_rate": 5.084745762711865e-06,
|
|
"loss": 1.0349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3159717917442322,
|
|
"step": 16,
|
|
"valid_targets_mean": 16040.2,
|
|
"valid_targets_min": 14552
|
|
},
|
|
{
|
|
"epoch": 0.07249466950959488,
|
|
"grad_norm": 0.08298445415435929,
|
|
"learning_rate": 5.423728813559323e-06,
|
|
"loss": 0.9773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22751039266586304,
|
|
"step": 17,
|
|
"valid_targets_mean": 14871.8,
|
|
"valid_targets_min": 12671
|
|
},
|
|
{
|
|
"epoch": 0.0767590618336887,
|
|
"grad_norm": 0.08182038834682566,
|
|
"learning_rate": 5.7627118644067805e-06,
|
|
"loss": 1.0072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2424641102552414,
|
|
"step": 18,
|
|
"valid_targets_mean": 16241.4,
|
|
"valid_targets_min": 15372
|
|
},
|
|
{
|
|
"epoch": 0.08102345415778252,
|
|
"grad_norm": 0.08162988069972821,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 0.9866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28007063269615173,
|
|
"step": 19,
|
|
"valid_targets_mean": 16193.4,
|
|
"valid_targets_min": 15445
|
|
},
|
|
{
|
|
"epoch": 0.08528784648187633,
|
|
"grad_norm": 0.08215190445504957,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 1.001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558754086494446,
|
|
"step": 20,
|
|
"valid_targets_mean": 15318.8,
|
|
"valid_targets_min": 13169
|
|
},
|
|
{
|
|
"epoch": 0.08955223880597014,
|
|
"grad_norm": 0.07644864093401017,
|
|
"learning_rate": 6.779661016949153e-06,
|
|
"loss": 1.03,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945907413959503,
|
|
"step": 21,
|
|
"valid_targets_mean": 16012.4,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 0.09381663113006397,
|
|
"grad_norm": 0.06622574700244813,
|
|
"learning_rate": 7.1186440677966106e-06,
|
|
"loss": 0.9991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21056988835334778,
|
|
"step": 22,
|
|
"valid_targets_mean": 11514.8,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 0.09808102345415778,
|
|
"grad_norm": 0.06661238551768667,
|
|
"learning_rate": 7.4576271186440685e-06,
|
|
"loss": 0.9643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530766725540161,
|
|
"step": 23,
|
|
"valid_targets_mean": 16071.6,
|
|
"valid_targets_min": 14743
|
|
},
|
|
{
|
|
"epoch": 0.1023454157782516,
|
|
"grad_norm": 0.06360360873895532,
|
|
"learning_rate": 7.796610169491526e-06,
|
|
"loss": 1.0137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074164390563965,
|
|
"step": 24,
|
|
"valid_targets_mean": 16023.0,
|
|
"valid_targets_min": 14102
|
|
},
|
|
{
|
|
"epoch": 0.10660980810234541,
|
|
"grad_norm": 0.059129817739771044,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 0.9796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17694783210754395,
|
|
"step": 25,
|
|
"valid_targets_mean": 10452.0,
|
|
"valid_targets_min": 2107
|
|
},
|
|
{
|
|
"epoch": 0.11087420042643924,
|
|
"grad_norm": 0.05969437034798757,
|
|
"learning_rate": 8.47457627118644e-06,
|
|
"loss": 1.0031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26482921838760376,
|
|
"step": 26,
|
|
"valid_targets_mean": 16017.7,
|
|
"valid_targets_min": 14869
|
|
},
|
|
{
|
|
"epoch": 0.11513859275053305,
|
|
"grad_norm": 0.05921237045095926,
|
|
"learning_rate": 8.8135593220339e-06,
|
|
"loss": 0.957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573852837085724,
|
|
"step": 27,
|
|
"valid_targets_mean": 16133.4,
|
|
"valid_targets_min": 12838
|
|
},
|
|
{
|
|
"epoch": 0.11940298507462686,
|
|
"grad_norm": 0.05600892807179194,
|
|
"learning_rate": 9.152542372881356e-06,
|
|
"loss": 0.9783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1971319615840912,
|
|
"step": 28,
|
|
"valid_targets_mean": 11665.1,
|
|
"valid_targets_min": 8921
|
|
},
|
|
{
|
|
"epoch": 0.12366737739872068,
|
|
"grad_norm": 0.048999850499990734,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 1.0095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25832825899124146,
|
|
"step": 29,
|
|
"valid_targets_mean": 16103.1,
|
|
"valid_targets_min": 15115
|
|
},
|
|
{
|
|
"epoch": 0.1279317697228145,
|
|
"grad_norm": 0.04107182965358036,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 1.0072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30730485916137695,
|
|
"step": 30,
|
|
"valid_targets_mean": 16075.4,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 0.13219616204690832,
|
|
"grad_norm": 0.0383295116858804,
|
|
"learning_rate": 1.016949152542373e-05,
|
|
"loss": 0.939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20292264223098755,
|
|
"step": 31,
|
|
"valid_targets_mean": 14654.8,
|
|
"valid_targets_min": 12455
|
|
},
|
|
{
|
|
"epoch": 0.13646055437100213,
|
|
"grad_norm": 0.04157069039665174,
|
|
"learning_rate": 1.0508474576271188e-05,
|
|
"loss": 1.0068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2628253400325775,
|
|
"step": 32,
|
|
"valid_targets_mean": 16197.7,
|
|
"valid_targets_min": 15069
|
|
},
|
|
{
|
|
"epoch": 0.14072494669509594,
|
|
"grad_norm": 0.045584409476979004,
|
|
"learning_rate": 1.0847457627118645e-05,
|
|
"loss": 0.9717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22255894541740417,
|
|
"step": 33,
|
|
"valid_targets_mean": 13340.2,
|
|
"valid_targets_min": 2356
|
|
},
|
|
{
|
|
"epoch": 0.14498933901918976,
|
|
"grad_norm": 0.04621925168030546,
|
|
"learning_rate": 1.1186440677966102e-05,
|
|
"loss": 0.9512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2379852533340454,
|
|
"step": 34,
|
|
"valid_targets_mean": 15084.7,
|
|
"valid_targets_min": 7923
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 0.044951420559420405,
|
|
"learning_rate": 1.1525423728813561e-05,
|
|
"loss": 0.971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2971063256263733,
|
|
"step": 35,
|
|
"valid_targets_mean": 16172.9,
|
|
"valid_targets_min": 15462
|
|
},
|
|
{
|
|
"epoch": 0.1535181236673774,
|
|
"grad_norm": 0.04507111432872475,
|
|
"learning_rate": 1.1864406779661018e-05,
|
|
"loss": 0.9862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18123115599155426,
|
|
"step": 36,
|
|
"valid_targets_mean": 9951.7,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 0.15778251599147122,
|
|
"grad_norm": 0.04135609261795384,
|
|
"learning_rate": 1.2203389830508477e-05,
|
|
"loss": 0.9926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22945839166641235,
|
|
"step": 37,
|
|
"valid_targets_mean": 16189.6,
|
|
"valid_targets_min": 15506
|
|
},
|
|
{
|
|
"epoch": 0.16204690831556504,
|
|
"grad_norm": 0.03960471548152739,
|
|
"learning_rate": 1.2542372881355932e-05,
|
|
"loss": 0.9642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27514830231666565,
|
|
"step": 38,
|
|
"valid_targets_mean": 16141.1,
|
|
"valid_targets_min": 14896
|
|
},
|
|
{
|
|
"epoch": 0.16631130063965885,
|
|
"grad_norm": 0.03786837049176053,
|
|
"learning_rate": 1.288135593220339e-05,
|
|
"loss": 1.0157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18904536962509155,
|
|
"step": 39,
|
|
"valid_targets_mean": 10078.8,
|
|
"valid_targets_min": 6475
|
|
},
|
|
{
|
|
"epoch": 0.17057569296375266,
|
|
"grad_norm": 0.03702792230439802,
|
|
"learning_rate": 1.3220338983050848e-05,
|
|
"loss": 0.9544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24826553463935852,
|
|
"step": 40,
|
|
"valid_targets_mean": 16234.8,
|
|
"valid_targets_min": 15255
|
|
},
|
|
{
|
|
"epoch": 0.17484008528784648,
|
|
"grad_norm": 0.03513819264782961,
|
|
"learning_rate": 1.3559322033898305e-05,
|
|
"loss": 0.9445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586909830570221,
|
|
"step": 41,
|
|
"valid_targets_mean": 16211.3,
|
|
"valid_targets_min": 15116
|
|
},
|
|
{
|
|
"epoch": 0.1791044776119403,
|
|
"grad_norm": 0.03543338669144235,
|
|
"learning_rate": 1.3898305084745764e-05,
|
|
"loss": 0.9974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21098120510578156,
|
|
"step": 42,
|
|
"valid_targets_mean": 13499.4,
|
|
"valid_targets_min": 11596
|
|
},
|
|
{
|
|
"epoch": 0.18336886993603413,
|
|
"grad_norm": 0.03313882077908196,
|
|
"learning_rate": 1.4237288135593221e-05,
|
|
"loss": 0.9721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688150405883789,
|
|
"step": 43,
|
|
"valid_targets_mean": 16137.6,
|
|
"valid_targets_min": 15314
|
|
},
|
|
{
|
|
"epoch": 0.18763326226012794,
|
|
"grad_norm": 0.0345522760781207,
|
|
"learning_rate": 1.4576271186440678e-05,
|
|
"loss": 0.9149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419596165418625,
|
|
"step": 44,
|
|
"valid_targets_mean": 16227.9,
|
|
"valid_targets_min": 14656
|
|
},
|
|
{
|
|
"epoch": 0.19189765458422176,
|
|
"grad_norm": 0.03519190315729759,
|
|
"learning_rate": 1.4915254237288137e-05,
|
|
"loss": 1.0563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23708871006965637,
|
|
"step": 45,
|
|
"valid_targets_mean": 14956.2,
|
|
"valid_targets_min": 13592
|
|
},
|
|
{
|
|
"epoch": 0.19616204690831557,
|
|
"grad_norm": 0.03498669739597042,
|
|
"learning_rate": 1.5254237288135594e-05,
|
|
"loss": 0.9864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27887651324272156,
|
|
"step": 46,
|
|
"valid_targets_mean": 16158.3,
|
|
"valid_targets_min": 15004
|
|
},
|
|
{
|
|
"epoch": 0.20042643923240938,
|
|
"grad_norm": 0.03294924245440561,
|
|
"learning_rate": 1.5593220338983053e-05,
|
|
"loss": 0.9758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20153044164180756,
|
|
"step": 47,
|
|
"valid_targets_mean": 11801.2,
|
|
"valid_targets_min": 3471
|
|
},
|
|
{
|
|
"epoch": 0.2046908315565032,
|
|
"grad_norm": 0.03009556402222723,
|
|
"learning_rate": 1.593220338983051e-05,
|
|
"loss": 0.9594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2391558438539505,
|
|
"step": 48,
|
|
"valid_targets_mean": 15603.8,
|
|
"valid_targets_min": 14363
|
|
},
|
|
{
|
|
"epoch": 0.208955223880597,
|
|
"grad_norm": 0.030950033584954354,
|
|
"learning_rate": 1.6271186440677967e-05,
|
|
"loss": 0.9867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27356553077697754,
|
|
"step": 49,
|
|
"valid_targets_mean": 16112.0,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 0.21321961620469082,
|
|
"grad_norm": 0.030364922900513805,
|
|
"learning_rate": 1.6610169491525424e-05,
|
|
"loss": 0.9864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15093693137168884,
|
|
"step": 50,
|
|
"valid_targets_mean": 7430.2,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 0.21748400852878466,
|
|
"grad_norm": 0.0279795006686685,
|
|
"learning_rate": 1.694915254237288e-05,
|
|
"loss": 0.9576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24323467910289764,
|
|
"step": 51,
|
|
"valid_targets_mean": 16137.9,
|
|
"valid_targets_min": 13562
|
|
},
|
|
{
|
|
"epoch": 0.22174840085287847,
|
|
"grad_norm": 0.029783610068958383,
|
|
"learning_rate": 1.728813559322034e-05,
|
|
"loss": 0.9667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2927943468093872,
|
|
"step": 52,
|
|
"valid_targets_mean": 16103.2,
|
|
"valid_targets_min": 15150
|
|
},
|
|
{
|
|
"epoch": 0.2260127931769723,
|
|
"grad_norm": 0.029737904597647347,
|
|
"learning_rate": 1.76271186440678e-05,
|
|
"loss": 0.9986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2064538598060608,
|
|
"step": 53,
|
|
"valid_targets_mean": 12599.2,
|
|
"valid_targets_min": 9218
|
|
},
|
|
{
|
|
"epoch": 0.2302771855010661,
|
|
"grad_norm": 0.02715254848686034,
|
|
"learning_rate": 1.7966101694915256e-05,
|
|
"loss": 0.9711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26498943567276,
|
|
"step": 54,
|
|
"valid_targets_mean": 16131.9,
|
|
"valid_targets_min": 15146
|
|
},
|
|
{
|
|
"epoch": 0.2345415778251599,
|
|
"grad_norm": 0.029741277416861552,
|
|
"learning_rate": 1.8305084745762713e-05,
|
|
"loss": 0.9674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28610119223594666,
|
|
"step": 55,
|
|
"valid_targets_mean": 16111.3,
|
|
"valid_targets_min": 15592
|
|
},
|
|
{
|
|
"epoch": 0.23880597014925373,
|
|
"grad_norm": 0.028385851529196676,
|
|
"learning_rate": 1.864406779661017e-05,
|
|
"loss": 0.9639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21565639972686768,
|
|
"step": 56,
|
|
"valid_targets_mean": 14647.1,
|
|
"valid_targets_min": 12903
|
|
},
|
|
{
|
|
"epoch": 0.24307036247334754,
|
|
"grad_norm": 0.02781672033955557,
|
|
"learning_rate": 1.898305084745763e-05,
|
|
"loss": 1.0066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721647322177887,
|
|
"step": 57,
|
|
"valid_targets_mean": 15840.5,
|
|
"valid_targets_min": 8340
|
|
},
|
|
{
|
|
"epoch": 0.24733475479744135,
|
|
"grad_norm": 0.026586727598952788,
|
|
"learning_rate": 1.9322033898305087e-05,
|
|
"loss": 0.9447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23874936997890472,
|
|
"step": 58,
|
|
"valid_targets_mean": 13233.3,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 0.2515991471215352,
|
|
"grad_norm": 0.024397987674722288,
|
|
"learning_rate": 1.9661016949152545e-05,
|
|
"loss": 0.9793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2325296550989151,
|
|
"step": 59,
|
|
"valid_targets_mean": 16033.5,
|
|
"valid_targets_min": 15160
|
|
},
|
|
{
|
|
"epoch": 0.255863539445629,
|
|
"grad_norm": 0.026421464782320464,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.9757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822790741920471,
|
|
"step": 60,
|
|
"valid_targets_mean": 16134.5,
|
|
"valid_targets_min": 12378
|
|
},
|
|
{
|
|
"epoch": 0.2601279317697228,
|
|
"grad_norm": 0.026147875010783778,
|
|
"learning_rate": 2.033898305084746e-05,
|
|
"loss": 0.9535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16812659800052643,
|
|
"step": 61,
|
|
"valid_targets_mean": 8668.3,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 0.26439232409381663,
|
|
"grad_norm": 0.024411772477778523,
|
|
"learning_rate": 2.0677966101694916e-05,
|
|
"loss": 0.9935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23327814042568207,
|
|
"step": 62,
|
|
"valid_targets_mean": 16138.2,
|
|
"valid_targets_min": 15352
|
|
},
|
|
{
|
|
"epoch": 0.26865671641791045,
|
|
"grad_norm": 0.023326138386476743,
|
|
"learning_rate": 2.1016949152542376e-05,
|
|
"loss": 0.911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603323459625244,
|
|
"step": 63,
|
|
"valid_targets_mean": 16197.9,
|
|
"valid_targets_min": 15523
|
|
},
|
|
{
|
|
"epoch": 0.27292110874200426,
|
|
"grad_norm": 0.024631063445641654,
|
|
"learning_rate": 2.1355932203389833e-05,
|
|
"loss": 1.0072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19310042262077332,
|
|
"step": 64,
|
|
"valid_targets_mean": 10580.2,
|
|
"valid_targets_min": 5595
|
|
},
|
|
{
|
|
"epoch": 0.2771855010660981,
|
|
"grad_norm": 0.021840055066577675,
|
|
"learning_rate": 2.169491525423729e-05,
|
|
"loss": 0.9617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256880521774292,
|
|
"step": 65,
|
|
"valid_targets_mean": 16181.6,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 0.2814498933901919,
|
|
"grad_norm": 0.02456090705074775,
|
|
"learning_rate": 2.2033898305084748e-05,
|
|
"loss": 0.9427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829405665397644,
|
|
"step": 66,
|
|
"valid_targets_mean": 16184.6,
|
|
"valid_targets_min": 15260
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.022480459218492767,
|
|
"learning_rate": 2.2372881355932205e-05,
|
|
"loss": 0.9812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20558682084083557,
|
|
"step": 67,
|
|
"valid_targets_mean": 12750.7,
|
|
"valid_targets_min": 10054
|
|
},
|
|
{
|
|
"epoch": 0.2899786780383795,
|
|
"grad_norm": 0.023922945522637688,
|
|
"learning_rate": 2.2711864406779665e-05,
|
|
"loss": 0.9717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629331946372986,
|
|
"step": 68,
|
|
"valid_targets_mean": 16183.3,
|
|
"valid_targets_min": 15443
|
|
},
|
|
{
|
|
"epoch": 0.2942430703624733,
|
|
"grad_norm": 0.026102238061528067,
|
|
"learning_rate": 2.3050847457627122e-05,
|
|
"loss": 0.9676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30709654092788696,
|
|
"step": 69,
|
|
"valid_targets_mean": 16082.1,
|
|
"valid_targets_min": 15187
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.022194886271654706,
|
|
"learning_rate": 2.338983050847458e-05,
|
|
"loss": 0.9742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21478226780891418,
|
|
"step": 70,
|
|
"valid_targets_mean": 15024.0,
|
|
"valid_targets_min": 12713
|
|
},
|
|
{
|
|
"epoch": 0.302771855010661,
|
|
"grad_norm": 0.022329492303660402,
|
|
"learning_rate": 2.3728813559322036e-05,
|
|
"loss": 0.9864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28527936339378357,
|
|
"step": 71,
|
|
"valid_targets_mean": 16127.9,
|
|
"valid_targets_min": 13863
|
|
},
|
|
{
|
|
"epoch": 0.3070362473347548,
|
|
"grad_norm": 0.021997685837490098,
|
|
"learning_rate": 2.406779661016949e-05,
|
|
"loss": 0.9938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2072170078754425,
|
|
"step": 72,
|
|
"valid_targets_mean": 11473.8,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 0.31130063965884863,
|
|
"grad_norm": 0.019901006945245436,
|
|
"learning_rate": 2.4406779661016954e-05,
|
|
"loss": 1.0045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25010067224502563,
|
|
"step": 73,
|
|
"valid_targets_mean": 16089.1,
|
|
"valid_targets_min": 15426
|
|
},
|
|
{
|
|
"epoch": 0.31556503198294245,
|
|
"grad_norm": 0.02098151407253464,
|
|
"learning_rate": 2.474576271186441e-05,
|
|
"loss": 0.9905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27008217573165894,
|
|
"step": 74,
|
|
"valid_targets_mean": 16227.9,
|
|
"valid_targets_min": 15487
|
|
},
|
|
{
|
|
"epoch": 0.31982942430703626,
|
|
"grad_norm": 0.022850447334891926,
|
|
"learning_rate": 2.5084745762711865e-05,
|
|
"loss": 0.9829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14634910225868225,
|
|
"step": 75,
|
|
"valid_targets_mean": 8328.3,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 0.32409381663113007,
|
|
"grad_norm": 0.021522521459666884,
|
|
"learning_rate": 2.5423728813559322e-05,
|
|
"loss": 0.9633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2459203600883484,
|
|
"step": 76,
|
|
"valid_targets_mean": 16123.9,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 0.3283582089552239,
|
|
"grad_norm": 0.022422271582637193,
|
|
"learning_rate": 2.576271186440678e-05,
|
|
"loss": 0.9675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.287975549697876,
|
|
"step": 77,
|
|
"valid_targets_mean": 16167.7,
|
|
"valid_targets_min": 15312
|
|
},
|
|
{
|
|
"epoch": 0.3326226012793177,
|
|
"grad_norm": 0.02140560779509087,
|
|
"learning_rate": 2.610169491525424e-05,
|
|
"loss": 0.9576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1962912529706955,
|
|
"step": 78,
|
|
"valid_targets_mean": 12456.6,
|
|
"valid_targets_min": 8870
|
|
},
|
|
{
|
|
"epoch": 0.3368869936034115,
|
|
"grad_norm": 0.021831582691159927,
|
|
"learning_rate": 2.6440677966101696e-05,
|
|
"loss": 0.9933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26069438457489014,
|
|
"step": 79,
|
|
"valid_targets_mean": 16122.0,
|
|
"valid_targets_min": 15344
|
|
},
|
|
{
|
|
"epoch": 0.3411513859275053,
|
|
"grad_norm": 0.022348075233379408,
|
|
"learning_rate": 2.6779661016949153e-05,
|
|
"loss": 0.9731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28060999512672424,
|
|
"step": 80,
|
|
"valid_targets_mean": 16169.0,
|
|
"valid_targets_min": 14236
|
|
},
|
|
{
|
|
"epoch": 0.34541577825159914,
|
|
"grad_norm": 0.02174407011441448,
|
|
"learning_rate": 2.711864406779661e-05,
|
|
"loss": 0.9968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21677571535110474,
|
|
"step": 81,
|
|
"valid_targets_mean": 14335.5,
|
|
"valid_targets_min": 11937
|
|
},
|
|
{
|
|
"epoch": 0.34968017057569295,
|
|
"grad_norm": 0.020921050516978134,
|
|
"learning_rate": 2.7457627118644068e-05,
|
|
"loss": 0.9484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27182823419570923,
|
|
"step": 82,
|
|
"valid_targets_mean": 16110.3,
|
|
"valid_targets_min": 15503
|
|
},
|
|
{
|
|
"epoch": 0.35394456289978676,
|
|
"grad_norm": 0.022949842232457322,
|
|
"learning_rate": 2.7796610169491528e-05,
|
|
"loss": 1.0056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620610296726227,
|
|
"step": 83,
|
|
"valid_targets_mean": 13237.7,
|
|
"valid_targets_min": 2437
|
|
},
|
|
{
|
|
"epoch": 0.3582089552238806,
|
|
"grad_norm": 0.02078715334777192,
|
|
"learning_rate": 2.8135593220338985e-05,
|
|
"loss": 0.9285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209890678524971,
|
|
"step": 84,
|
|
"valid_targets_mean": 15746.1,
|
|
"valid_targets_min": 15164
|
|
},
|
|
{
|
|
"epoch": 0.3624733475479744,
|
|
"grad_norm": 0.021596708139222605,
|
|
"learning_rate": 2.8474576271186442e-05,
|
|
"loss": 0.9967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28995853662490845,
|
|
"step": 85,
|
|
"valid_targets_mean": 16138.9,
|
|
"valid_targets_min": 14950
|
|
},
|
|
{
|
|
"epoch": 0.36673773987206826,
|
|
"grad_norm": 0.022887603974184496,
|
|
"learning_rate": 2.88135593220339e-05,
|
|
"loss": 1.0206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16542835533618927,
|
|
"step": 86,
|
|
"valid_targets_mean": 9408.4,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 0.37100213219616207,
|
|
"grad_norm": 0.02084835024137853,
|
|
"learning_rate": 2.9152542372881356e-05,
|
|
"loss": 0.9667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23904331028461456,
|
|
"step": 87,
|
|
"valid_targets_mean": 16096.6,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 0.3752665245202559,
|
|
"grad_norm": 0.021880796285522006,
|
|
"learning_rate": 2.9491525423728817e-05,
|
|
"loss": 0.9921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29959046840667725,
|
|
"step": 88,
|
|
"valid_targets_mean": 16169.5,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 0.3795309168443497,
|
|
"grad_norm": 0.02073981606140259,
|
|
"learning_rate": 2.9830508474576274e-05,
|
|
"loss": 0.9378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1787843108177185,
|
|
"step": 89,
|
|
"valid_targets_mean": 10425.1,
|
|
"valid_targets_min": 5896
|
|
},
|
|
{
|
|
"epoch": 0.3837953091684435,
|
|
"grad_norm": 0.020601187491489047,
|
|
"learning_rate": 3.016949152542373e-05,
|
|
"loss": 0.9906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26153984665870667,
|
|
"step": 90,
|
|
"valid_targets_mean": 16099.6,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 0.3880597014925373,
|
|
"grad_norm": 0.022816342007711772,
|
|
"learning_rate": 3.0508474576271188e-05,
|
|
"loss": 0.9861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047121465206146,
|
|
"step": 91,
|
|
"valid_targets_mean": 16075.3,
|
|
"valid_targets_min": 14896
|
|
},
|
|
{
|
|
"epoch": 0.39232409381663114,
|
|
"grad_norm": 0.02014041992418665,
|
|
"learning_rate": 3.084745762711865e-05,
|
|
"loss": 0.9907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2203022837638855,
|
|
"step": 92,
|
|
"valid_targets_mean": 13524.8,
|
|
"valid_targets_min": 11028
|
|
},
|
|
{
|
|
"epoch": 0.39658848614072495,
|
|
"grad_norm": 0.020855844473723486,
|
|
"learning_rate": 3.1186440677966106e-05,
|
|
"loss": 0.9676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784779667854309,
|
|
"step": 93,
|
|
"valid_targets_mean": 16129.1,
|
|
"valid_targets_min": 14935
|
|
},
|
|
{
|
|
"epoch": 0.40085287846481876,
|
|
"grad_norm": 0.021982752347661467,
|
|
"learning_rate": 3.152542372881356e-05,
|
|
"loss": 0.976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2944275736808777,
|
|
"step": 94,
|
|
"valid_targets_mean": 16143.3,
|
|
"valid_targets_min": 14248
|
|
},
|
|
{
|
|
"epoch": 0.4051172707889126,
|
|
"grad_norm": 0.01974750991659846,
|
|
"learning_rate": 3.186440677966102e-05,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24613580107688904,
|
|
"step": 95,
|
|
"valid_targets_mean": 15716.7,
|
|
"valid_targets_min": 14525
|
|
},
|
|
{
|
|
"epoch": 0.4093816631130064,
|
|
"grad_norm": 0.02011356135896114,
|
|
"learning_rate": 3.2203389830508473e-05,
|
|
"loss": 1.0155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27125972509384155,
|
|
"step": 96,
|
|
"valid_targets_mean": 16073.1,
|
|
"valid_targets_min": 13449
|
|
},
|
|
{
|
|
"epoch": 0.4136460554371002,
|
|
"grad_norm": 0.021382249863399653,
|
|
"learning_rate": 3.2542372881355934e-05,
|
|
"loss": 0.9788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19551990926265717,
|
|
"step": 97,
|
|
"valid_targets_mean": 10809.0,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 0.417910447761194,
|
|
"grad_norm": 0.020265844724426482,
|
|
"learning_rate": 3.2881355932203394e-05,
|
|
"loss": 0.9632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23800063133239746,
|
|
"step": 98,
|
|
"valid_targets_mean": 15808.9,
|
|
"valid_targets_min": 14629
|
|
},
|
|
{
|
|
"epoch": 0.42217484008528783,
|
|
"grad_norm": 0.022331200999506685,
|
|
"learning_rate": 3.322033898305085e-05,
|
|
"loss": 1.0031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29094916582107544,
|
|
"step": 99,
|
|
"valid_targets_mean": 16108.2,
|
|
"valid_targets_min": 13682
|
|
},
|
|
{
|
|
"epoch": 0.42643923240938164,
|
|
"grad_norm": 0.0209756934558208,
|
|
"learning_rate": 3.355932203389831e-05,
|
|
"loss": 0.9618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1729329526424408,
|
|
"step": 100,
|
|
"valid_targets_mean": 9011.3,
|
|
"valid_targets_min": 2435
|
|
},
|
|
{
|
|
"epoch": 0.43070362473347545,
|
|
"grad_norm": 0.020152644360067997,
|
|
"learning_rate": 3.389830508474576e-05,
|
|
"loss": 0.944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2317223846912384,
|
|
"step": 101,
|
|
"valid_targets_mean": 16204.4,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 0.4349680170575693,
|
|
"grad_norm": 0.021217357338002,
|
|
"learning_rate": 3.423728813559322e-05,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2881006896495819,
|
|
"step": 102,
|
|
"valid_targets_mean": 16191.1,
|
|
"valid_targets_min": 15659
|
|
},
|
|
{
|
|
"epoch": 0.43923240938166314,
|
|
"grad_norm": 0.021458473654988437,
|
|
"learning_rate": 3.457627118644068e-05,
|
|
"loss": 0.9242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2011905163526535,
|
|
"step": 103,
|
|
"valid_targets_mean": 12231.0,
|
|
"valid_targets_min": 9115
|
|
},
|
|
{
|
|
"epoch": 0.44349680170575695,
|
|
"grad_norm": 0.020121508821071714,
|
|
"learning_rate": 3.491525423728814e-05,
|
|
"loss": 0.944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24188287556171417,
|
|
"step": 104,
|
|
"valid_targets_mean": 16178.5,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 0.021099010251510737,
|
|
"learning_rate": 3.52542372881356e-05,
|
|
"loss": 0.9915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950183153152466,
|
|
"step": 105,
|
|
"valid_targets_mean": 15760.2,
|
|
"valid_targets_min": 6594
|
|
},
|
|
{
|
|
"epoch": 0.4520255863539446,
|
|
"grad_norm": 0.02056873211667901,
|
|
"learning_rate": 3.559322033898305e-05,
|
|
"loss": 1.0079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21873688697814941,
|
|
"step": 106,
|
|
"valid_targets_mean": 14701.0,
|
|
"valid_targets_min": 12244
|
|
},
|
|
{
|
|
"epoch": 0.4562899786780384,
|
|
"grad_norm": 0.021263423657852606,
|
|
"learning_rate": 3.593220338983051e-05,
|
|
"loss": 1.0203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28287264704704285,
|
|
"step": 107,
|
|
"valid_targets_mean": 16172.6,
|
|
"valid_targets_min": 15496
|
|
},
|
|
{
|
|
"epoch": 0.4605543710021322,
|
|
"grad_norm": 0.0211077949226087,
|
|
"learning_rate": 3.627118644067797e-05,
|
|
"loss": 0.9877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2367570698261261,
|
|
"step": 108,
|
|
"valid_targets_mean": 12735.3,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 0.464818763326226,
|
|
"grad_norm": 0.020281442739726017,
|
|
"learning_rate": 3.6610169491525426e-05,
|
|
"loss": 0.9303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2263236939907074,
|
|
"step": 109,
|
|
"valid_targets_mean": 15377.7,
|
|
"valid_targets_min": 14440
|
|
},
|
|
{
|
|
"epoch": 0.4690831556503198,
|
|
"grad_norm": 0.021929604718983035,
|
|
"learning_rate": 3.6949152542372886e-05,
|
|
"loss": 0.9933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28336554765701294,
|
|
"step": 110,
|
|
"valid_targets_mean": 16057.0,
|
|
"valid_targets_min": 13369
|
|
},
|
|
{
|
|
"epoch": 0.47334754797441364,
|
|
"grad_norm": 0.021568080875278048,
|
|
"learning_rate": 3.728813559322034e-05,
|
|
"loss": 1.0073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17097915709018707,
|
|
"step": 111,
|
|
"valid_targets_mean": 9595.3,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 0.47761194029850745,
|
|
"grad_norm": 0.019743499181402058,
|
|
"learning_rate": 3.76271186440678e-05,
|
|
"loss": 0.9725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25016847252845764,
|
|
"step": 112,
|
|
"valid_targets_mean": 16093.3,
|
|
"valid_targets_min": 14849
|
|
},
|
|
{
|
|
"epoch": 0.48187633262260127,
|
|
"grad_norm": 0.021724342268355623,
|
|
"learning_rate": 3.796610169491526e-05,
|
|
"loss": 0.9366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679430842399597,
|
|
"step": 113,
|
|
"valid_targets_mean": 16177.1,
|
|
"valid_targets_min": 15069
|
|
},
|
|
{
|
|
"epoch": 0.4861407249466951,
|
|
"grad_norm": 0.020333539214787994,
|
|
"learning_rate": 3.8305084745762714e-05,
|
|
"loss": 0.9912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17153987288475037,
|
|
"step": 114,
|
|
"valid_targets_mean": 10876.6,
|
|
"valid_targets_min": 5060
|
|
},
|
|
{
|
|
"epoch": 0.4904051172707889,
|
|
"grad_norm": 0.021165589872127546,
|
|
"learning_rate": 3.8644067796610175e-05,
|
|
"loss": 1.0159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608182430267334,
|
|
"step": 115,
|
|
"valid_targets_mean": 16147.9,
|
|
"valid_targets_min": 15403
|
|
},
|
|
{
|
|
"epoch": 0.4946695095948827,
|
|
"grad_norm": 0.022433223356041036,
|
|
"learning_rate": 3.898305084745763e-05,
|
|
"loss": 0.9641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925991117954254,
|
|
"step": 116,
|
|
"valid_targets_mean": 16050.6,
|
|
"valid_targets_min": 15044
|
|
},
|
|
{
|
|
"epoch": 0.4989339019189765,
|
|
"grad_norm": 0.021627292159432345,
|
|
"learning_rate": 3.932203389830509e-05,
|
|
"loss": 0.9701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19639122486114502,
|
|
"step": 117,
|
|
"valid_targets_mean": 12286.3,
|
|
"valid_targets_min": 9227
|
|
},
|
|
{
|
|
"epoch": 0.5031982942430704,
|
|
"grad_norm": 0.022416624856410494,
|
|
"learning_rate": 3.966101694915255e-05,
|
|
"loss": 1.0301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2988147437572479,
|
|
"step": 118,
|
|
"valid_targets_mean": 16022.7,
|
|
"valid_targets_min": 12838
|
|
},
|
|
{
|
|
"epoch": 0.5074626865671642,
|
|
"grad_norm": 0.022047626152921513,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.9391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716435194015503,
|
|
"step": 119,
|
|
"valid_targets_mean": 16238.1,
|
|
"valid_targets_min": 15365
|
|
},
|
|
{
|
|
"epoch": 0.511727078891258,
|
|
"grad_norm": 0.02011059044128375,
|
|
"learning_rate": 3.999991166161585e-05,
|
|
"loss": 0.9853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22559040784835815,
|
|
"step": 120,
|
|
"valid_targets_mean": 14894.9,
|
|
"valid_targets_min": 12766
|
|
},
|
|
{
|
|
"epoch": 0.5159914712153518,
|
|
"grad_norm": 0.02121597731084117,
|
|
"learning_rate": 3.999964664724376e-05,
|
|
"loss": 0.9374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28334271907806396,
|
|
"step": 121,
|
|
"valid_targets_mean": 16163.6,
|
|
"valid_targets_min": 15329
|
|
},
|
|
{
|
|
"epoch": 0.5202558635394456,
|
|
"grad_norm": 0.022251241312081053,
|
|
"learning_rate": 3.999920495922483e-05,
|
|
"loss": 0.9698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1875140368938446,
|
|
"step": 122,
|
|
"valid_targets_mean": 10970.1,
|
|
"valid_targets_min": 2947
|
|
},
|
|
{
|
|
"epoch": 0.5245202558635395,
|
|
"grad_norm": 0.02017780235802464,
|
|
"learning_rate": 3.999858660146085e-05,
|
|
"loss": 0.9771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23657098412513733,
|
|
"step": 123,
|
|
"valid_targets_mean": 16116.7,
|
|
"valid_targets_min": 14097
|
|
},
|
|
{
|
|
"epoch": 0.5287846481876333,
|
|
"grad_norm": 0.021264151696513377,
|
|
"learning_rate": 3.999779157941431e-05,
|
|
"loss": 0.9879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908053696155548,
|
|
"step": 124,
|
|
"valid_targets_mean": 16076.1,
|
|
"valid_targets_min": 14349
|
|
},
|
|
{
|
|
"epoch": 0.5330490405117271,
|
|
"grad_norm": 0.021266678082225883,
|
|
"learning_rate": 3.99968199001083e-05,
|
|
"loss": 0.9396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152372345328331,
|
|
"step": 125,
|
|
"valid_targets_mean": 8823.0,
|
|
"valid_targets_min": 1964
|
|
},
|
|
{
|
|
"epoch": 0.5373134328358209,
|
|
"grad_norm": 0.020065060116963702,
|
|
"learning_rate": 3.999567157212646e-05,
|
|
"loss": 0.9817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2422441989183426,
|
|
"step": 126,
|
|
"valid_targets_mean": 16103.7,
|
|
"valid_targets_min": 14670
|
|
},
|
|
{
|
|
"epoch": 0.5415778251599147,
|
|
"grad_norm": 0.021377594478988522,
|
|
"learning_rate": 3.9994346605612955e-05,
|
|
"loss": 0.9742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28974485397338867,
|
|
"step": 127,
|
|
"valid_targets_mean": 16162.0,
|
|
"valid_targets_min": 15503
|
|
},
|
|
{
|
|
"epoch": 0.5458422174840085,
|
|
"grad_norm": 0.020289900867874733,
|
|
"learning_rate": 3.999284501227232e-05,
|
|
"loss": 0.9619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.194909930229187,
|
|
"step": 128,
|
|
"valid_targets_mean": 11757.2,
|
|
"valid_targets_min": 8396
|
|
},
|
|
{
|
|
"epoch": 0.5501066098081023,
|
|
"grad_norm": 0.02080614108643718,
|
|
"learning_rate": 3.9991166805369393e-05,
|
|
"loss": 0.9925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26247116923332214,
|
|
"step": 129,
|
|
"valid_targets_mean": 16170.3,
|
|
"valid_targets_min": 15285
|
|
},
|
|
{
|
|
"epoch": 0.5543710021321961,
|
|
"grad_norm": 0.022100904368128272,
|
|
"learning_rate": 3.9989311999729166e-05,
|
|
"loss": 1.0344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31214579939842224,
|
|
"step": 130,
|
|
"valid_targets_mean": 16122.5,
|
|
"valid_targets_min": 14383
|
|
},
|
|
{
|
|
"epoch": 0.55863539445629,
|
|
"grad_norm": 0.019816152450482617,
|
|
"learning_rate": 3.99872806117367e-05,
|
|
"loss": 0.9225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21119537949562073,
|
|
"step": 131,
|
|
"valid_targets_mean": 14895.4,
|
|
"valid_targets_min": 12887
|
|
},
|
|
{
|
|
"epoch": 0.5628997867803838,
|
|
"grad_norm": 0.02024903830584211,
|
|
"learning_rate": 3.998507265933696e-05,
|
|
"loss": 0.9405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536119222640991,
|
|
"step": 132,
|
|
"valid_targets_mean": 16135.3,
|
|
"valid_targets_min": 15023
|
|
},
|
|
{
|
|
"epoch": 0.5671641791044776,
|
|
"grad_norm": 0.021817366867683298,
|
|
"learning_rate": 3.9982688162034624e-05,
|
|
"loss": 0.9633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21941867470741272,
|
|
"step": 133,
|
|
"valid_targets_mean": 13351.5,
|
|
"valid_targets_min": 2548
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.01904626717242015,
|
|
"learning_rate": 3.998012714089397e-05,
|
|
"loss": 0.9517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21697096526622772,
|
|
"step": 134,
|
|
"valid_targets_mean": 15830.9,
|
|
"valid_targets_min": 15064
|
|
},
|
|
{
|
|
"epoch": 0.5756929637526652,
|
|
"grad_norm": 0.021235763164139627,
|
|
"learning_rate": 3.997738961853863e-05,
|
|
"loss": 0.957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26761946082115173,
|
|
"step": 135,
|
|
"valid_targets_mean": 16154.7,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 0.579957356076759,
|
|
"grad_norm": 0.0203380966149706,
|
|
"learning_rate": 3.9974475619151445e-05,
|
|
"loss": 0.9154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.172822505235672,
|
|
"step": 136,
|
|
"valid_targets_mean": 11104.2,
|
|
"valid_targets_min": 4073
|
|
},
|
|
{
|
|
"epoch": 0.5842217484008528,
|
|
"grad_norm": 0.01970257620502463,
|
|
"learning_rate": 3.997138516847422e-05,
|
|
"loss": 0.9419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21897849440574646,
|
|
"step": 137,
|
|
"valid_targets_mean": 15956.0,
|
|
"valid_targets_min": 15209
|
|
},
|
|
{
|
|
"epoch": 0.5884861407249466,
|
|
"grad_norm": 0.022047220182933,
|
|
"learning_rate": 3.9968118293807476e-05,
|
|
"loss": 0.9789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3147332966327667,
|
|
"step": 138,
|
|
"valid_targets_mean": 16088.7,
|
|
"valid_targets_min": 15222
|
|
},
|
|
{
|
|
"epoch": 0.5927505330490405,
|
|
"grad_norm": 0.02151483243266618,
|
|
"learning_rate": 3.996467502401028e-05,
|
|
"loss": 0.9646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1870661973953247,
|
|
"step": 139,
|
|
"valid_targets_mean": 11090.8,
|
|
"valid_targets_min": 7536
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.020640894582035912,
|
|
"learning_rate": 3.9961055389499904e-05,
|
|
"loss": 0.9826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25833842158317566,
|
|
"step": 140,
|
|
"valid_targets_mean": 16129.2,
|
|
"valid_targets_min": 14836
|
|
},
|
|
{
|
|
"epoch": 0.6012793176972282,
|
|
"grad_norm": 0.021023562097243147,
|
|
"learning_rate": 3.995725942225162e-05,
|
|
"loss": 0.9817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27318665385246277,
|
|
"step": 141,
|
|
"valid_targets_mean": 16199.3,
|
|
"valid_targets_min": 15664
|
|
},
|
|
{
|
|
"epoch": 0.605543710021322,
|
|
"grad_norm": 0.020822157419671992,
|
|
"learning_rate": 3.995328715579839e-05,
|
|
"loss": 0.993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23057281970977783,
|
|
"step": 142,
|
|
"valid_targets_mean": 13840.3,
|
|
"valid_targets_min": 11087
|
|
},
|
|
{
|
|
"epoch": 0.6098081023454158,
|
|
"grad_norm": 0.021002774123317643,
|
|
"learning_rate": 3.994913862523058e-05,
|
|
"loss": 1.0013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27449026703834534,
|
|
"step": 143,
|
|
"valid_targets_mean": 16161.0,
|
|
"valid_targets_min": 14821
|
|
},
|
|
{
|
|
"epoch": 0.6140724946695096,
|
|
"grad_norm": 0.021123404389439878,
|
|
"learning_rate": 3.9944813867195624e-05,
|
|
"loss": 0.983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.260378360748291,
|
|
"step": 144,
|
|
"valid_targets_mean": 16192.5,
|
|
"valid_targets_min": 15681
|
|
},
|
|
{
|
|
"epoch": 0.6183368869936035,
|
|
"grad_norm": 0.019632251293702368,
|
|
"learning_rate": 3.9940312919897744e-05,
|
|
"loss": 0.9322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23691125214099884,
|
|
"step": 145,
|
|
"valid_targets_mean": 15141.9,
|
|
"valid_targets_min": 13641
|
|
},
|
|
{
|
|
"epoch": 0.6226012793176973,
|
|
"grad_norm": 0.021605110773861222,
|
|
"learning_rate": 3.993563582309759e-05,
|
|
"loss": 0.9244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26022523641586304,
|
|
"step": 146,
|
|
"valid_targets_mean": 16211.6,
|
|
"valid_targets_min": 15436
|
|
},
|
|
{
|
|
"epoch": 0.6268656716417911,
|
|
"grad_norm": 0.021688865612169143,
|
|
"learning_rate": 3.993078261811186e-05,
|
|
"loss": 1.0096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20714055001735687,
|
|
"step": 147,
|
|
"valid_targets_mean": 10490.2,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 0.6311300639658849,
|
|
"grad_norm": 0.021266822606868806,
|
|
"learning_rate": 3.9925753347813e-05,
|
|
"loss": 0.9503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21735161542892456,
|
|
"step": 148,
|
|
"valid_targets_mean": 15595.6,
|
|
"valid_targets_min": 14469
|
|
},
|
|
{
|
|
"epoch": 0.6353944562899787,
|
|
"grad_norm": 0.02144211181259071,
|
|
"learning_rate": 3.992054805662876e-05,
|
|
"loss": 1.0306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3036506175994873,
|
|
"step": 149,
|
|
"valid_targets_mean": 16174.6,
|
|
"valid_targets_min": 15342
|
|
},
|
|
{
|
|
"epoch": 0.6396588486140725,
|
|
"grad_norm": 0.02223464693449807,
|
|
"learning_rate": 3.991516679054185e-05,
|
|
"loss": 0.9601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13784047961235046,
|
|
"step": 150,
|
|
"valid_targets_mean": 7276.1,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 0.6439232409381663,
|
|
"grad_norm": 0.02150266588340357,
|
|
"learning_rate": 3.9909609597089496e-05,
|
|
"loss": 0.9375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23577255010604858,
|
|
"step": 151,
|
|
"valid_targets_mean": 16193.9,
|
|
"valid_targets_min": 15622
|
|
},
|
|
{
|
|
"epoch": 0.6481876332622601,
|
|
"grad_norm": 0.02308598798479967,
|
|
"learning_rate": 3.9903876525363055e-05,
|
|
"loss": 1.0294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3142617344856262,
|
|
"step": 152,
|
|
"valid_targets_mean": 16011.0,
|
|
"valid_targets_min": 15310
|
|
},
|
|
{
|
|
"epoch": 0.652452025586354,
|
|
"grad_norm": 0.020860380628135066,
|
|
"learning_rate": 3.989796762600755e-05,
|
|
"loss": 0.9596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19116519391536713,
|
|
"step": 153,
|
|
"valid_targets_mean": 12549.9,
|
|
"valid_targets_min": 9154
|
|
},
|
|
{
|
|
"epoch": 0.6567164179104478,
|
|
"grad_norm": 0.020530983919045887,
|
|
"learning_rate": 3.9891882951221246e-05,
|
|
"loss": 1.0055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613886296749115,
|
|
"step": 154,
|
|
"valid_targets_mean": 16120.7,
|
|
"valid_targets_min": 15188
|
|
},
|
|
{
|
|
"epoch": 0.6609808102345416,
|
|
"grad_norm": 0.022674961696222153,
|
|
"learning_rate": 3.988562255475518e-05,
|
|
"loss": 1.0006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27846676111221313,
|
|
"step": 155,
|
|
"valid_targets_mean": 16149.2,
|
|
"valid_targets_min": 15243
|
|
},
|
|
{
|
|
"epoch": 0.6652452025586354,
|
|
"grad_norm": 0.02093816300628776,
|
|
"learning_rate": 3.987918649191268e-05,
|
|
"loss": 0.9941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24163512885570526,
|
|
"step": 156,
|
|
"valid_targets_mean": 15808.4,
|
|
"valid_targets_min": 14739
|
|
},
|
|
{
|
|
"epoch": 0.6695095948827292,
|
|
"grad_norm": 0.020744013279519836,
|
|
"learning_rate": 3.987257481954888e-05,
|
|
"loss": 0.9316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27360090613365173,
|
|
"step": 157,
|
|
"valid_targets_mean": 16021.9,
|
|
"valid_targets_min": 13449
|
|
},
|
|
{
|
|
"epoch": 0.673773987206823,
|
|
"grad_norm": 0.02207515333390211,
|
|
"learning_rate": 3.9865787596070236e-05,
|
|
"loss": 0.938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22181160748004913,
|
|
"step": 158,
|
|
"valid_targets_mean": 13372.6,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 0.6780383795309168,
|
|
"grad_norm": 0.020549184729741616,
|
|
"learning_rate": 3.9858824881433975e-05,
|
|
"loss": 1.0087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2499282956123352,
|
|
"step": 159,
|
|
"valid_targets_mean": 15985.7,
|
|
"valid_targets_min": 14788
|
|
},
|
|
{
|
|
"epoch": 0.6823027718550106,
|
|
"grad_norm": 0.020870979075327017,
|
|
"learning_rate": 3.9851686737147585e-05,
|
|
"loss": 0.9592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2576824128627777,
|
|
"step": 160,
|
|
"valid_targets_mean": 16123.3,
|
|
"valid_targets_min": 14773
|
|
},
|
|
{
|
|
"epoch": 0.6865671641791045,
|
|
"grad_norm": 0.022670240453181213,
|
|
"learning_rate": 3.9844373226268305e-05,
|
|
"loss": 0.9663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1692708283662796,
|
|
"step": 161,
|
|
"valid_targets_mean": 8168.0,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 0.6908315565031983,
|
|
"grad_norm": 0.019476344357992184,
|
|
"learning_rate": 3.983688441340249e-05,
|
|
"loss": 0.9759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23636317253112793,
|
|
"step": 162,
|
|
"valid_targets_mean": 16017.7,
|
|
"valid_targets_min": 15268
|
|
},
|
|
{
|
|
"epoch": 0.6950959488272921,
|
|
"grad_norm": 0.02163567803133885,
|
|
"learning_rate": 3.98292203647051e-05,
|
|
"loss": 1.0287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30122441053390503,
|
|
"step": 163,
|
|
"valid_targets_mean": 16137.8,
|
|
"valid_targets_min": 15568
|
|
},
|
|
{
|
|
"epoch": 0.6993603411513859,
|
|
"grad_norm": 0.021338795045941775,
|
|
"learning_rate": 3.982138114787912e-05,
|
|
"loss": 0.9752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1654120534658432,
|
|
"step": 164,
|
|
"valid_targets_mean": 10142.4,
|
|
"valid_targets_min": 3450
|
|
},
|
|
{
|
|
"epoch": 0.7036247334754797,
|
|
"grad_norm": 0.021360563043732326,
|
|
"learning_rate": 3.98133668321749e-05,
|
|
"loss": 0.9544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2355993390083313,
|
|
"step": 165,
|
|
"valid_targets_mean": 16181.6,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 0.7078891257995735,
|
|
"grad_norm": 0.02184650496542462,
|
|
"learning_rate": 3.980517748838963e-05,
|
|
"loss": 0.9378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563576102256775,
|
|
"step": 166,
|
|
"valid_targets_mean": 16120.2,
|
|
"valid_targets_min": 13757
|
|
},
|
|
{
|
|
"epoch": 0.7121535181236673,
|
|
"grad_norm": 0.020631881843084912,
|
|
"learning_rate": 3.979681318886664e-05,
|
|
"loss": 0.9739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2135094702243805,
|
|
"step": 167,
|
|
"valid_targets_mean": 13960.2,
|
|
"valid_targets_min": 12201
|
|
},
|
|
{
|
|
"epoch": 0.7164179104477612,
|
|
"grad_norm": 0.020583199270827923,
|
|
"learning_rate": 3.978827400749481e-05,
|
|
"loss": 0.9551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588880658149719,
|
|
"step": 168,
|
|
"valid_targets_mean": 16200.0,
|
|
"valid_targets_min": 15679
|
|
},
|
|
{
|
|
"epoch": 0.720682302771855,
|
|
"grad_norm": 0.02166825090520609,
|
|
"learning_rate": 3.977956001970788e-05,
|
|
"loss": 0.9372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682245969772339,
|
|
"step": 169,
|
|
"valid_targets_mean": 16186.5,
|
|
"valid_targets_min": 15259
|
|
},
|
|
{
|
|
"epoch": 0.7249466950959488,
|
|
"grad_norm": 0.021931838490400715,
|
|
"learning_rate": 3.977067130248381e-05,
|
|
"loss": 0.9536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2236153781414032,
|
|
"step": 170,
|
|
"valid_targets_mean": 15947.5,
|
|
"valid_targets_min": 14891
|
|
},
|
|
{
|
|
"epoch": 0.7292110874200426,
|
|
"grad_norm": 0.02146527547201413,
|
|
"learning_rate": 3.9761607934344095e-05,
|
|
"loss": 0.9518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630627155303955,
|
|
"step": 171,
|
|
"valid_targets_mean": 16091.7,
|
|
"valid_targets_min": 13878
|
|
},
|
|
{
|
|
"epoch": 0.7334754797441365,
|
|
"grad_norm": 0.02156822540584215,
|
|
"learning_rate": 3.975236999535306e-05,
|
|
"loss": 0.9721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20983393490314484,
|
|
"step": 172,
|
|
"valid_targets_mean": 10681.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 0.7377398720682303,
|
|
"grad_norm": 0.020509095854735204,
|
|
"learning_rate": 3.974295756711717e-05,
|
|
"loss": 0.9492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23455804586410522,
|
|
"step": 173,
|
|
"valid_targets_mean": 16104.5,
|
|
"valid_targets_min": 14749
|
|
},
|
|
{
|
|
"epoch": 0.7420042643923241,
|
|
"grad_norm": 0.022021031255564667,
|
|
"learning_rate": 3.9733370732784296e-05,
|
|
"loss": 1.0071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763734459877014,
|
|
"step": 174,
|
|
"valid_targets_mean": 16122.6,
|
|
"valid_targets_min": 14305
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.02154130880260425,
|
|
"learning_rate": 3.972360957704298e-05,
|
|
"loss": 0.9414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16149087250232697,
|
|
"step": 175,
|
|
"valid_targets_mean": 8570.5,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 0.7505330490405118,
|
|
"grad_norm": 0.020704909376232313,
|
|
"learning_rate": 3.97136741861217e-05,
|
|
"loss": 1.0407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630600929260254,
|
|
"step": 176,
|
|
"valid_targets_mean": 16094.0,
|
|
"valid_targets_min": 14923
|
|
},
|
|
{
|
|
"epoch": 0.7547974413646056,
|
|
"grad_norm": 0.02076306325892583,
|
|
"learning_rate": 3.970356464778808e-05,
|
|
"loss": 0.9593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28457731008529663,
|
|
"step": 177,
|
|
"valid_targets_mean": 15972.6,
|
|
"valid_targets_min": 12180
|
|
},
|
|
{
|
|
"epoch": 0.7590618336886994,
|
|
"grad_norm": 0.022813983046909982,
|
|
"learning_rate": 3.969328105134817e-05,
|
|
"loss": 0.9398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19965027272701263,
|
|
"step": 178,
|
|
"valid_targets_mean": 11740.2,
|
|
"valid_targets_min": 9116
|
|
},
|
|
{
|
|
"epoch": 0.7633262260127932,
|
|
"grad_norm": 0.021035992992402747,
|
|
"learning_rate": 3.9682823487645584e-05,
|
|
"loss": 1.0151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29171618819236755,
|
|
"step": 179,
|
|
"valid_targets_mean": 15911.7,
|
|
"valid_targets_min": 14573
|
|
},
|
|
{
|
|
"epoch": 0.767590618336887,
|
|
"grad_norm": 0.020863704990099676,
|
|
"learning_rate": 3.9672192049060745e-05,
|
|
"loss": 0.9676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25523146986961365,
|
|
"step": 180,
|
|
"valid_targets_mean": 16157.0,
|
|
"valid_targets_min": 15033
|
|
},
|
|
{
|
|
"epoch": 0.7718550106609808,
|
|
"grad_norm": 0.0205649704629955,
|
|
"learning_rate": 3.966138682951008e-05,
|
|
"loss": 0.9664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22264903783798218,
|
|
"step": 181,
|
|
"valid_targets_mean": 14627.9,
|
|
"valid_targets_min": 12294
|
|
},
|
|
{
|
|
"epoch": 0.7761194029850746,
|
|
"grad_norm": 0.023169631162007445,
|
|
"learning_rate": 3.9650407924445147e-05,
|
|
"loss": 0.9868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25275078415870667,
|
|
"step": 182,
|
|
"valid_targets_mean": 15994.2,
|
|
"valid_targets_min": 9016
|
|
},
|
|
{
|
|
"epoch": 0.7803837953091685,
|
|
"grad_norm": 0.021451265522994107,
|
|
"learning_rate": 3.963925543085181e-05,
|
|
"loss": 0.9973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23396554589271545,
|
|
"step": 183,
|
|
"valid_targets_mean": 13237.6,
|
|
"valid_targets_min": 2282
|
|
},
|
|
{
|
|
"epoch": 0.7846481876332623,
|
|
"grad_norm": 0.020454521170255768,
|
|
"learning_rate": 3.96279294472494e-05,
|
|
"loss": 0.9539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22253644466400146,
|
|
"step": 184,
|
|
"valid_targets_mean": 15915.1,
|
|
"valid_targets_min": 15022
|
|
},
|
|
{
|
|
"epoch": 0.7889125799573561,
|
|
"grad_norm": 0.020272941561494718,
|
|
"learning_rate": 3.961643007368984e-05,
|
|
"loss": 0.9809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3135003447532654,
|
|
"step": 185,
|
|
"valid_targets_mean": 15954.2,
|
|
"valid_targets_min": 14500
|
|
},
|
|
{
|
|
"epoch": 0.7931769722814499,
|
|
"grad_norm": 0.021501541037366564,
|
|
"learning_rate": 3.960475741175671e-05,
|
|
"loss": 0.9453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16432541608810425,
|
|
"step": 186,
|
|
"valid_targets_mean": 10097.6,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 0.7974413646055437,
|
|
"grad_norm": 0.020637152450000865,
|
|
"learning_rate": 3.959291156456444e-05,
|
|
"loss": 0.9703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22534318268299103,
|
|
"step": 187,
|
|
"valid_targets_mean": 16156.3,
|
|
"valid_targets_min": 14616
|
|
},
|
|
{
|
|
"epoch": 0.8017057569296375,
|
|
"grad_norm": 0.0216885258820386,
|
|
"learning_rate": 3.9580892636757334e-05,
|
|
"loss": 1.0058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3012459874153137,
|
|
"step": 188,
|
|
"valid_targets_mean": 16048.5,
|
|
"valid_targets_min": 14695
|
|
},
|
|
{
|
|
"epoch": 0.8059701492537313,
|
|
"grad_norm": 0.020602817515989424,
|
|
"learning_rate": 3.9568700734508645e-05,
|
|
"loss": 0.9459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17224372923374176,
|
|
"step": 189,
|
|
"valid_targets_mean": 9846.8,
|
|
"valid_targets_min": 4185
|
|
},
|
|
{
|
|
"epoch": 0.8102345415778252,
|
|
"grad_norm": 0.021668786698032873,
|
|
"learning_rate": 3.955633596551967e-05,
|
|
"loss": 1.0039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582971453666687,
|
|
"step": 190,
|
|
"valid_targets_mean": 16123.1,
|
|
"valid_targets_min": 15258
|
|
},
|
|
{
|
|
"epoch": 0.814498933901919,
|
|
"grad_norm": 0.02260171754883426,
|
|
"learning_rate": 3.9543798439018776e-05,
|
|
"loss": 0.9884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29938027262687683,
|
|
"step": 191,
|
|
"valid_targets_mean": 16095.9,
|
|
"valid_targets_min": 15138
|
|
},
|
|
{
|
|
"epoch": 0.8187633262260128,
|
|
"grad_norm": 0.02137186528103518,
|
|
"learning_rate": 3.953108826576046e-05,
|
|
"loss": 0.9726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22393490374088287,
|
|
"step": 192,
|
|
"valid_targets_mean": 13252.4,
|
|
"valid_targets_min": 10517
|
|
},
|
|
{
|
|
"epoch": 0.8230277185501066,
|
|
"grad_norm": 0.021023011443063965,
|
|
"learning_rate": 3.9518205558024334e-05,
|
|
"loss": 0.9831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28166884183883667,
|
|
"step": 193,
|
|
"valid_targets_mean": 16132.5,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 0.8272921108742004,
|
|
"grad_norm": 0.02041537999831593,
|
|
"learning_rate": 3.9505150429614154e-05,
|
|
"loss": 0.9159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27768152952194214,
|
|
"step": 194,
|
|
"valid_targets_mean": 16141.4,
|
|
"valid_targets_min": 15258
|
|
},
|
|
{
|
|
"epoch": 0.8315565031982942,
|
|
"grad_norm": 0.020584308885303524,
|
|
"learning_rate": 3.949192299585681e-05,
|
|
"loss": 0.9779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25782328844070435,
|
|
"step": 195,
|
|
"valid_targets_mean": 15803.9,
|
|
"valid_targets_min": 14998
|
|
},
|
|
{
|
|
"epoch": 0.835820895522388,
|
|
"grad_norm": 0.022112102427081663,
|
|
"learning_rate": 3.9478523373601325e-05,
|
|
"loss": 1.0262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2917345464229584,
|
|
"step": 196,
|
|
"valid_targets_mean": 15994.0,
|
|
"valid_targets_min": 14672
|
|
},
|
|
{
|
|
"epoch": 0.8400852878464818,
|
|
"grad_norm": 0.021903732197990588,
|
|
"learning_rate": 3.946495168121778e-05,
|
|
"loss": 0.9648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18892240524291992,
|
|
"step": 197,
|
|
"valid_targets_mean": 10865.2,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 0.8443496801705757,
|
|
"grad_norm": 0.020281717720857413,
|
|
"learning_rate": 3.9451208038596325e-05,
|
|
"loss": 0.951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21324071288108826,
|
|
"step": 198,
|
|
"valid_targets_mean": 15726.8,
|
|
"valid_targets_min": 14730
|
|
},
|
|
{
|
|
"epoch": 0.8486140724946695,
|
|
"grad_norm": 0.021715739735202577,
|
|
"learning_rate": 3.943729256714608e-05,
|
|
"loss": 0.9635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028005361557007,
|
|
"step": 199,
|
|
"valid_targets_mean": 16198.1,
|
|
"valid_targets_min": 15748
|
|
},
|
|
{
|
|
"epoch": 0.8528784648187633,
|
|
"grad_norm": 0.022545156537191163,
|
|
"learning_rate": 3.942320538979408e-05,
|
|
"loss": 0.9764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14240457117557526,
|
|
"step": 200,
|
|
"valid_targets_mean": 7739.5,
|
|
"valid_targets_min": 2073
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.02087867115422663,
|
|
"learning_rate": 3.9408946630984144e-05,
|
|
"loss": 1.028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566724419593811,
|
|
"step": 201,
|
|
"valid_targets_mean": 15964.7,
|
|
"valid_targets_min": 12180
|
|
},
|
|
{
|
|
"epoch": 0.8614072494669509,
|
|
"grad_norm": 0.022416783780977105,
|
|
"learning_rate": 3.939451641667587e-05,
|
|
"loss": 0.9757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28411751985549927,
|
|
"step": 202,
|
|
"valid_targets_mean": 16117.1,
|
|
"valid_targets_min": 14905
|
|
},
|
|
{
|
|
"epoch": 0.8656716417910447,
|
|
"grad_norm": 0.02156908159636131,
|
|
"learning_rate": 3.937991487434342e-05,
|
|
"loss": 0.9188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17827017605304718,
|
|
"step": 203,
|
|
"valid_targets_mean": 12001.3,
|
|
"valid_targets_min": 9081
|
|
},
|
|
{
|
|
"epoch": 0.8699360341151386,
|
|
"grad_norm": 0.021168375279297397,
|
|
"learning_rate": 3.9365142132974484e-05,
|
|
"loss": 0.9868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26591408252716064,
|
|
"step": 204,
|
|
"valid_targets_mean": 16108.2,
|
|
"valid_targets_min": 15263
|
|
},
|
|
{
|
|
"epoch": 0.8742004264392325,
|
|
"grad_norm": 0.022064516901569794,
|
|
"learning_rate": 3.935019832306905e-05,
|
|
"loss": 0.9934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857792377471924,
|
|
"step": 205,
|
|
"valid_targets_mean": 16117.6,
|
|
"valid_targets_min": 15022
|
|
},
|
|
{
|
|
"epoch": 0.8784648187633263,
|
|
"grad_norm": 0.020238525632817903,
|
|
"learning_rate": 3.933508357663832e-05,
|
|
"loss": 0.988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23251603543758392,
|
|
"step": 206,
|
|
"valid_targets_mean": 14554.5,
|
|
"valid_targets_min": 7924
|
|
},
|
|
{
|
|
"epoch": 0.8827292110874201,
|
|
"grad_norm": 0.020182115705191357,
|
|
"learning_rate": 3.9319798027203544e-05,
|
|
"loss": 0.9287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542370557785034,
|
|
"step": 207,
|
|
"valid_targets_mean": 16154.1,
|
|
"valid_targets_min": 15073
|
|
},
|
|
{
|
|
"epoch": 0.8869936034115139,
|
|
"grad_norm": 0.02138490357984852,
|
|
"learning_rate": 3.930434180979478e-05,
|
|
"loss": 0.9474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22509917616844177,
|
|
"step": 208,
|
|
"valid_targets_mean": 13274.8,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 0.8912579957356077,
|
|
"grad_norm": 0.019816141608615067,
|
|
"learning_rate": 3.928871506094975e-05,
|
|
"loss": 0.9719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2433328628540039,
|
|
"step": 209,
|
|
"valid_targets_mean": 15674.8,
|
|
"valid_targets_min": 13973
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.021186620770208633,
|
|
"learning_rate": 3.927291791871264e-05,
|
|
"loss": 0.945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28887563943862915,
|
|
"step": 210,
|
|
"valid_targets_mean": 16140.3,
|
|
"valid_targets_min": 15386
|
|
},
|
|
{
|
|
"epoch": 0.8997867803837953,
|
|
"grad_norm": 0.02148459716894132,
|
|
"learning_rate": 3.925695052263284e-05,
|
|
"loss": 0.9466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.168173685669899,
|
|
"step": 211,
|
|
"valid_targets_mean": 9343.2,
|
|
"valid_targets_min": 2046
|
|
},
|
|
{
|
|
"epoch": 0.9040511727078892,
|
|
"grad_norm": 0.02094900139597615,
|
|
"learning_rate": 3.924081301376375e-05,
|
|
"loss": 1.0153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623210549354553,
|
|
"step": 212,
|
|
"valid_targets_mean": 15877.0,
|
|
"valid_targets_min": 14394
|
|
},
|
|
{
|
|
"epoch": 0.908315565031983,
|
|
"grad_norm": 0.022606714924704504,
|
|
"learning_rate": 3.9224505534661525e-05,
|
|
"loss": 1.0095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28156691789627075,
|
|
"step": 213,
|
|
"valid_targets_mean": 16171.1,
|
|
"valid_targets_min": 15390
|
|
},
|
|
{
|
|
"epoch": 0.9125799573560768,
|
|
"grad_norm": 0.022337852961703913,
|
|
"learning_rate": 3.92080282293838e-05,
|
|
"loss": 0.9795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17728140950202942,
|
|
"step": 214,
|
|
"valid_targets_mean": 11795.1,
|
|
"valid_targets_min": 6981
|
|
},
|
|
{
|
|
"epoch": 0.9168443496801706,
|
|
"grad_norm": 0.021237617260337228,
|
|
"learning_rate": 3.9191381243488417e-05,
|
|
"loss": 0.9618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2440396398305893,
|
|
"step": 215,
|
|
"valid_targets_mean": 16165.6,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 0.9211087420042644,
|
|
"grad_norm": 0.02142622286636768,
|
|
"learning_rate": 3.9174564724032167e-05,
|
|
"loss": 0.9717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29267382621765137,
|
|
"step": 216,
|
|
"valid_targets_mean": 16143.5,
|
|
"valid_targets_min": 15556
|
|
},
|
|
{
|
|
"epoch": 0.9253731343283582,
|
|
"grad_norm": 0.020913290548704284,
|
|
"learning_rate": 3.9157578819569455e-05,
|
|
"loss": 0.9616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893332302570343,
|
|
"step": 217,
|
|
"valid_targets_mean": 13532.0,
|
|
"valid_targets_min": 10452
|
|
},
|
|
{
|
|
"epoch": 0.929637526652452,
|
|
"grad_norm": 0.021179274439712133,
|
|
"learning_rate": 3.9140423680151036e-05,
|
|
"loss": 0.9727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27407997846603394,
|
|
"step": 218,
|
|
"valid_targets_mean": 16084.0,
|
|
"valid_targets_min": 14810
|
|
},
|
|
{
|
|
"epoch": 0.9339019189765458,
|
|
"grad_norm": 0.022397133405303277,
|
|
"learning_rate": 3.9123099457322625e-05,
|
|
"loss": 0.96,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2793392241001129,
|
|
"step": 219,
|
|
"valid_targets_mean": 16103.1,
|
|
"valid_targets_min": 14415
|
|
},
|
|
{
|
|
"epoch": 0.9381663113006397,
|
|
"grad_norm": 0.020623794753748537,
|
|
"learning_rate": 3.9105606304123605e-05,
|
|
"loss": 0.9933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2240883708000183,
|
|
"step": 220,
|
|
"valid_targets_mean": 14692.8,
|
|
"valid_targets_min": 13091
|
|
},
|
|
{
|
|
"epoch": 0.9424307036247335,
|
|
"grad_norm": 0.02243159216533204,
|
|
"learning_rate": 3.908794437508567e-05,
|
|
"loss": 0.9974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3026067018508911,
|
|
"step": 221,
|
|
"valid_targets_mean": 16012.4,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 0.9466950959488273,
|
|
"grad_norm": 0.022137663840484545,
|
|
"learning_rate": 3.907011382623145e-05,
|
|
"loss": 0.9843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2083946168422699,
|
|
"step": 222,
|
|
"valid_targets_mean": 10876.3,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 0.9509594882729211,
|
|
"grad_norm": 0.01904096156517905,
|
|
"learning_rate": 3.905211481507313e-05,
|
|
"loss": 0.9168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2139977514743805,
|
|
"step": 223,
|
|
"valid_targets_mean": 16156.1,
|
|
"valid_targets_min": 15391
|
|
},
|
|
{
|
|
"epoch": 0.9552238805970149,
|
|
"grad_norm": 0.02202195818603212,
|
|
"learning_rate": 3.903394750061106e-05,
|
|
"loss": 0.9317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620837986469269,
|
|
"step": 224,
|
|
"valid_targets_mean": 16214.3,
|
|
"valid_targets_min": 15503
|
|
},
|
|
{
|
|
"epoch": 0.9594882729211087,
|
|
"grad_norm": 0.02136481570390676,
|
|
"learning_rate": 3.9015612043332375e-05,
|
|
"loss": 0.9437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336422860622406,
|
|
"step": 225,
|
|
"valid_targets_mean": 7417.2,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 0.9637526652452025,
|
|
"grad_norm": 0.021020666340488464,
|
|
"learning_rate": 3.8997108605209535e-05,
|
|
"loss": 0.9528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22992467880249023,
|
|
"step": 226,
|
|
"valid_targets_mean": 16191.5,
|
|
"valid_targets_min": 15459
|
|
},
|
|
{
|
|
"epoch": 0.9680170575692963,
|
|
"grad_norm": 0.022364412859411625,
|
|
"learning_rate": 3.897843734969891e-05,
|
|
"loss": 0.9664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.296586811542511,
|
|
"step": 227,
|
|
"valid_targets_mean": 16110.9,
|
|
"valid_targets_min": 13912
|
|
},
|
|
{
|
|
"epoch": 0.9722814498933902,
|
|
"grad_norm": 0.02081244818805342,
|
|
"learning_rate": 3.895959844173937e-05,
|
|
"loss": 0.9356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.186720609664917,
|
|
"step": 228,
|
|
"valid_targets_mean": 11978.9,
|
|
"valid_targets_min": 9328
|
|
},
|
|
{
|
|
"epoch": 0.976545842217484,
|
|
"grad_norm": 0.019315236104902395,
|
|
"learning_rate": 3.8940592047750774e-05,
|
|
"loss": 0.9503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24086540937423706,
|
|
"step": 229,
|
|
"valid_targets_mean": 16107.3,
|
|
"valid_targets_min": 13689
|
|
},
|
|
{
|
|
"epoch": 0.9808102345415778,
|
|
"grad_norm": 0.02178634761710576,
|
|
"learning_rate": 3.892141833563255e-05,
|
|
"loss": 1.0039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103596270084381,
|
|
"step": 230,
|
|
"valid_targets_mean": 16052.4,
|
|
"valid_targets_min": 15327
|
|
},
|
|
{
|
|
"epoch": 0.9850746268656716,
|
|
"grad_norm": 0.020863323158617116,
|
|
"learning_rate": 3.8902077474762155e-05,
|
|
"loss": 0.9243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2050703763961792,
|
|
"step": 231,
|
|
"valid_targets_mean": 13529.6,
|
|
"valid_targets_min": 11914
|
|
},
|
|
{
|
|
"epoch": 0.9893390191897654,
|
|
"grad_norm": 0.02224812857003269,
|
|
"learning_rate": 3.888256963599364e-05,
|
|
"loss": 1.0407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30222398042678833,
|
|
"step": 232,
|
|
"valid_targets_mean": 16059.8,
|
|
"valid_targets_min": 14583
|
|
},
|
|
{
|
|
"epoch": 0.9936034115138592,
|
|
"grad_norm": 0.02179746606533591,
|
|
"learning_rate": 3.886289499165609e-05,
|
|
"loss": 0.9997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2460249662399292,
|
|
"step": 233,
|
|
"valid_targets_mean": 13017.8,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 0.997867803837953,
|
|
"grad_norm": 0.02189250073373731,
|
|
"learning_rate": 3.884305371555215e-05,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544005513191223,
|
|
"step": 234,
|
|
"valid_targets_mean": 15797.3,
|
|
"valid_targets_min": 14896
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.03285724841957632,
|
|
"learning_rate": 3.882304598295643e-05,
|
|
"loss": 0.9979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4330124855041504,
|
|
"step": 235,
|
|
"valid_targets_mean": 9971.4,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 1.004264392324094,
|
|
"grad_norm": 0.020730960791596087,
|
|
"learning_rate": 3.880287197061402e-05,
|
|
"loss": 0.9589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24139849841594696,
|
|
"step": 236,
|
|
"valid_targets_mean": 16151.5,
|
|
"valid_targets_min": 15506
|
|
},
|
|
{
|
|
"epoch": 1.0085287846481876,
|
|
"grad_norm": 0.021678740829271653,
|
|
"learning_rate": 3.878253185673888e-05,
|
|
"loss": 0.978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3028293550014496,
|
|
"step": 237,
|
|
"valid_targets_mean": 16127.5,
|
|
"valid_targets_min": 15044
|
|
},
|
|
{
|
|
"epoch": 1.0127931769722816,
|
|
"grad_norm": 0.02209392603160782,
|
|
"learning_rate": 3.876202582101229e-05,
|
|
"loss": 0.9714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1950599104166031,
|
|
"step": 238,
|
|
"valid_targets_mean": 12329.8,
|
|
"valid_targets_min": 8883
|
|
},
|
|
{
|
|
"epoch": 1.0170575692963753,
|
|
"grad_norm": 0.022311111940518026,
|
|
"learning_rate": 3.874135404458125e-05,
|
|
"loss": 0.9883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520405054092407,
|
|
"step": 239,
|
|
"valid_targets_mean": 16209.4,
|
|
"valid_targets_min": 15322
|
|
},
|
|
{
|
|
"epoch": 1.0213219616204692,
|
|
"grad_norm": 0.021814455655340806,
|
|
"learning_rate": 3.8720516710056905e-05,
|
|
"loss": 0.9542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847292423248291,
|
|
"step": 240,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 15394
|
|
},
|
|
{
|
|
"epoch": 1.0255863539445629,
|
|
"grad_norm": 0.022515570846508032,
|
|
"learning_rate": 3.8699514001512885e-05,
|
|
"loss": 1.0005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2258748710155487,
|
|
"step": 241,
|
|
"valid_targets_mean": 14172.9,
|
|
"valid_targets_min": 11958
|
|
},
|
|
{
|
|
"epoch": 1.0298507462686568,
|
|
"grad_norm": 0.0226862568617241,
|
|
"learning_rate": 3.867834610448374e-05,
|
|
"loss": 0.9768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842453420162201,
|
|
"step": 242,
|
|
"valid_targets_mean": 16126.0,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 1.0341151385927505,
|
|
"grad_norm": 0.023882727667588016,
|
|
"learning_rate": 3.865701320596324e-05,
|
|
"loss": 0.9575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2112501710653305,
|
|
"step": 243,
|
|
"valid_targets_mean": 13076.0,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 1.0383795309168444,
|
|
"grad_norm": 0.02080996013068224,
|
|
"learning_rate": 3.863551549440277e-05,
|
|
"loss": 0.995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2388361692428589,
|
|
"step": 244,
|
|
"valid_targets_mean": 15930.0,
|
|
"valid_targets_min": 15246
|
|
},
|
|
{
|
|
"epoch": 1.0426439232409381,
|
|
"grad_norm": 0.021041947999052308,
|
|
"learning_rate": 3.861385315970964e-05,
|
|
"loss": 0.9705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26563936471939087,
|
|
"step": 245,
|
|
"valid_targets_mean": 16139.5,
|
|
"valid_targets_min": 14672
|
|
},
|
|
{
|
|
"epoch": 1.046908315565032,
|
|
"grad_norm": 0.021039165730031374,
|
|
"learning_rate": 3.859202639324542e-05,
|
|
"loss": 0.9476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16970095038414001,
|
|
"step": 246,
|
|
"valid_targets_mean": 9914.1,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 1.0511727078891258,
|
|
"grad_norm": 0.02139089350300504,
|
|
"learning_rate": 3.8570035387824214e-05,
|
|
"loss": 0.9699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23806408047676086,
|
|
"step": 247,
|
|
"valid_targets_mean": 16068.2,
|
|
"valid_targets_min": 15353
|
|
},
|
|
{
|
|
"epoch": 1.0554371002132197,
|
|
"grad_norm": 0.022685136536396776,
|
|
"learning_rate": 3.8547880337711036e-05,
|
|
"loss": 0.9629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27920055389404297,
|
|
"step": 248,
|
|
"valid_targets_mean": 16174.2,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 1.0597014925373134,
|
|
"grad_norm": 0.020919703807169936,
|
|
"learning_rate": 3.8525561438620016e-05,
|
|
"loss": 0.9427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646086424589157,
|
|
"step": 249,
|
|
"valid_targets_mean": 10546.0,
|
|
"valid_targets_min": 6936
|
|
},
|
|
{
|
|
"epoch": 1.0639658848614073,
|
|
"grad_norm": 0.020941223431384305,
|
|
"learning_rate": 3.850307888771269e-05,
|
|
"loss": 0.9673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271384060382843,
|
|
"step": 250,
|
|
"valid_targets_mean": 16118.7,
|
|
"valid_targets_min": 15448
|
|
},
|
|
{
|
|
"epoch": 1.068230277185501,
|
|
"grad_norm": 0.021915763212146517,
|
|
"learning_rate": 3.848043288359629e-05,
|
|
"loss": 0.9572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28256577253341675,
|
|
"step": 251,
|
|
"valid_targets_mean": 16153.9,
|
|
"valid_targets_min": 15506
|
|
},
|
|
{
|
|
"epoch": 1.072494669509595,
|
|
"grad_norm": 0.020349393374658124,
|
|
"learning_rate": 3.8457623626321944e-05,
|
|
"loss": 0.9651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.218336284160614,
|
|
"step": 252,
|
|
"valid_targets_mean": 13836.2,
|
|
"valid_targets_min": 12245
|
|
},
|
|
{
|
|
"epoch": 1.0767590618336886,
|
|
"grad_norm": 0.0216523270801992,
|
|
"learning_rate": 3.843465131738296e-05,
|
|
"loss": 0.9401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25135505199432373,
|
|
"step": 253,
|
|
"valid_targets_mean": 16249.0,
|
|
"valid_targets_min": 15770
|
|
},
|
|
{
|
|
"epoch": 1.0810234541577826,
|
|
"grad_norm": 0.02230905224054325,
|
|
"learning_rate": 3.8411516159713e-05,
|
|
"loss": 0.9695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29536378383636475,
|
|
"step": 254,
|
|
"valid_targets_mean": 16026.6,
|
|
"valid_targets_min": 15189
|
|
},
|
|
{
|
|
"epoch": 1.0852878464818763,
|
|
"grad_norm": 0.020890134764966654,
|
|
"learning_rate": 3.838821835768431e-05,
|
|
"loss": 0.9714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22130641341209412,
|
|
"step": 255,
|
|
"valid_targets_mean": 14562.3,
|
|
"valid_targets_min": 12771
|
|
},
|
|
{
|
|
"epoch": 1.0895522388059702,
|
|
"grad_norm": 0.021137930961037586,
|
|
"learning_rate": 3.83647581171059e-05,
|
|
"loss": 0.9781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657286822795868,
|
|
"step": 256,
|
|
"valid_targets_mean": 16177.3,
|
|
"valid_targets_min": 15339
|
|
},
|
|
{
|
|
"epoch": 1.0938166311300639,
|
|
"grad_norm": 0.023683950403388505,
|
|
"learning_rate": 3.8341135645221744e-05,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1855088174343109,
|
|
"step": 257,
|
|
"valid_targets_mean": 10268.0,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 1.0980810234541578,
|
|
"grad_norm": 0.021710299127557946,
|
|
"learning_rate": 3.831735115070895e-05,
|
|
"loss": 0.9826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22451967000961304,
|
|
"step": 258,
|
|
"valid_targets_mean": 15584.8,
|
|
"valid_targets_min": 12360
|
|
},
|
|
{
|
|
"epoch": 1.1023454157782515,
|
|
"grad_norm": 0.021947613301864632,
|
|
"learning_rate": 3.8293404843675904e-05,
|
|
"loss": 0.9511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908894121646881,
|
|
"step": 259,
|
|
"valid_targets_mean": 16119.3,
|
|
"valid_targets_min": 15244
|
|
},
|
|
{
|
|
"epoch": 1.1066098081023454,
|
|
"grad_norm": 0.022560785432242523,
|
|
"learning_rate": 3.8269296935660395e-05,
|
|
"loss": 0.9822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1450565904378891,
|
|
"step": 260,
|
|
"valid_targets_mean": 7178.3,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 1.1108742004264391,
|
|
"grad_norm": 0.019985089064940297,
|
|
"learning_rate": 3.82450276396278e-05,
|
|
"loss": 0.9517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23591622710227966,
|
|
"step": 261,
|
|
"valid_targets_mean": 16098.5,
|
|
"valid_targets_min": 14578
|
|
},
|
|
{
|
|
"epoch": 1.115138592750533,
|
|
"grad_norm": 0.02205059428994166,
|
|
"learning_rate": 3.822059716996916e-05,
|
|
"loss": 0.9249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24301102757453918,
|
|
"step": 262,
|
|
"valid_targets_mean": 16087.1,
|
|
"valid_targets_min": 10621
|
|
},
|
|
{
|
|
"epoch": 1.1194029850746268,
|
|
"grad_norm": 0.022119325582468184,
|
|
"learning_rate": 3.819600574249929e-05,
|
|
"loss": 0.9653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19326110184192657,
|
|
"step": 263,
|
|
"valid_targets_mean": 13091.4,
|
|
"valid_targets_min": 10872
|
|
},
|
|
{
|
|
"epoch": 1.1236673773987207,
|
|
"grad_norm": 0.021311231255800884,
|
|
"learning_rate": 3.817125357445489e-05,
|
|
"loss": 0.9885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605571150779724,
|
|
"step": 264,
|
|
"valid_targets_mean": 16141.9,
|
|
"valid_targets_min": 15076
|
|
},
|
|
{
|
|
"epoch": 1.1279317697228146,
|
|
"grad_norm": 0.021109476943510952,
|
|
"learning_rate": 3.814634088449261e-05,
|
|
"loss": 0.9438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603449821472168,
|
|
"step": 265,
|
|
"valid_targets_mean": 16207.4,
|
|
"valid_targets_min": 15321
|
|
},
|
|
{
|
|
"epoch": 1.1321961620469083,
|
|
"grad_norm": 0.020401937386932113,
|
|
"learning_rate": 3.812126789268712e-05,
|
|
"loss": 0.9392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2103728950023651,
|
|
"step": 266,
|
|
"valid_targets_mean": 13988.3,
|
|
"valid_targets_min": 11922
|
|
},
|
|
{
|
|
"epoch": 1.136460554371002,
|
|
"grad_norm": 0.022399147405737164,
|
|
"learning_rate": 3.80960348205292e-05,
|
|
"loss": 0.9557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269101083278656,
|
|
"step": 267,
|
|
"valid_targets_mean": 16217.7,
|
|
"valid_targets_min": 15535
|
|
},
|
|
{
|
|
"epoch": 1.140724946695096,
|
|
"grad_norm": 0.022298270586927828,
|
|
"learning_rate": 3.807064189092372e-05,
|
|
"loss": 0.9789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2304411232471466,
|
|
"step": 268,
|
|
"valid_targets_mean": 12975.8,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 1.1449893390191899,
|
|
"grad_norm": 0.02094162061738807,
|
|
"learning_rate": 3.804508932818771e-05,
|
|
"loss": 0.9323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22554528713226318,
|
|
"step": 269,
|
|
"valid_targets_mean": 15793.7,
|
|
"valid_targets_min": 14623
|
|
},
|
|
{
|
|
"epoch": 1.1492537313432836,
|
|
"grad_norm": 0.022889608178373516,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30089887976646423,
|
|
"step": 270,
|
|
"valid_targets_mean": 15993.5,
|
|
"valid_targets_min": 14260
|
|
},
|
|
{
|
|
"epoch": 1.1535181236673775,
|
|
"grad_norm": 0.022780189469309032,
|
|
"learning_rate": 3.799350620764114e-05,
|
|
"loss": 1.0041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19340048730373383,
|
|
"step": 271,
|
|
"valid_targets_mean": 9609.6,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 1.1577825159914712,
|
|
"grad_norm": 0.020508384718487628,
|
|
"learning_rate": 3.7967476105507535e-05,
|
|
"loss": 0.9503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2286824882030487,
|
|
"step": 272,
|
|
"valid_targets_mean": 15748.7,
|
|
"valid_targets_min": 14300
|
|
},
|
|
{
|
|
"epoch": 1.1620469083155651,
|
|
"grad_norm": 0.022483337920124163,
|
|
"learning_rate": 3.7941287281593284e-05,
|
|
"loss": 1.0325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32754823565483093,
|
|
"step": 273,
|
|
"valid_targets_mean": 16051.0,
|
|
"valid_targets_min": 15207
|
|
},
|
|
{
|
|
"epoch": 1.1663113006396588,
|
|
"grad_norm": 0.023421805368364582,
|
|
"learning_rate": 3.7914939967246227e-05,
|
|
"loss": 0.9436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16955426335334778,
|
|
"step": 274,
|
|
"valid_targets_mean": 10276.3,
|
|
"valid_targets_min": 4673
|
|
},
|
|
{
|
|
"epoch": 1.1705756929637527,
|
|
"grad_norm": 0.020521512400258943,
|
|
"learning_rate": 3.7888434395214285e-05,
|
|
"loss": 0.9386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543228566646576,
|
|
"step": 275,
|
|
"valid_targets_mean": 16148.7,
|
|
"valid_targets_min": 15515
|
|
},
|
|
{
|
|
"epoch": 1.1748400852878464,
|
|
"grad_norm": 0.021884018510990796,
|
|
"learning_rate": 3.786177079964339e-05,
|
|
"loss": 0.9898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025882840156555,
|
|
"step": 276,
|
|
"valid_targets_mean": 16121.5,
|
|
"valid_targets_min": 14573
|
|
},
|
|
{
|
|
"epoch": 1.1791044776119404,
|
|
"grad_norm": 0.019942483658416384,
|
|
"learning_rate": 3.783494941607544e-05,
|
|
"loss": 0.942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21508443355560303,
|
|
"step": 277,
|
|
"valid_targets_mean": 14664.9,
|
|
"valid_targets_min": 12143
|
|
},
|
|
{
|
|
"epoch": 1.183368869936034,
|
|
"grad_norm": 0.021424000878951032,
|
|
"learning_rate": 3.780797048144621e-05,
|
|
"loss": 0.9395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27440714836120605,
|
|
"step": 278,
|
|
"valid_targets_mean": 16161.2,
|
|
"valid_targets_min": 15525
|
|
},
|
|
{
|
|
"epoch": 1.187633262260128,
|
|
"grad_norm": 0.022422524776699114,
|
|
"learning_rate": 3.7780834234083236e-05,
|
|
"loss": 0.9277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552390694618225,
|
|
"step": 279,
|
|
"valid_targets_mean": 16223.9,
|
|
"valid_targets_min": 15689
|
|
},
|
|
{
|
|
"epoch": 1.1918976545842217,
|
|
"grad_norm": 0.020949917736549133,
|
|
"learning_rate": 3.775354091370376e-05,
|
|
"loss": 0.9915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24117562174797058,
|
|
"step": 280,
|
|
"valid_targets_mean": 15455.4,
|
|
"valid_targets_min": 14103
|
|
},
|
|
{
|
|
"epoch": 1.1961620469083156,
|
|
"grad_norm": 0.020749629332668478,
|
|
"learning_rate": 3.772609076141255e-05,
|
|
"loss": 0.9514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28457361459732056,
|
|
"step": 281,
|
|
"valid_targets_mean": 16105.2,
|
|
"valid_targets_min": 15435
|
|
},
|
|
{
|
|
"epoch": 1.2004264392324093,
|
|
"grad_norm": 0.02253413183394309,
|
|
"learning_rate": 3.769848401969982e-05,
|
|
"loss": 0.9633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19412922859191895,
|
|
"step": 282,
|
|
"valid_targets_mean": 10228.9,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 1.2046908315565032,
|
|
"grad_norm": 0.020832293771813826,
|
|
"learning_rate": 3.767072093243907e-05,
|
|
"loss": 1.0355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509702742099762,
|
|
"step": 283,
|
|
"valid_targets_mean": 15805.7,
|
|
"valid_targets_min": 14552
|
|
},
|
|
{
|
|
"epoch": 1.208955223880597,
|
|
"grad_norm": 0.021178164932643788,
|
|
"learning_rate": 3.7642801744884915e-05,
|
|
"loss": 0.9496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26452258229255676,
|
|
"step": 284,
|
|
"valid_targets_mean": 16186.2,
|
|
"valid_targets_min": 15179
|
|
},
|
|
{
|
|
"epoch": 1.2132196162046909,
|
|
"grad_norm": 0.0207372593752374,
|
|
"learning_rate": 3.761472670367096e-05,
|
|
"loss": 0.9466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1544024497270584,
|
|
"step": 285,
|
|
"valid_targets_mean": 9708.0,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 1.2174840085287846,
|
|
"grad_norm": 0.020117948970003886,
|
|
"learning_rate": 3.758649605680758e-05,
|
|
"loss": 0.9486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22092610597610474,
|
|
"step": 286,
|
|
"valid_targets_mean": 16194.2,
|
|
"valid_targets_min": 15651
|
|
},
|
|
{
|
|
"epoch": 1.2217484008528785,
|
|
"grad_norm": 0.02236838269144788,
|
|
"learning_rate": 3.755811005367974e-05,
|
|
"loss": 1.014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30473244190216064,
|
|
"step": 287,
|
|
"valid_targets_mean": 15962.8,
|
|
"valid_targets_min": 13388
|
|
},
|
|
{
|
|
"epoch": 1.2260127931769722,
|
|
"grad_norm": 0.020677270558246384,
|
|
"learning_rate": 3.752956894504481e-05,
|
|
"loss": 0.9472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19772371649742126,
|
|
"step": 288,
|
|
"valid_targets_mean": 12719.8,
|
|
"valid_targets_min": 10390
|
|
},
|
|
{
|
|
"epoch": 1.2302771855010661,
|
|
"grad_norm": 0.021776841804097617,
|
|
"learning_rate": 3.750087298303033e-05,
|
|
"loss": 0.9633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22894775867462158,
|
|
"step": 289,
|
|
"valid_targets_mean": 16155.7,
|
|
"valid_targets_min": 12180
|
|
},
|
|
{
|
|
"epoch": 1.2345415778251598,
|
|
"grad_norm": 0.021802953714575683,
|
|
"learning_rate": 3.7472022421131795e-05,
|
|
"loss": 0.9545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27260851860046387,
|
|
"step": 290,
|
|
"valid_targets_mean": 16184.1,
|
|
"valid_targets_min": 15339
|
|
},
|
|
{
|
|
"epoch": 1.2388059701492538,
|
|
"grad_norm": 0.02037619523700463,
|
|
"learning_rate": 3.7443017514210406e-05,
|
|
"loss": 0.9707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2290799915790558,
|
|
"step": 291,
|
|
"valid_targets_mean": 14838.3,
|
|
"valid_targets_min": 13407
|
|
},
|
|
{
|
|
"epoch": 1.2430703624733475,
|
|
"grad_norm": 0.02121822256908429,
|
|
"learning_rate": 3.7413858518490825e-05,
|
|
"loss": 0.9419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708539366722107,
|
|
"step": 292,
|
|
"valid_targets_mean": 16135.3,
|
|
"valid_targets_min": 15028
|
|
},
|
|
{
|
|
"epoch": 1.2473347547974414,
|
|
"grad_norm": 0.023044001037171035,
|
|
"learning_rate": 3.7384545691558895e-05,
|
|
"loss": 0.9694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26938313245773315,
|
|
"step": 293,
|
|
"valid_targets_mean": 12802.7,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 1.251599147121535,
|
|
"grad_norm": 0.0194161271941763,
|
|
"learning_rate": 3.735507929235941e-05,
|
|
"loss": 0.947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23040422797203064,
|
|
"step": 294,
|
|
"valid_targets_mean": 15855.2,
|
|
"valid_targets_min": 15043
|
|
},
|
|
{
|
|
"epoch": 1.255863539445629,
|
|
"grad_norm": 0.021514472647709833,
|
|
"learning_rate": 3.732545958119378e-05,
|
|
"loss": 0.9662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963070869445801,
|
|
"step": 295,
|
|
"valid_targets_mean": 16092.4,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 1.260127931769723,
|
|
"grad_norm": 0.021863948579874955,
|
|
"learning_rate": 3.729568681971774e-05,
|
|
"loss": 0.9509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17454400658607483,
|
|
"step": 296,
|
|
"valid_targets_mean": 9613.2,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 1.2643923240938166,
|
|
"grad_norm": 0.020855043543041768,
|
|
"learning_rate": 3.726576127093905e-05,
|
|
"loss": 0.9296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23523148894309998,
|
|
"step": 297,
|
|
"valid_targets_mean": 15849.4,
|
|
"valid_targets_min": 15029
|
|
},
|
|
{
|
|
"epoch": 1.2686567164179103,
|
|
"grad_norm": 0.021848129253506675,
|
|
"learning_rate": 3.7235683199215177e-05,
|
|
"loss": 0.9641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31243783235549927,
|
|
"step": 298,
|
|
"valid_targets_mean": 16127.3,
|
|
"valid_targets_min": 15551
|
|
},
|
|
{
|
|
"epoch": 1.2729211087420043,
|
|
"grad_norm": 0.020222215112413365,
|
|
"learning_rate": 3.7205452870250944e-05,
|
|
"loss": 0.9585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18428143858909607,
|
|
"step": 299,
|
|
"valid_targets_mean": 11090.5,
|
|
"valid_targets_min": 5758
|
|
},
|
|
{
|
|
"epoch": 1.2771855010660982,
|
|
"grad_norm": 0.02040134486660789,
|
|
"learning_rate": 3.7175070551096204e-05,
|
|
"loss": 0.9346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24869747459888458,
|
|
"step": 300,
|
|
"valid_targets_mean": 16097.5,
|
|
"valid_targets_min": 14262
|
|
},
|
|
{
|
|
"epoch": 1.2814498933901919,
|
|
"grad_norm": 0.022405277855910226,
|
|
"learning_rate": 3.7144536510143436e-05,
|
|
"loss": 0.9654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2883691191673279,
|
|
"step": 301,
|
|
"valid_targets_mean": 16088.8,
|
|
"valid_targets_min": 15308
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.02037256261652766,
|
|
"learning_rate": 3.711385101712544e-05,
|
|
"loss": 0.9537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20540942251682281,
|
|
"step": 302,
|
|
"valid_targets_mean": 13202.6,
|
|
"valid_targets_min": 11045
|
|
},
|
|
{
|
|
"epoch": 1.2899786780383795,
|
|
"grad_norm": 0.020413485653971844,
|
|
"learning_rate": 3.708301434311289e-05,
|
|
"loss": 0.9817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29026246070861816,
|
|
"step": 303,
|
|
"valid_targets_mean": 16053.5,
|
|
"valid_targets_min": 14202
|
|
},
|
|
{
|
|
"epoch": 1.2942430703624734,
|
|
"grad_norm": 0.02145703567450628,
|
|
"learning_rate": 3.7052026760511996e-05,
|
|
"loss": 0.9418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964595556259155,
|
|
"step": 304,
|
|
"valid_targets_mean": 16069.5,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 1.2985074626865671,
|
|
"grad_norm": 0.020215277183096764,
|
|
"learning_rate": 3.7020888543062046e-05,
|
|
"loss": 0.9918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22943922877311707,
|
|
"step": 305,
|
|
"valid_targets_mean": 15831.0,
|
|
"valid_targets_min": 14690
|
|
},
|
|
{
|
|
"epoch": 1.302771855010661,
|
|
"grad_norm": 0.021241633581083736,
|
|
"learning_rate": 3.6989599965833024e-05,
|
|
"loss": 0.9617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26224833726882935,
|
|
"step": 306,
|
|
"valid_targets_mean": 16124.4,
|
|
"valid_targets_min": 14102
|
|
},
|
|
{
|
|
"epoch": 1.3070362473347548,
|
|
"grad_norm": 0.022387440675154302,
|
|
"learning_rate": 3.695816130522317e-05,
|
|
"loss": 0.976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19870209693908691,
|
|
"step": 307,
|
|
"valid_targets_mean": 11196.8,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 1.3113006396588487,
|
|
"grad_norm": 0.019987573137055865,
|
|
"learning_rate": 3.692657283895651e-05,
|
|
"loss": 0.9315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22559893131256104,
|
|
"step": 308,
|
|
"valid_targets_mean": 15986.6,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 1.3155650319829424,
|
|
"grad_norm": 0.02122240323877498,
|
|
"learning_rate": 3.689483484608048e-05,
|
|
"loss": 0.9363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659769356250763,
|
|
"step": 309,
|
|
"valid_targets_mean": 15967.5,
|
|
"valid_targets_min": 13872
|
|
},
|
|
{
|
|
"epoch": 1.3198294243070363,
|
|
"grad_norm": 0.022402255303537805,
|
|
"learning_rate": 3.6862947606963364e-05,
|
|
"loss": 0.9533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15855848789215088,
|
|
"step": 310,
|
|
"valid_targets_mean": 8818.1,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 1.32409381663113,
|
|
"grad_norm": 0.019940887026278897,
|
|
"learning_rate": 3.6830911403291885e-05,
|
|
"loss": 0.9664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24071869254112244,
|
|
"step": 311,
|
|
"valid_targets_mean": 16242.1,
|
|
"valid_targets_min": 15583
|
|
},
|
|
{
|
|
"epoch": 1.328358208955224,
|
|
"grad_norm": 0.021259632779084867,
|
|
"learning_rate": 3.679872651806869e-05,
|
|
"loss": 0.9057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29548853635787964,
|
|
"step": 312,
|
|
"valid_targets_mean": 16076.8,
|
|
"valid_targets_min": 14684
|
|
},
|
|
{
|
|
"epoch": 1.3326226012793176,
|
|
"grad_norm": 0.020411653756479714,
|
|
"learning_rate": 3.676639323560986e-05,
|
|
"loss": 0.9084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19466716051101685,
|
|
"step": 313,
|
|
"valid_targets_mean": 11957.9,
|
|
"valid_targets_min": 8306
|
|
},
|
|
{
|
|
"epoch": 1.3368869936034116,
|
|
"grad_norm": 0.020885839407807354,
|
|
"learning_rate": 3.6733911841542365e-05,
|
|
"loss": 0.9797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562810182571411,
|
|
"step": 314,
|
|
"valid_targets_mean": 16131.2,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 1.3411513859275053,
|
|
"grad_norm": 0.021543584169130537,
|
|
"learning_rate": 3.6701282622801626e-05,
|
|
"loss": 0.9605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30723950266838074,
|
|
"step": 315,
|
|
"valid_targets_mean": 16064.5,
|
|
"valid_targets_min": 14902
|
|
},
|
|
{
|
|
"epoch": 1.3454157782515992,
|
|
"grad_norm": 0.01959181595256813,
|
|
"learning_rate": 3.666850586762886e-05,
|
|
"loss": 0.9696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419573962688446,
|
|
"step": 316,
|
|
"valid_targets_mean": 14870.1,
|
|
"valid_targets_min": 12118
|
|
},
|
|
{
|
|
"epoch": 1.349680170575693,
|
|
"grad_norm": 0.02154351553037945,
|
|
"learning_rate": 3.663558186556863e-05,
|
|
"loss": 0.9892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843114733695984,
|
|
"step": 317,
|
|
"valid_targets_mean": 16127.5,
|
|
"valid_targets_min": 15404
|
|
},
|
|
{
|
|
"epoch": 1.3539445628997868,
|
|
"grad_norm": 0.02130427954909734,
|
|
"learning_rate": 3.660251090746627e-05,
|
|
"loss": 0.9717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24448466300964355,
|
|
"step": 318,
|
|
"valid_targets_mean": 13217.4,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 1.3582089552238805,
|
|
"grad_norm": 0.020148577459201872,
|
|
"learning_rate": 3.656929328546526e-05,
|
|
"loss": 0.9727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23301547765731812,
|
|
"step": 319,
|
|
"valid_targets_mean": 15822.2,
|
|
"valid_targets_min": 15165
|
|
},
|
|
{
|
|
"epoch": 1.3624733475479744,
|
|
"grad_norm": 0.02254457731892048,
|
|
"learning_rate": 3.653592929300471e-05,
|
|
"loss": 1.0067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694302201271057,
|
|
"step": 320,
|
|
"valid_targets_mean": 16193.0,
|
|
"valid_targets_min": 15660
|
|
},
|
|
{
|
|
"epoch": 1.3667377398720681,
|
|
"grad_norm": 0.021602239746830013,
|
|
"learning_rate": 3.650241922481675e-05,
|
|
"loss": 0.9444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16235379874706268,
|
|
"step": 321,
|
|
"valid_targets_mean": 9975.7,
|
|
"valid_targets_min": 2431
|
|
},
|
|
{
|
|
"epoch": 1.371002132196162,
|
|
"grad_norm": 0.02013087933531282,
|
|
"learning_rate": 3.6468763376923886e-05,
|
|
"loss": 0.9559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254020094871521,
|
|
"step": 322,
|
|
"valid_targets_mean": 16053.6,
|
|
"valid_targets_min": 15377
|
|
},
|
|
{
|
|
"epoch": 1.375266524520256,
|
|
"grad_norm": 0.021236300378812828,
|
|
"learning_rate": 3.6434962046636464e-05,
|
|
"loss": 0.9643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279460072517395,
|
|
"step": 323,
|
|
"valid_targets_mean": 16178.9,
|
|
"valid_targets_min": 15479
|
|
},
|
|
{
|
|
"epoch": 1.3795309168443497,
|
|
"grad_norm": 0.02114898815390917,
|
|
"learning_rate": 3.6401015532549957e-05,
|
|
"loss": 0.9879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19305935502052307,
|
|
"step": 324,
|
|
"valid_targets_mean": 10627.7,
|
|
"valid_targets_min": 7275
|
|
},
|
|
{
|
|
"epoch": 1.3837953091684434,
|
|
"grad_norm": 0.020460229383588976,
|
|
"learning_rate": 3.6366924134542386e-05,
|
|
"loss": 0.9651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638334035873413,
|
|
"step": 325,
|
|
"valid_targets_mean": 16111.0,
|
|
"valid_targets_min": 15284
|
|
},
|
|
{
|
|
"epoch": 1.3880597014925373,
|
|
"grad_norm": 0.020371265309288976,
|
|
"learning_rate": 3.633268815377166e-05,
|
|
"loss": 0.9317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28315305709838867,
|
|
"step": 326,
|
|
"valid_targets_mean": 16165.5,
|
|
"valid_targets_min": 15059
|
|
},
|
|
{
|
|
"epoch": 1.3923240938166312,
|
|
"grad_norm": 0.02042343150885015,
|
|
"learning_rate": 3.6298307892672895e-05,
|
|
"loss": 0.9572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2068769782781601,
|
|
"step": 327,
|
|
"valid_targets_mean": 14014.3,
|
|
"valid_targets_min": 12347
|
|
},
|
|
{
|
|
"epoch": 1.396588486140725,
|
|
"grad_norm": 0.021073522988564786,
|
|
"learning_rate": 3.626378365495577e-05,
|
|
"loss": 0.9877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26230764389038086,
|
|
"step": 328,
|
|
"valid_targets_mean": 16146.2,
|
|
"valid_targets_min": 15147
|
|
},
|
|
{
|
|
"epoch": 1.4008528784648187,
|
|
"grad_norm": 0.0219224334239064,
|
|
"learning_rate": 3.622911574560181e-05,
|
|
"loss": 0.9483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29579466581344604,
|
|
"step": 329,
|
|
"valid_targets_mean": 16076.6,
|
|
"valid_targets_min": 14656
|
|
},
|
|
{
|
|
"epoch": 1.4051172707889126,
|
|
"grad_norm": 0.02074208323339176,
|
|
"learning_rate": 3.6194304470861744e-05,
|
|
"loss": 0.9694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22506991028785706,
|
|
"step": 330,
|
|
"valid_targets_mean": 15407.6,
|
|
"valid_targets_min": 14388
|
|
},
|
|
{
|
|
"epoch": 1.4093816631130065,
|
|
"grad_norm": 0.02091026866767609,
|
|
"learning_rate": 3.615935013825272e-05,
|
|
"loss": 0.9678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653864622116089,
|
|
"step": 331,
|
|
"valid_targets_mean": 16198.8,
|
|
"valid_targets_min": 15519
|
|
},
|
|
{
|
|
"epoch": 1.4136460554371002,
|
|
"grad_norm": 0.02193298486884456,
|
|
"learning_rate": 3.612425305655569e-05,
|
|
"loss": 0.9781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1987227499485016,
|
|
"step": 332,
|
|
"valid_targets_mean": 10650.1,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 1.417910447761194,
|
|
"grad_norm": 0.020857436128484137,
|
|
"learning_rate": 3.6089013535812593e-05,
|
|
"loss": 0.9248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21897855401039124,
|
|
"step": 333,
|
|
"valid_targets_mean": 15912.2,
|
|
"valid_targets_min": 15209
|
|
},
|
|
{
|
|
"epoch": 1.4221748400852878,
|
|
"grad_norm": 0.02081324403164226,
|
|
"learning_rate": 3.6053631887323656e-05,
|
|
"loss": 0.9638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890179753303528,
|
|
"step": 334,
|
|
"valid_targets_mean": 16107.1,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 1.4264392324093818,
|
|
"grad_norm": 0.021135203706636893,
|
|
"learning_rate": 3.601810842364465e-05,
|
|
"loss": 1.0051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15422949194908142,
|
|
"step": 335,
|
|
"valid_targets_mean": 8578.5,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 1.4307036247334755,
|
|
"grad_norm": 0.01990255116172728,
|
|
"learning_rate": 3.598244345858412e-05,
|
|
"loss": 0.9789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2270747721195221,
|
|
"step": 336,
|
|
"valid_targets_mean": 16149.1,
|
|
"valid_targets_min": 13369
|
|
},
|
|
{
|
|
"epoch": 1.4349680170575694,
|
|
"grad_norm": 0.021750293303804586,
|
|
"learning_rate": 3.594663730720059e-05,
|
|
"loss": 0.9566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27288711071014404,
|
|
"step": 337,
|
|
"valid_targets_mean": 16134.9,
|
|
"valid_targets_min": 13697
|
|
},
|
|
{
|
|
"epoch": 1.439232409381663,
|
|
"grad_norm": 0.02117385805880512,
|
|
"learning_rate": 3.591069028579982e-05,
|
|
"loss": 0.9746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19177991151809692,
|
|
"step": 338,
|
|
"valid_targets_mean": 12585.0,
|
|
"valid_targets_min": 8364
|
|
},
|
|
{
|
|
"epoch": 1.443496801705757,
|
|
"grad_norm": 0.021739742613686594,
|
|
"learning_rate": 3.5874602711931994e-05,
|
|
"loss": 0.985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27265310287475586,
|
|
"step": 339,
|
|
"valid_targets_mean": 16086.6,
|
|
"valid_targets_min": 15239
|
|
},
|
|
{
|
|
"epoch": 1.4477611940298507,
|
|
"grad_norm": 0.02213502438209742,
|
|
"learning_rate": 3.5838374904388904e-05,
|
|
"loss": 0.9435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2904359996318817,
|
|
"step": 340,
|
|
"valid_targets_mean": 16185.0,
|
|
"valid_targets_min": 15426
|
|
},
|
|
{
|
|
"epoch": 1.4520255863539446,
|
|
"grad_norm": 0.02119136423357909,
|
|
"learning_rate": 3.580200718320115e-05,
|
|
"loss": 0.9645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2332826554775238,
|
|
"step": 341,
|
|
"valid_targets_mean": 15524.8,
|
|
"valid_targets_min": 14360
|
|
},
|
|
{
|
|
"epoch": 1.4562899786780383,
|
|
"grad_norm": 0.021598667690886877,
|
|
"learning_rate": 3.576549986963531e-05,
|
|
"loss": 0.9826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28831905126571655,
|
|
"step": 342,
|
|
"valid_targets_mean": 16002.2,
|
|
"valid_targets_min": 14760
|
|
},
|
|
{
|
|
"epoch": 1.4605543710021323,
|
|
"grad_norm": 0.02128806676051918,
|
|
"learning_rate": 3.5728853286191075e-05,
|
|
"loss": 0.9062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22554714977741241,
|
|
"step": 343,
|
|
"valid_targets_mean": 12910.2,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 1.464818763326226,
|
|
"grad_norm": 0.019835953174840906,
|
|
"learning_rate": 3.5692067756598465e-05,
|
|
"loss": 0.948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24969357252120972,
|
|
"step": 344,
|
|
"valid_targets_mean": 15713.8,
|
|
"valid_targets_min": 14462
|
|
},
|
|
{
|
|
"epoch": 1.4690831556503199,
|
|
"grad_norm": 0.021167080850404276,
|
|
"learning_rate": 3.5655143605814885e-05,
|
|
"loss": 0.9655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663092613220215,
|
|
"step": 345,
|
|
"valid_targets_mean": 16094.8,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 1.4733475479744136,
|
|
"grad_norm": 0.021466815686012785,
|
|
"learning_rate": 3.561808116002232e-05,
|
|
"loss": 0.9477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1699264645576477,
|
|
"step": 346,
|
|
"valid_targets_mean": 10433.8,
|
|
"valid_targets_min": 2149
|
|
},
|
|
{
|
|
"epoch": 1.4776119402985075,
|
|
"grad_norm": 0.019248463897868925,
|
|
"learning_rate": 3.5580880746624444e-05,
|
|
"loss": 0.9327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21550115942955017,
|
|
"step": 347,
|
|
"valid_targets_mean": 16012.9,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 1.4818763326226012,
|
|
"grad_norm": 0.021580546453072513,
|
|
"learning_rate": 3.5543542694243685e-05,
|
|
"loss": 0.974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649380564689636,
|
|
"step": 348,
|
|
"valid_targets_mean": 16178.6,
|
|
"valid_targets_min": 15431
|
|
},
|
|
{
|
|
"epoch": 1.4861407249466951,
|
|
"grad_norm": 0.02098911899525261,
|
|
"learning_rate": 3.5506067332718355e-05,
|
|
"loss": 0.9782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19869481027126312,
|
|
"step": 349,
|
|
"valid_targets_mean": 10749.7,
|
|
"valid_targets_min": 7584
|
|
},
|
|
{
|
|
"epoch": 1.4904051172707888,
|
|
"grad_norm": 0.02030483107512438,
|
|
"learning_rate": 3.546845499309976e-05,
|
|
"loss": 0.9552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2493928074836731,
|
|
"step": 350,
|
|
"valid_targets_mean": 16174.0,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 1.4946695095948828,
|
|
"grad_norm": 0.02275468197040157,
|
|
"learning_rate": 3.5430706007649225e-05,
|
|
"loss": 0.9613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31151849031448364,
|
|
"step": 351,
|
|
"valid_targets_mean": 16188.4,
|
|
"valid_targets_min": 15522
|
|
},
|
|
{
|
|
"epoch": 1.4989339019189765,
|
|
"grad_norm": 0.021025210252012868,
|
|
"learning_rate": 3.539282070983518e-05,
|
|
"loss": 0.9692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20601017773151398,
|
|
"step": 352,
|
|
"valid_targets_mean": 13305.9,
|
|
"valid_targets_min": 10940
|
|
},
|
|
{
|
|
"epoch": 1.5031982942430704,
|
|
"grad_norm": 0.021505907711748068,
|
|
"learning_rate": 3.535479943433023e-05,
|
|
"loss": 1.0325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3107979893684387,
|
|
"step": 353,
|
|
"valid_targets_mean": 15869.9,
|
|
"valid_targets_min": 14311
|
|
},
|
|
{
|
|
"epoch": 1.5074626865671643,
|
|
"grad_norm": 0.021805162527261564,
|
|
"learning_rate": 3.5316642517008184e-05,
|
|
"loss": 0.9435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263502299785614,
|
|
"step": 354,
|
|
"valid_targets_mean": 16252.4,
|
|
"valid_targets_min": 15734
|
|
},
|
|
{
|
|
"epoch": 1.511727078891258,
|
|
"grad_norm": 0.02106837573031613,
|
|
"learning_rate": 3.5278350294941074e-05,
|
|
"loss": 0.9733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23262187838554382,
|
|
"step": 355,
|
|
"valid_targets_mean": 15459.3,
|
|
"valid_targets_min": 14187
|
|
},
|
|
{
|
|
"epoch": 1.5159914712153517,
|
|
"grad_norm": 0.021602088501074035,
|
|
"learning_rate": 3.523992310639622e-05,
|
|
"loss": 0.9272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626686692237854,
|
|
"step": 356,
|
|
"valid_targets_mean": 16210.6,
|
|
"valid_targets_min": 15417
|
|
},
|
|
{
|
|
"epoch": 1.5202558635394456,
|
|
"grad_norm": 0.023649718619320834,
|
|
"learning_rate": 3.5201361290833165e-05,
|
|
"loss": 0.9721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20455145835876465,
|
|
"step": 357,
|
|
"valid_targets_mean": 11460.9,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 1.5245202558635396,
|
|
"grad_norm": 0.020182250545453907,
|
|
"learning_rate": 3.516266518890079e-05,
|
|
"loss": 0.9564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2473708987236023,
|
|
"step": 358,
|
|
"valid_targets_mean": 16063.5,
|
|
"valid_targets_min": 15612
|
|
},
|
|
{
|
|
"epoch": 1.5287846481876333,
|
|
"grad_norm": 0.021473841918621447,
|
|
"learning_rate": 3.512383514243419e-05,
|
|
"loss": 0.932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2438630610704422,
|
|
"step": 359,
|
|
"valid_targets_mean": 16235.6,
|
|
"valid_targets_min": 15646
|
|
},
|
|
{
|
|
"epoch": 1.533049040511727,
|
|
"grad_norm": 0.021869483024952813,
|
|
"learning_rate": 3.5084871494451716e-05,
|
|
"loss": 0.8989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16998907923698425,
|
|
"step": 360,
|
|
"valid_targets_mean": 9575.4,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 1.537313432835821,
|
|
"grad_norm": 0.020961509832221767,
|
|
"learning_rate": 3.5045774589151955e-05,
|
|
"loss": 0.9626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23756466805934906,
|
|
"step": 361,
|
|
"valid_targets_mean": 15896.8,
|
|
"valid_targets_min": 10520
|
|
},
|
|
{
|
|
"epoch": 1.5415778251599148,
|
|
"grad_norm": 0.021129234127304136,
|
|
"learning_rate": 3.500654477191064e-05,
|
|
"loss": 0.9921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29901623725891113,
|
|
"step": 362,
|
|
"valid_targets_mean": 16049.7,
|
|
"valid_targets_min": 14239
|
|
},
|
|
{
|
|
"epoch": 1.5458422174840085,
|
|
"grad_norm": 0.021875244631136773,
|
|
"learning_rate": 3.496718238927764e-05,
|
|
"loss": 0.9594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17784026265144348,
|
|
"step": 363,
|
|
"valid_targets_mean": 10676.7,
|
|
"valid_targets_min": 7039
|
|
},
|
|
{
|
|
"epoch": 1.5501066098081022,
|
|
"grad_norm": 0.020692490137139724,
|
|
"learning_rate": 3.492768778897388e-05,
|
|
"loss": 0.9668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25093957781791687,
|
|
"step": 364,
|
|
"valid_targets_mean": 16083.6,
|
|
"valid_targets_min": 14152
|
|
},
|
|
{
|
|
"epoch": 1.5543710021321961,
|
|
"grad_norm": 0.020898862455467127,
|
|
"learning_rate": 3.4888061319888276e-05,
|
|
"loss": 0.9638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2926800847053528,
|
|
"step": 365,
|
|
"valid_targets_mean": 16139.0,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 1.55863539445629,
|
|
"grad_norm": 0.01996168036965207,
|
|
"learning_rate": 3.484830333207466e-05,
|
|
"loss": 0.975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2084559053182602,
|
|
"step": 366,
|
|
"valid_targets_mean": 13693.0,
|
|
"valid_targets_min": 11647
|
|
},
|
|
{
|
|
"epoch": 1.5628997867803838,
|
|
"grad_norm": 0.020962482422792052,
|
|
"learning_rate": 3.4808414176748666e-05,
|
|
"loss": 0.9614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821657061576843,
|
|
"step": 367,
|
|
"valid_targets_mean": 16172.2,
|
|
"valid_targets_min": 15355
|
|
},
|
|
{
|
|
"epoch": 1.5671641791044775,
|
|
"grad_norm": 0.021217330519714576,
|
|
"learning_rate": 3.476839420628466e-05,
|
|
"loss": 0.9471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25308144092559814,
|
|
"step": 368,
|
|
"valid_targets_mean": 13362.8,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.019735684881360512,
|
|
"learning_rate": 3.472824377421257e-05,
|
|
"loss": 0.9348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23869484663009644,
|
|
"step": 369,
|
|
"valid_targets_mean": 15102.8,
|
|
"valid_targets_min": 13529
|
|
},
|
|
{
|
|
"epoch": 1.5756929637526653,
|
|
"grad_norm": 0.022664478949249375,
|
|
"learning_rate": 3.4687963235214845e-05,
|
|
"loss": 0.982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29631197452545166,
|
|
"step": 370,
|
|
"valid_targets_mean": 16147.3,
|
|
"valid_targets_min": 15610
|
|
},
|
|
{
|
|
"epoch": 1.579957356076759,
|
|
"grad_norm": 0.02158964815799617,
|
|
"learning_rate": 3.464755294512325e-05,
|
|
"loss": 0.9527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1592029482126236,
|
|
"step": 371,
|
|
"valid_targets_mean": 8644.8,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 1.5842217484008527,
|
|
"grad_norm": 0.020000241213357844,
|
|
"learning_rate": 3.4607013260915765e-05,
|
|
"loss": 0.9661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2352025806903839,
|
|
"step": 372,
|
|
"valid_targets_mean": 16176.8,
|
|
"valid_targets_min": 15655
|
|
},
|
|
{
|
|
"epoch": 1.5884861407249466,
|
|
"grad_norm": 0.019858204728615825,
|
|
"learning_rate": 3.4566344540713404e-05,
|
|
"loss": 0.9144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25332534313201904,
|
|
"step": 373,
|
|
"valid_targets_mean": 16209.2,
|
|
"valid_targets_min": 15196
|
|
},
|
|
{
|
|
"epoch": 1.5927505330490406,
|
|
"grad_norm": 0.021334226456194684,
|
|
"learning_rate": 3.452554714377706e-05,
|
|
"loss": 0.9476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872849464416504,
|
|
"step": 374,
|
|
"valid_targets_mean": 11638.9,
|
|
"valid_targets_min": 7109
|
|
},
|
|
{
|
|
"epoch": 1.5970149253731343,
|
|
"grad_norm": 0.021102676606301086,
|
|
"learning_rate": 3.448462143050436e-05,
|
|
"loss": 0.9819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25525495409965515,
|
|
"step": 375,
|
|
"valid_targets_mean": 15764.8,
|
|
"valid_targets_min": 8340
|
|
},
|
|
{
|
|
"epoch": 1.6012793176972282,
|
|
"grad_norm": 0.02196647798568977,
|
|
"learning_rate": 3.4443567762426444e-05,
|
|
"loss": 0.9378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27831071615219116,
|
|
"step": 376,
|
|
"valid_targets_mean": 16209.0,
|
|
"valid_targets_min": 15650
|
|
},
|
|
{
|
|
"epoch": 1.6055437100213221,
|
|
"grad_norm": 0.020269375845258043,
|
|
"learning_rate": 3.440238650220477e-05,
|
|
"loss": 0.9342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2082153707742691,
|
|
"step": 377,
|
|
"valid_targets_mean": 13610.0,
|
|
"valid_targets_min": 12277
|
|
},
|
|
{
|
|
"epoch": 1.6098081023454158,
|
|
"grad_norm": 0.02062385792289085,
|
|
"learning_rate": 3.4361078013627945e-05,
|
|
"loss": 0.9402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25554201006889343,
|
|
"step": 378,
|
|
"valid_targets_mean": 16102.3,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 1.6140724946695095,
|
|
"grad_norm": 0.02080267297378242,
|
|
"learning_rate": 3.4319642661608474e-05,
|
|
"loss": 0.9706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2553861439228058,
|
|
"step": 379,
|
|
"valid_targets_mean": 16191.4,
|
|
"valid_targets_min": 15478
|
|
},
|
|
{
|
|
"epoch": 1.6183368869936035,
|
|
"grad_norm": 0.020027653203361306,
|
|
"learning_rate": 3.427808081217957e-05,
|
|
"loss": 0.9805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.237830251455307,
|
|
"step": 380,
|
|
"valid_targets_mean": 15048.2,
|
|
"valid_targets_min": 12946
|
|
},
|
|
{
|
|
"epoch": 1.6226012793176974,
|
|
"grad_norm": 0.021063054028497643,
|
|
"learning_rate": 3.423639283249189e-05,
|
|
"loss": 0.9467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26139163970947266,
|
|
"step": 381,
|
|
"valid_targets_mean": 16223.2,
|
|
"valid_targets_min": 15679
|
|
},
|
|
{
|
|
"epoch": 1.626865671641791,
|
|
"grad_norm": 0.02120673881165638,
|
|
"learning_rate": 3.419457909081032e-05,
|
|
"loss": 0.9713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.217096209526062,
|
|
"step": 382,
|
|
"valid_targets_mean": 10954.2,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 1.6311300639658848,
|
|
"grad_norm": 0.01992097360177943,
|
|
"learning_rate": 3.415263995651069e-05,
|
|
"loss": 0.983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23178398609161377,
|
|
"step": 383,
|
|
"valid_targets_mean": 15542.5,
|
|
"valid_targets_min": 14290
|
|
},
|
|
{
|
|
"epoch": 1.6353944562899787,
|
|
"grad_norm": 0.021619632363390446,
|
|
"learning_rate": 3.411057580007653e-05,
|
|
"loss": 0.9414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28393059968948364,
|
|
"step": 384,
|
|
"valid_targets_mean": 16192.2,
|
|
"valid_targets_min": 15589
|
|
},
|
|
{
|
|
"epoch": 1.6396588486140726,
|
|
"grad_norm": 0.020906866468054047,
|
|
"learning_rate": 3.4068386993095806e-05,
|
|
"loss": 0.9249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14363005757331848,
|
|
"step": 385,
|
|
"valid_targets_mean": 8556.6,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 1.6439232409381663,
|
|
"grad_norm": 0.020564426787136887,
|
|
"learning_rate": 3.402607390825762e-05,
|
|
"loss": 0.9403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22132807970046997,
|
|
"step": 386,
|
|
"valid_targets_mean": 16199.5,
|
|
"valid_targets_min": 15461
|
|
},
|
|
{
|
|
"epoch": 1.64818763326226,
|
|
"grad_norm": 0.021618888638051652,
|
|
"learning_rate": 3.398363691934894e-05,
|
|
"loss": 0.9451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890056371688843,
|
|
"step": 387,
|
|
"valid_targets_mean": 16075.8,
|
|
"valid_targets_min": 15229
|
|
},
|
|
{
|
|
"epoch": 1.652452025586354,
|
|
"grad_norm": 0.020755222511571733,
|
|
"learning_rate": 3.3941076401251244e-05,
|
|
"loss": 0.9625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18527905642986298,
|
|
"step": 388,
|
|
"valid_targets_mean": 11951.2,
|
|
"valid_targets_min": 9148
|
|
},
|
|
{
|
|
"epoch": 1.6567164179104479,
|
|
"grad_norm": 0.02052659692396289,
|
|
"learning_rate": 3.3898392729937295e-05,
|
|
"loss": 0.9389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527425289154053,
|
|
"step": 389,
|
|
"valid_targets_mean": 16213.6,
|
|
"valid_targets_min": 15464
|
|
},
|
|
{
|
|
"epoch": 1.6609808102345416,
|
|
"grad_norm": 0.021562837481466087,
|
|
"learning_rate": 3.385558628246774e-05,
|
|
"loss": 0.9636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690006196498871,
|
|
"step": 390,
|
|
"valid_targets_mean": 16163.7,
|
|
"valid_targets_min": 15566
|
|
},
|
|
{
|
|
"epoch": 1.6652452025586353,
|
|
"grad_norm": 0.019857890638508922,
|
|
"learning_rate": 3.381265743698781e-05,
|
|
"loss": 0.9729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2105507254600525,
|
|
"step": 391,
|
|
"valid_targets_mean": 14500.9,
|
|
"valid_targets_min": 12795
|
|
},
|
|
{
|
|
"epoch": 1.6695095948827292,
|
|
"grad_norm": 0.021060020690069205,
|
|
"learning_rate": 3.3769606572724e-05,
|
|
"loss": 0.9545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837657332420349,
|
|
"step": 392,
|
|
"valid_targets_mean": 16072.7,
|
|
"valid_targets_min": 14831
|
|
},
|
|
{
|
|
"epoch": 1.6737739872068231,
|
|
"grad_norm": 0.02114370076376681,
|
|
"learning_rate": 3.3726434069980686e-05,
|
|
"loss": 0.9217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21678954362869263,
|
|
"step": 393,
|
|
"valid_targets_mean": 12898.8,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 1.6780383795309168,
|
|
"grad_norm": 0.019610926925244267,
|
|
"learning_rate": 3.368314031013678e-05,
|
|
"loss": 0.9452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22999313473701477,
|
|
"step": 394,
|
|
"valid_targets_mean": 15915.6,
|
|
"valid_targets_min": 14077
|
|
},
|
|
{
|
|
"epoch": 1.6823027718550105,
|
|
"grad_norm": 0.022311388588044587,
|
|
"learning_rate": 3.363972567564236e-05,
|
|
"loss": 0.9671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26264309883117676,
|
|
"step": 395,
|
|
"valid_targets_mean": 16189.2,
|
|
"valid_targets_min": 15331
|
|
},
|
|
{
|
|
"epoch": 1.6865671641791045,
|
|
"grad_norm": 0.022168476547753502,
|
|
"learning_rate": 3.35961905500153e-05,
|
|
"loss": 0.9767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1652470827102661,
|
|
"step": 396,
|
|
"valid_targets_mean": 9036.9,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 1.6908315565031984,
|
|
"grad_norm": 0.020168897101941776,
|
|
"learning_rate": 3.3552535317837855e-05,
|
|
"loss": 1.0118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24864624440670013,
|
|
"step": 397,
|
|
"valid_targets_mean": 15838.0,
|
|
"valid_targets_min": 14647
|
|
},
|
|
{
|
|
"epoch": 1.695095948827292,
|
|
"grad_norm": 0.021999284596368508,
|
|
"learning_rate": 3.35087603647533e-05,
|
|
"loss": 0.9642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30087724328041077,
|
|
"step": 398,
|
|
"valid_targets_mean": 16139.2,
|
|
"valid_targets_min": 14797
|
|
},
|
|
{
|
|
"epoch": 1.6993603411513858,
|
|
"grad_norm": 0.021291087606209156,
|
|
"learning_rate": 3.346486607746249e-05,
|
|
"loss": 0.9723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574464589357376,
|
|
"step": 399,
|
|
"valid_targets_mean": 9468.8,
|
|
"valid_targets_min": 4620
|
|
},
|
|
{
|
|
"epoch": 1.7036247334754797,
|
|
"grad_norm": 0.020675563263550193,
|
|
"learning_rate": 3.342085284372047e-05,
|
|
"loss": 0.9748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689174711704254,
|
|
"step": 400,
|
|
"valid_targets_mean": 16042.5,
|
|
"valid_targets_min": 14657
|
|
},
|
|
{
|
|
"epoch": 1.7078891257995736,
|
|
"grad_norm": 0.021742574243585507,
|
|
"learning_rate": 3.337672105233303e-05,
|
|
"loss": 0.9621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659856677055359,
|
|
"step": 401,
|
|
"valid_targets_mean": 16116.5,
|
|
"valid_targets_min": 14746
|
|
},
|
|
{
|
|
"epoch": 1.7121535181236673,
|
|
"grad_norm": 0.020062838287137577,
|
|
"learning_rate": 3.3332471093153296e-05,
|
|
"loss": 0.9785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2010663002729416,
|
|
"step": 402,
|
|
"valid_targets_mean": 13291.3,
|
|
"valid_targets_min": 9805
|
|
},
|
|
{
|
|
"epoch": 1.716417910447761,
|
|
"grad_norm": 0.021333855021459878,
|
|
"learning_rate": 3.3288103357078244e-05,
|
|
"loss": 1.0077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2840135991573334,
|
|
"step": 403,
|
|
"valid_targets_mean": 16106.0,
|
|
"valid_targets_min": 14989
|
|
},
|
|
{
|
|
"epoch": 1.720682302771855,
|
|
"grad_norm": 0.021790479429611372,
|
|
"learning_rate": 3.324361823604529e-05,
|
|
"loss": 0.9488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29594528675079346,
|
|
"step": 404,
|
|
"valid_targets_mean": 16116.4,
|
|
"valid_targets_min": 14311
|
|
},
|
|
{
|
|
"epoch": 1.724946695095949,
|
|
"grad_norm": 0.019948723080871693,
|
|
"learning_rate": 3.319901612302881e-05,
|
|
"loss": 0.9577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23182149231433868,
|
|
"step": 405,
|
|
"valid_targets_mean": 15988.9,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 1.7292110874200426,
|
|
"grad_norm": 0.02017308162821173,
|
|
"learning_rate": 3.315429741203666e-05,
|
|
"loss": 0.9206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23992013931274414,
|
|
"step": 406,
|
|
"valid_targets_mean": 16216.4,
|
|
"valid_targets_min": 15592
|
|
},
|
|
{
|
|
"epoch": 1.7334754797441365,
|
|
"grad_norm": 0.020674606301756396,
|
|
"learning_rate": 3.3109462498106705e-05,
|
|
"loss": 0.9262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18523073196411133,
|
|
"step": 407,
|
|
"valid_targets_mean": 11218.8,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 1.7377398720682304,
|
|
"grad_norm": 0.0196748694871804,
|
|
"learning_rate": 3.306451177730333e-05,
|
|
"loss": 0.9295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2334335446357727,
|
|
"step": 408,
|
|
"valid_targets_mean": 15831.5,
|
|
"valid_targets_min": 14464
|
|
},
|
|
{
|
|
"epoch": 1.7420042643923241,
|
|
"grad_norm": 0.022272219457707434,
|
|
"learning_rate": 3.301944564671394e-05,
|
|
"loss": 0.9871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29115623235702515,
|
|
"step": 409,
|
|
"valid_targets_mean": 16057.6,
|
|
"valid_targets_min": 13449
|
|
},
|
|
{
|
|
"epoch": 1.7462686567164178,
|
|
"grad_norm": 0.020854918046524604,
|
|
"learning_rate": 3.297426450444546e-05,
|
|
"loss": 0.9458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15708360075950623,
|
|
"step": 410,
|
|
"valid_targets_mean": 10743.4,
|
|
"valid_targets_min": 2794
|
|
},
|
|
{
|
|
"epoch": 1.7505330490405118,
|
|
"grad_norm": 0.02116826782624495,
|
|
"learning_rate": 3.292896874962078e-05,
|
|
"loss": 0.9543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419443279504776,
|
|
"step": 411,
|
|
"valid_targets_mean": 16157.0,
|
|
"valid_targets_min": 14616
|
|
},
|
|
{
|
|
"epoch": 1.7547974413646057,
|
|
"grad_norm": 0.021956351361709706,
|
|
"learning_rate": 3.2883558782375294e-05,
|
|
"loss": 0.9475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713549733161926,
|
|
"step": 412,
|
|
"valid_targets_mean": 16118.3,
|
|
"valid_targets_min": 13449
|
|
},
|
|
{
|
|
"epoch": 1.7590618336886994,
|
|
"grad_norm": 0.021329278933101627,
|
|
"learning_rate": 3.283803500385332e-05,
|
|
"loss": 0.9725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20209068059921265,
|
|
"step": 413,
|
|
"valid_targets_mean": 12713.3,
|
|
"valid_targets_min": 9673
|
|
},
|
|
{
|
|
"epoch": 1.763326226012793,
|
|
"grad_norm": 0.020893880718819164,
|
|
"learning_rate": 3.2792397816204546e-05,
|
|
"loss": 0.9846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27608564496040344,
|
|
"step": 414,
|
|
"valid_targets_mean": 16157.4,
|
|
"valid_targets_min": 15613
|
|
},
|
|
{
|
|
"epoch": 1.767590618336887,
|
|
"grad_norm": 0.022211529918683113,
|
|
"learning_rate": 3.2746647622580524e-05,
|
|
"loss": 0.9534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27259349822998047,
|
|
"step": 415,
|
|
"valid_targets_mean": 16090.7,
|
|
"valid_targets_min": 14898
|
|
},
|
|
{
|
|
"epoch": 1.771855010660981,
|
|
"grad_norm": 0.02098968019943598,
|
|
"learning_rate": 3.270078482713106e-05,
|
|
"loss": 0.93,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21816179156303406,
|
|
"step": 416,
|
|
"valid_targets_mean": 14021.0,
|
|
"valid_targets_min": 12655
|
|
},
|
|
{
|
|
"epoch": 1.7761194029850746,
|
|
"grad_norm": 0.020755901128740588,
|
|
"learning_rate": 3.265480983500069e-05,
|
|
"loss": 0.9527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778633236885071,
|
|
"step": 417,
|
|
"valid_targets_mean": 16079.4,
|
|
"valid_targets_min": 14248
|
|
},
|
|
{
|
|
"epoch": 1.7803837953091683,
|
|
"grad_norm": 0.023013578727991174,
|
|
"learning_rate": 3.260872305232507e-05,
|
|
"loss": 0.9272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23680084943771362,
|
|
"step": 418,
|
|
"valid_targets_mean": 13120.1,
|
|
"valid_targets_min": 2256
|
|
},
|
|
{
|
|
"epoch": 1.7846481876332623,
|
|
"grad_norm": 0.019960232866087863,
|
|
"learning_rate": 3.256252488622738e-05,
|
|
"loss": 0.9285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22897347807884216,
|
|
"step": 419,
|
|
"valid_targets_mean": 15876.4,
|
|
"valid_targets_min": 13764
|
|
},
|
|
{
|
|
"epoch": 1.7889125799573562,
|
|
"grad_norm": 0.020661237378940912,
|
|
"learning_rate": 3.251621574481475e-05,
|
|
"loss": 0.9435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26216891407966614,
|
|
"step": 420,
|
|
"valid_targets_mean": 16110.2,
|
|
"valid_targets_min": 14218
|
|
},
|
|
{
|
|
"epoch": 1.79317697228145,
|
|
"grad_norm": 0.02120037440242725,
|
|
"learning_rate": 3.246979603717467e-05,
|
|
"loss": 0.9756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1863858699798584,
|
|
"step": 421,
|
|
"valid_targets_mean": 10297.3,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 1.7974413646055436,
|
|
"grad_norm": 0.020937773290145,
|
|
"learning_rate": 3.242326617337133e-05,
|
|
"loss": 0.9408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21877554059028625,
|
|
"step": 422,
|
|
"valid_targets_mean": 16029.6,
|
|
"valid_targets_min": 12180
|
|
},
|
|
{
|
|
"epoch": 1.8017057569296375,
|
|
"grad_norm": 0.021931310247178634,
|
|
"learning_rate": 3.2376626564442016e-05,
|
|
"loss": 0.9411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848588824272156,
|
|
"step": 423,
|
|
"valid_targets_mean": 16165.7,
|
|
"valid_targets_min": 15138
|
|
},
|
|
{
|
|
"epoch": 1.8059701492537314,
|
|
"grad_norm": 0.020079902076387577,
|
|
"learning_rate": 3.2329877622393515e-05,
|
|
"loss": 0.9352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19458532333374023,
|
|
"step": 424,
|
|
"valid_targets_mean": 11471.3,
|
|
"valid_targets_min": 7416
|
|
},
|
|
{
|
|
"epoch": 1.8102345415778252,
|
|
"grad_norm": 0.020394727486918215,
|
|
"learning_rate": 3.228301976019841e-05,
|
|
"loss": 0.9995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25857067108154297,
|
|
"step": 425,
|
|
"valid_targets_mean": 16009.2,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 1.8144989339019189,
|
|
"grad_norm": 0.022255551467300475,
|
|
"learning_rate": 3.22360533917915e-05,
|
|
"loss": 0.954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591392993927002,
|
|
"step": 426,
|
|
"valid_targets_mean": 16203.5,
|
|
"valid_targets_min": 15285
|
|
},
|
|
{
|
|
"epoch": 1.8187633262260128,
|
|
"grad_norm": 0.02084030050969497,
|
|
"learning_rate": 3.218897893206608e-05,
|
|
"loss": 0.9318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19440467655658722,
|
|
"step": 427,
|
|
"valid_targets_mean": 13004.2,
|
|
"valid_targets_min": 4321
|
|
},
|
|
{
|
|
"epoch": 1.8230277185501067,
|
|
"grad_norm": 0.021503929037026106,
|
|
"learning_rate": 3.2141796796870335e-05,
|
|
"loss": 0.9736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720338702201843,
|
|
"step": 428,
|
|
"valid_targets_mean": 16044.8,
|
|
"valid_targets_min": 14077
|
|
},
|
|
{
|
|
"epoch": 1.8272921108742004,
|
|
"grad_norm": 0.022027154760575096,
|
|
"learning_rate": 3.2094507403003614e-05,
|
|
"loss": 0.9691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29510757327079773,
|
|
"step": 429,
|
|
"valid_targets_mean": 16068.7,
|
|
"valid_targets_min": 13562
|
|
},
|
|
{
|
|
"epoch": 1.831556503198294,
|
|
"grad_norm": 0.020823022922104324,
|
|
"learning_rate": 3.2047111168212785e-05,
|
|
"loss": 0.9717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23505569994449615,
|
|
"step": 430,
|
|
"valid_targets_mean": 15165.2,
|
|
"valid_targets_min": 14290
|
|
},
|
|
{
|
|
"epoch": 1.835820895522388,
|
|
"grad_norm": 0.021468023908629286,
|
|
"learning_rate": 3.1999608511188524e-05,
|
|
"loss": 0.9549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27706122398376465,
|
|
"step": 431,
|
|
"valid_targets_mean": 16057.8,
|
|
"valid_targets_min": 13514
|
|
},
|
|
{
|
|
"epoch": 1.840085287846482,
|
|
"grad_norm": 0.020905277787314828,
|
|
"learning_rate": 3.1951999851561625e-05,
|
|
"loss": 0.9846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18867135047912598,
|
|
"step": 432,
|
|
"valid_targets_mean": 11267.6,
|
|
"valid_targets_min": 2047
|
|
},
|
|
{
|
|
"epoch": 1.8443496801705757,
|
|
"grad_norm": 0.019422061371006952,
|
|
"learning_rate": 3.190428560989931e-05,
|
|
"loss": 0.9584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21667999029159546,
|
|
"step": 433,
|
|
"valid_targets_mean": 15758.6,
|
|
"valid_targets_min": 14791
|
|
},
|
|
{
|
|
"epoch": 1.8486140724946694,
|
|
"grad_norm": 0.022649795170711078,
|
|
"learning_rate": 3.185646620770146e-05,
|
|
"loss": 0.963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3150181770324707,
|
|
"step": 434,
|
|
"valid_targets_mean": 16093.3,
|
|
"valid_targets_min": 15339
|
|
},
|
|
{
|
|
"epoch": 1.8528784648187633,
|
|
"grad_norm": 0.021902374753624963,
|
|
"learning_rate": 3.180854206739696e-05,
|
|
"loss": 0.9888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358647346496582,
|
|
"step": 435,
|
|
"valid_targets_mean": 7360.9,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.019639057786515028,
|
|
"learning_rate": 3.176051361233991e-05,
|
|
"loss": 0.9733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24032211303710938,
|
|
"step": 436,
|
|
"valid_targets_mean": 16169.9,
|
|
"valid_targets_min": 15437
|
|
},
|
|
{
|
|
"epoch": 1.861407249466951,
|
|
"grad_norm": 0.021041600925038685,
|
|
"learning_rate": 3.171238126680594e-05,
|
|
"loss": 0.9258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626997232437134,
|
|
"step": 437,
|
|
"valid_targets_mean": 16143.6,
|
|
"valid_targets_min": 15190
|
|
},
|
|
{
|
|
"epoch": 1.8656716417910446,
|
|
"grad_norm": 0.021400491175345124,
|
|
"learning_rate": 3.166414545598839e-05,
|
|
"loss": 0.9895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20141258835792542,
|
|
"step": 438,
|
|
"valid_targets_mean": 12741.5,
|
|
"valid_targets_min": 9479
|
|
},
|
|
{
|
|
"epoch": 1.8699360341151388,
|
|
"grad_norm": 0.020121905819649122,
|
|
"learning_rate": 3.161580660599464e-05,
|
|
"loss": 0.957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24636691808700562,
|
|
"step": 439,
|
|
"valid_targets_mean": 16144.5,
|
|
"valid_targets_min": 15133
|
|
},
|
|
{
|
|
"epoch": 1.8742004264392325,
|
|
"grad_norm": 0.023010014545580295,
|
|
"learning_rate": 3.1567365143842264e-05,
|
|
"loss": 0.9488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865312099456787,
|
|
"step": 440,
|
|
"valid_targets_mean": 16157.0,
|
|
"valid_targets_min": 15430
|
|
},
|
|
{
|
|
"epoch": 1.8784648187633262,
|
|
"grad_norm": 0.020583120203159465,
|
|
"learning_rate": 3.1518821497455326e-05,
|
|
"loss": 0.9386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1917174607515335,
|
|
"step": 441,
|
|
"valid_targets_mean": 13529.2,
|
|
"valid_targets_min": 11121
|
|
},
|
|
{
|
|
"epoch": 1.88272921108742,
|
|
"grad_norm": 0.02170246979787594,
|
|
"learning_rate": 3.147017609566054e-05,
|
|
"loss": 0.9617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28883200883865356,
|
|
"step": 442,
|
|
"valid_targets_mean": 16090.9,
|
|
"valid_targets_min": 14780
|
|
},
|
|
{
|
|
"epoch": 1.886993603411514,
|
|
"grad_norm": 0.021759560485940757,
|
|
"learning_rate": 3.142142936818353e-05,
|
|
"loss": 0.9607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22022616863250732,
|
|
"step": 443,
|
|
"valid_targets_mean": 13443.2,
|
|
"valid_targets_min": 1964
|
|
},
|
|
{
|
|
"epoch": 1.8912579957356077,
|
|
"grad_norm": 0.020263132485004377,
|
|
"learning_rate": 3.137258174564501e-05,
|
|
"loss": 1.0029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2490268498659134,
|
|
"step": 444,
|
|
"valid_targets_mean": 15927.8,
|
|
"valid_targets_min": 14705
|
|
},
|
|
{
|
|
"epoch": 1.8955223880597014,
|
|
"grad_norm": 0.02133608826729028,
|
|
"learning_rate": 3.1323633659556986e-05,
|
|
"loss": 0.954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736002206802368,
|
|
"step": 445,
|
|
"valid_targets_mean": 16131.4,
|
|
"valid_targets_min": 14018
|
|
},
|
|
{
|
|
"epoch": 1.8997867803837953,
|
|
"grad_norm": 0.021972220319816972,
|
|
"learning_rate": 3.127458554231894e-05,
|
|
"loss": 0.9742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17150625586509705,
|
|
"step": 446,
|
|
"valid_targets_mean": 9858.8,
|
|
"valid_targets_min": 2341
|
|
},
|
|
{
|
|
"epoch": 1.9040511727078893,
|
|
"grad_norm": 0.020458746754008394,
|
|
"learning_rate": 3.122543782721402e-05,
|
|
"loss": 0.9845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544245719909668,
|
|
"step": 447,
|
|
"valid_targets_mean": 15981.0,
|
|
"valid_targets_min": 15050
|
|
},
|
|
{
|
|
"epoch": 1.908315565031983,
|
|
"grad_norm": 0.022003394037095222,
|
|
"learning_rate": 3.1176190948405194e-05,
|
|
"loss": 0.9736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3180297911167145,
|
|
"step": 448,
|
|
"valid_targets_mean": 16136.4,
|
|
"valid_targets_min": 15679
|
|
},
|
|
{
|
|
"epoch": 1.9125799573560767,
|
|
"grad_norm": 0.02111991409005941,
|
|
"learning_rate": 3.112684534093142e-05,
|
|
"loss": 0.9482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18285101652145386,
|
|
"step": 449,
|
|
"valid_targets_mean": 11083.8,
|
|
"valid_targets_min": 6638
|
|
},
|
|
{
|
|
"epoch": 1.9168443496801706,
|
|
"grad_norm": 0.019763717541558923,
|
|
"learning_rate": 3.107740144070385e-05,
|
|
"loss": 0.9129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24713127315044403,
|
|
"step": 450,
|
|
"valid_targets_mean": 16159.3,
|
|
"valid_targets_min": 15416
|
|
},
|
|
{
|
|
"epoch": 1.9211087420042645,
|
|
"grad_norm": 0.02151273220247889,
|
|
"learning_rate": 3.102785968450188e-05,
|
|
"loss": 0.9679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28934621810913086,
|
|
"step": 451,
|
|
"valid_targets_mean": 16150.6,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 1.9253731343283582,
|
|
"grad_norm": 0.02065410806766011,
|
|
"learning_rate": 3.09782205099694e-05,
|
|
"loss": 0.9804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22422046959400177,
|
|
"step": 452,
|
|
"valid_targets_mean": 13237.7,
|
|
"valid_targets_min": 11416
|
|
},
|
|
{
|
|
"epoch": 1.929637526652452,
|
|
"grad_norm": 0.02035869807849362,
|
|
"learning_rate": 3.092848435561084e-05,
|
|
"loss": 0.9455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25095710158348083,
|
|
"step": 453,
|
|
"valid_targets_mean": 16193.3,
|
|
"valid_targets_min": 14780
|
|
},
|
|
{
|
|
"epoch": 1.9339019189765458,
|
|
"grad_norm": 0.020813138529897936,
|
|
"learning_rate": 3.0878651660787376e-05,
|
|
"loss": 0.9547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26540815830230713,
|
|
"step": 454,
|
|
"valid_targets_mean": 16158.5,
|
|
"valid_targets_min": 15077
|
|
},
|
|
{
|
|
"epoch": 1.9381663113006398,
|
|
"grad_norm": 0.02075816772743918,
|
|
"learning_rate": 3.082872286571295e-05,
|
|
"loss": 0.9557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22672376036643982,
|
|
"step": 455,
|
|
"valid_targets_mean": 14503.8,
|
|
"valid_targets_min": 12743
|
|
},
|
|
{
|
|
"epoch": 1.9424307036247335,
|
|
"grad_norm": 0.021334564399645324,
|
|
"learning_rate": 3.077869841145049e-05,
|
|
"loss": 0.9399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26789626479148865,
|
|
"step": 456,
|
|
"valid_targets_mean": 16164.8,
|
|
"valid_targets_min": 15431
|
|
},
|
|
{
|
|
"epoch": 1.9466950959488272,
|
|
"grad_norm": 0.022338369588684417,
|
|
"learning_rate": 3.0728578739907934e-05,
|
|
"loss": 0.9836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19911347329616547,
|
|
"step": 457,
|
|
"valid_targets_mean": 10476.9,
|
|
"valid_targets_min": 2488
|
|
},
|
|
{
|
|
"epoch": 1.950959488272921,
|
|
"grad_norm": 0.018784430244821484,
|
|
"learning_rate": 3.067836429383437e-05,
|
|
"loss": 0.929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269982397556305,
|
|
"step": 458,
|
|
"valid_targets_mean": 16121.7,
|
|
"valid_targets_min": 14638
|
|
},
|
|
{
|
|
"epoch": 1.955223880597015,
|
|
"grad_norm": 0.020733912260012952,
|
|
"learning_rate": 3.062805551681609e-05,
|
|
"loss": 0.9496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27733397483825684,
|
|
"step": 459,
|
|
"valid_targets_mean": 16089.1,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 1.9594882729211087,
|
|
"grad_norm": 0.02119126043538997,
|
|
"learning_rate": 3.057765285327271e-05,
|
|
"loss": 1.0119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12115494161844254,
|
|
"step": 460,
|
|
"valid_targets_mean": 6449.0,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 1.9637526652452024,
|
|
"grad_norm": 0.02058710455811254,
|
|
"learning_rate": 3.0527156748453214e-05,
|
|
"loss": 0.9434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24707047641277313,
|
|
"step": 461,
|
|
"valid_targets_mean": 16119.7,
|
|
"valid_targets_min": 14004
|
|
},
|
|
{
|
|
"epoch": 1.9680170575692963,
|
|
"grad_norm": 0.02233858023583447,
|
|
"learning_rate": 3.047656764843203e-05,
|
|
"loss": 0.973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28606706857681274,
|
|
"step": 462,
|
|
"valid_targets_mean": 16150.6,
|
|
"valid_targets_min": 15250
|
|
},
|
|
{
|
|
"epoch": 1.9722814498933903,
|
|
"grad_norm": 0.019911674320328483,
|
|
"learning_rate": 3.0425886000105094e-05,
|
|
"loss": 0.9255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18937817215919495,
|
|
"step": 463,
|
|
"valid_targets_mean": 13032.3,
|
|
"valid_targets_min": 10397
|
|
},
|
|
{
|
|
"epoch": 1.976545842217484,
|
|
"grad_norm": 0.020157193786139102,
|
|
"learning_rate": 3.0375112251185892e-05,
|
|
"loss": 0.9608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2491195797920227,
|
|
"step": 464,
|
|
"valid_targets_mean": 16192.8,
|
|
"valid_targets_min": 15459
|
|
},
|
|
{
|
|
"epoch": 1.9808102345415777,
|
|
"grad_norm": 0.020936012661782778,
|
|
"learning_rate": 3.0324246850201527e-05,
|
|
"loss": 1.0112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30640310049057007,
|
|
"step": 465,
|
|
"valid_targets_mean": 16023.2,
|
|
"valid_targets_min": 15159
|
|
},
|
|
{
|
|
"epoch": 1.9850746268656716,
|
|
"grad_norm": 0.02061142866565183,
|
|
"learning_rate": 3.0273290246488732e-05,
|
|
"loss": 0.9597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21289363503456116,
|
|
"step": 466,
|
|
"valid_targets_mean": 14998.0,
|
|
"valid_targets_min": 13279
|
|
},
|
|
{
|
|
"epoch": 1.9893390191897655,
|
|
"grad_norm": 0.021422494247876768,
|
|
"learning_rate": 3.0222242890189904e-05,
|
|
"loss": 0.9889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26937663555145264,
|
|
"step": 467,
|
|
"valid_targets_mean": 16135.7,
|
|
"valid_targets_min": 15209
|
|
},
|
|
{
|
|
"epoch": 1.9936034115138592,
|
|
"grad_norm": 0.021512551706130475,
|
|
"learning_rate": 3.017110523224914e-05,
|
|
"loss": 0.9606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2252834588289261,
|
|
"step": 468,
|
|
"valid_targets_mean": 13144.4,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 1.997867803837953,
|
|
"grad_norm": 0.020270203062543474,
|
|
"learning_rate": 3.011987772440825e-05,
|
|
"loss": 0.9745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23703861236572266,
|
|
"step": 469,
|
|
"valid_targets_mean": 15427.8,
|
|
"valid_targets_min": 14254
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.03107220842492135,
|
|
"learning_rate": 3.006856081920277e-05,
|
|
"loss": 0.974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41704708337783813,
|
|
"step": 470,
|
|
"valid_targets_mean": 10768.1,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 2.0042643923240937,
|
|
"grad_norm": 0.020555656303956275,
|
|
"learning_rate": 3.001715496995793e-05,
|
|
"loss": 0.9444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22971820831298828,
|
|
"step": 471,
|
|
"valid_targets_mean": 16168.0,
|
|
"valid_targets_min": 15613
|
|
},
|
|
{
|
|
"epoch": 2.008528784648188,
|
|
"grad_norm": 0.022710318369144018,
|
|
"learning_rate": 2.9965660630784715e-05,
|
|
"loss": 0.9471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741541266441345,
|
|
"step": 472,
|
|
"valid_targets_mean": 16136.2,
|
|
"valid_targets_min": 14780
|
|
},
|
|
{
|
|
"epoch": 2.0127931769722816,
|
|
"grad_norm": 0.02276428647674756,
|
|
"learning_rate": 2.9914078256575782e-05,
|
|
"loss": 0.9266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.180230051279068,
|
|
"step": 473,
|
|
"valid_targets_mean": 11938.6,
|
|
"valid_targets_min": 7465
|
|
},
|
|
{
|
|
"epoch": 2.0170575692963753,
|
|
"grad_norm": 0.02139686048402706,
|
|
"learning_rate": 2.9862408303001492e-05,
|
|
"loss": 0.9351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26803261041641235,
|
|
"step": 474,
|
|
"valid_targets_mean": 16158.6,
|
|
"valid_targets_min": 15623
|
|
},
|
|
{
|
|
"epoch": 2.021321961620469,
|
|
"grad_norm": 0.021084443829887653,
|
|
"learning_rate": 2.9810651226505875e-05,
|
|
"loss": 0.9207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275611937046051,
|
|
"step": 475,
|
|
"valid_targets_mean": 16164.1,
|
|
"valid_targets_min": 14909
|
|
},
|
|
{
|
|
"epoch": 2.025586353944563,
|
|
"grad_norm": 0.020411151078450606,
|
|
"learning_rate": 2.9758807484302566e-05,
|
|
"loss": 0.9161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19949185848236084,
|
|
"step": 476,
|
|
"valid_targets_mean": 14284.3,
|
|
"valid_targets_min": 7924
|
|
},
|
|
{
|
|
"epoch": 2.029850746268657,
|
|
"grad_norm": 0.021414723496428034,
|
|
"learning_rate": 2.9706877534370822e-05,
|
|
"loss": 0.9384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2464442402124405,
|
|
"step": 477,
|
|
"valid_targets_mean": 16226.5,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 2.0341151385927505,
|
|
"grad_norm": 0.021729045176301816,
|
|
"learning_rate": 2.965486183545142e-05,
|
|
"loss": 0.9812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23386169970035553,
|
|
"step": 478,
|
|
"valid_targets_mean": 13049.3,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 2.038379530916844,
|
|
"grad_norm": 0.020174283488739064,
|
|
"learning_rate": 2.9602760847042645e-05,
|
|
"loss": 0.9211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2219517081975937,
|
|
"step": 479,
|
|
"valid_targets_mean": 15446.1,
|
|
"valid_targets_min": 13522
|
|
},
|
|
{
|
|
"epoch": 2.0426439232409384,
|
|
"grad_norm": 0.020838866672505323,
|
|
"learning_rate": 2.955057502939621e-05,
|
|
"loss": 0.976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26795539259910583,
|
|
"step": 480,
|
|
"valid_targets_mean": 16064.9,
|
|
"valid_targets_min": 12434
|
|
},
|
|
{
|
|
"epoch": 2.046908315565032,
|
|
"grad_norm": 0.021028462664413414,
|
|
"learning_rate": 2.9498304843513193e-05,
|
|
"loss": 0.9349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14952436089515686,
|
|
"step": 481,
|
|
"valid_targets_mean": 8832.5,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 2.0511727078891258,
|
|
"grad_norm": 0.01923742771936596,
|
|
"learning_rate": 2.9445950751139957e-05,
|
|
"loss": 0.9137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22533057630062103,
|
|
"step": 482,
|
|
"valid_targets_mean": 16138.0,
|
|
"valid_targets_min": 15381
|
|
},
|
|
{
|
|
"epoch": 2.0554371002132195,
|
|
"grad_norm": 0.02158674381812705,
|
|
"learning_rate": 2.939351321476412e-05,
|
|
"loss": 0.9449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2821138799190521,
|
|
"step": 483,
|
|
"valid_targets_mean": 16204.2,
|
|
"valid_targets_min": 15579
|
|
},
|
|
{
|
|
"epoch": 2.0597014925373136,
|
|
"grad_norm": 0.021450997112912464,
|
|
"learning_rate": 2.9340992697610393e-05,
|
|
"loss": 1.0024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20173928141593933,
|
|
"step": 484,
|
|
"valid_targets_mean": 12728.2,
|
|
"valid_targets_min": 7862
|
|
},
|
|
{
|
|
"epoch": 2.0639658848614073,
|
|
"grad_norm": 0.020909112580006148,
|
|
"learning_rate": 2.9288389663636537e-05,
|
|
"loss": 0.9574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24707570672035217,
|
|
"step": 485,
|
|
"valid_targets_mean": 16075.2,
|
|
"valid_targets_min": 14433
|
|
},
|
|
{
|
|
"epoch": 2.068230277185501,
|
|
"grad_norm": 0.02070424874775031,
|
|
"learning_rate": 2.923570457752925e-05,
|
|
"loss": 0.9243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28818607330322266,
|
|
"step": 486,
|
|
"valid_targets_mean": 16155.3,
|
|
"valid_targets_min": 15196
|
|
},
|
|
{
|
|
"epoch": 2.0724946695095947,
|
|
"grad_norm": 0.019797942138168795,
|
|
"learning_rate": 2.9182937904700078e-05,
|
|
"loss": 0.9327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21175923943519592,
|
|
"step": 487,
|
|
"valid_targets_mean": 14070.6,
|
|
"valid_targets_min": 11507
|
|
},
|
|
{
|
|
"epoch": 2.076759061833689,
|
|
"grad_norm": 0.02091302190204063,
|
|
"learning_rate": 2.9130090111281278e-05,
|
|
"loss": 0.9235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537364959716797,
|
|
"step": 488,
|
|
"valid_targets_mean": 16156.8,
|
|
"valid_targets_min": 15552
|
|
},
|
|
{
|
|
"epoch": 2.0810234541577826,
|
|
"grad_norm": 0.022215063945469727,
|
|
"learning_rate": 2.9077161664121722e-05,
|
|
"loss": 0.9541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28326186537742615,
|
|
"step": 489,
|
|
"valid_targets_mean": 16040.2,
|
|
"valid_targets_min": 15193
|
|
},
|
|
{
|
|
"epoch": 2.0852878464818763,
|
|
"grad_norm": 0.019577516576906755,
|
|
"learning_rate": 2.902415303078275e-05,
|
|
"loss": 0.9298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20384737849235535,
|
|
"step": 490,
|
|
"valid_targets_mean": 15403.3,
|
|
"valid_targets_min": 13863
|
|
},
|
|
{
|
|
"epoch": 2.08955223880597,
|
|
"grad_norm": 0.02065884978098361,
|
|
"learning_rate": 2.8971064679534072e-05,
|
|
"loss": 0.9467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2595246732234955,
|
|
"step": 491,
|
|
"valid_targets_mean": 16078.2,
|
|
"valid_targets_min": 14085
|
|
},
|
|
{
|
|
"epoch": 2.093816631130064,
|
|
"grad_norm": 0.020347451322210656,
|
|
"learning_rate": 2.8917897079349604e-05,
|
|
"loss": 0.928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1920347809791565,
|
|
"step": 492,
|
|
"valid_targets_mean": 10591.6,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 2.098081023454158,
|
|
"grad_norm": 0.01970442407326144,
|
|
"learning_rate": 2.8864650699903336e-05,
|
|
"loss": 0.935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22991949319839478,
|
|
"step": 493,
|
|
"valid_targets_mean": 15522.6,
|
|
"valid_targets_min": 13481
|
|
},
|
|
{
|
|
"epoch": 2.1023454157782515,
|
|
"grad_norm": 0.020057089030357887,
|
|
"learning_rate": 2.881132601156518e-05,
|
|
"loss": 0.9271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747058868408203,
|
|
"step": 494,
|
|
"valid_targets_mean": 16232.2,
|
|
"valid_targets_min": 15573
|
|
},
|
|
{
|
|
"epoch": 2.106609808102345,
|
|
"grad_norm": 0.021363839494572063,
|
|
"learning_rate": 2.8757923485396805e-05,
|
|
"loss": 0.9581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14810623228549957,
|
|
"step": 495,
|
|
"valid_targets_mean": 8089.1,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 2.1108742004264394,
|
|
"grad_norm": 0.01870595564896765,
|
|
"learning_rate": 2.8704443593147517e-05,
|
|
"loss": 0.9734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25529754161834717,
|
|
"step": 496,
|
|
"valid_targets_mean": 16061.9,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 2.115138592750533,
|
|
"grad_norm": 0.021579089006772058,
|
|
"learning_rate": 2.8650886807250024e-05,
|
|
"loss": 0.9796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897915840148926,
|
|
"step": 497,
|
|
"valid_targets_mean": 16109.8,
|
|
"valid_targets_min": 14260
|
|
},
|
|
{
|
|
"epoch": 2.1194029850746268,
|
|
"grad_norm": 0.021146579150242078,
|
|
"learning_rate": 2.8597253600816332e-05,
|
|
"loss": 0.9704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19916190207004547,
|
|
"step": 498,
|
|
"valid_targets_mean": 12072.0,
|
|
"valid_targets_min": 10292
|
|
},
|
|
{
|
|
"epoch": 2.1236673773987205,
|
|
"grad_norm": 0.02150705084751759,
|
|
"learning_rate": 2.8543544447633517e-05,
|
|
"loss": 0.9765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635442614555359,
|
|
"step": 499,
|
|
"valid_targets_mean": 16097.1,
|
|
"valid_targets_min": 14180
|
|
},
|
|
{
|
|
"epoch": 2.1279317697228146,
|
|
"grad_norm": 0.02301213686871947,
|
|
"learning_rate": 2.8489759822159558e-05,
|
|
"loss": 1.0116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111782670021057,
|
|
"step": 500,
|
|
"valid_targets_mean": 15909.6,
|
|
"valid_targets_min": 10715
|
|
},
|
|
{
|
|
"epoch": 2.1321961620469083,
|
|
"grad_norm": 0.0200870279699286,
|
|
"learning_rate": 2.843590019951914e-05,
|
|
"loss": 0.9416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20290786027908325,
|
|
"step": 501,
|
|
"valid_targets_mean": 13677.8,
|
|
"valid_targets_min": 11562
|
|
},
|
|
{
|
|
"epoch": 2.136460554371002,
|
|
"grad_norm": 0.02095417440419239,
|
|
"learning_rate": 2.838196605549948e-05,
|
|
"loss": 0.9805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28576016426086426,
|
|
"step": 502,
|
|
"valid_targets_mean": 16169.4,
|
|
"valid_targets_min": 15369
|
|
},
|
|
{
|
|
"epoch": 2.140724946695096,
|
|
"grad_norm": 0.023457111199059168,
|
|
"learning_rate": 2.8327957866546082e-05,
|
|
"loss": 1.0128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25298795104026794,
|
|
"step": 503,
|
|
"valid_targets_mean": 12852.8,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 2.14498933901919,
|
|
"grad_norm": 0.02030739575755605,
|
|
"learning_rate": 2.8273876109758568e-05,
|
|
"loss": 0.9619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2229728400707245,
|
|
"step": 504,
|
|
"valid_targets_mean": 15186.3,
|
|
"valid_targets_min": 14004
|
|
},
|
|
{
|
|
"epoch": 2.1492537313432836,
|
|
"grad_norm": 0.021272160899551563,
|
|
"learning_rate": 2.8219721262886427e-05,
|
|
"loss": 0.9621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28996384143829346,
|
|
"step": 505,
|
|
"valid_targets_mean": 16063.7,
|
|
"valid_targets_min": 13599
|
|
},
|
|
{
|
|
"epoch": 2.1535181236673773,
|
|
"grad_norm": 0.020806055720319832,
|
|
"learning_rate": 2.816549380432483e-05,
|
|
"loss": 0.9263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17190763354301453,
|
|
"step": 506,
|
|
"valid_targets_mean": 9596.2,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 2.1577825159914714,
|
|
"grad_norm": 0.019239550072487448,
|
|
"learning_rate": 2.8111194213110386e-05,
|
|
"loss": 0.9397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.229374498128891,
|
|
"step": 507,
|
|
"valid_targets_mean": 16192.3,
|
|
"valid_targets_min": 15655
|
|
},
|
|
{
|
|
"epoch": 2.162046908315565,
|
|
"grad_norm": 0.02116347050335003,
|
|
"learning_rate": 2.805682296891691e-05,
|
|
"loss": 0.9768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867833375930786,
|
|
"step": 508,
|
|
"valid_targets_mean": 16157.7,
|
|
"valid_targets_min": 15589
|
|
},
|
|
{
|
|
"epoch": 2.166311300639659,
|
|
"grad_norm": 0.02115773293181609,
|
|
"learning_rate": 2.8002380552051186e-05,
|
|
"loss": 0.9718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15333570539951324,
|
|
"step": 509,
|
|
"valid_targets_mean": 9651.6,
|
|
"valid_targets_min": 5678
|
|
},
|
|
{
|
|
"epoch": 2.1705756929637525,
|
|
"grad_norm": 0.021402262716235097,
|
|
"learning_rate": 2.7947867443448728e-05,
|
|
"loss": 0.9236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22939561307430267,
|
|
"step": 510,
|
|
"valid_targets_mean": 16216.6,
|
|
"valid_targets_min": 15345
|
|
},
|
|
{
|
|
"epoch": 2.1748400852878467,
|
|
"grad_norm": 0.021603736652016557,
|
|
"learning_rate": 2.789328412466953e-05,
|
|
"loss": 0.9749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781219184398651,
|
|
"step": 511,
|
|
"valid_targets_mean": 16177.4,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 2.1791044776119404,
|
|
"grad_norm": 0.021901079326660678,
|
|
"learning_rate": 2.7838631077893813e-05,
|
|
"loss": 0.9546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20535436272621155,
|
|
"step": 512,
|
|
"valid_targets_mean": 12776.2,
|
|
"valid_targets_min": 10267
|
|
},
|
|
{
|
|
"epoch": 2.183368869936034,
|
|
"grad_norm": 0.021099291352594435,
|
|
"learning_rate": 2.7783908785917753e-05,
|
|
"loss": 0.9615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695735692977905,
|
|
"step": 513,
|
|
"valid_targets_mean": 16067.2,
|
|
"valid_targets_min": 14869
|
|
},
|
|
{
|
|
"epoch": 2.1876332622601278,
|
|
"grad_norm": 0.02126264771879672,
|
|
"learning_rate": 2.7729117732149244e-05,
|
|
"loss": 0.9826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27854466438293457,
|
|
"step": 514,
|
|
"valid_targets_mean": 16073.2,
|
|
"valid_targets_min": 14855
|
|
},
|
|
{
|
|
"epoch": 2.191897654584222,
|
|
"grad_norm": 0.019500726065137412,
|
|
"learning_rate": 2.7674258400603587e-05,
|
|
"loss": 0.9403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21826767921447754,
|
|
"step": 515,
|
|
"valid_targets_mean": 14610.6,
|
|
"valid_targets_min": 12859
|
|
},
|
|
{
|
|
"epoch": 2.1961620469083156,
|
|
"grad_norm": 0.020820283106571,
|
|
"learning_rate": 2.761933127589927e-05,
|
|
"loss": 0.9972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27011042833328247,
|
|
"step": 516,
|
|
"valid_targets_mean": 16221.2,
|
|
"valid_targets_min": 15490
|
|
},
|
|
{
|
|
"epoch": 2.2004264392324093,
|
|
"grad_norm": 0.02142825177783525,
|
|
"learning_rate": 2.7564336843253633e-05,
|
|
"loss": 0.9392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19509926438331604,
|
|
"step": 517,
|
|
"valid_targets_mean": 11061.2,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 2.204690831556503,
|
|
"grad_norm": 0.01953006183486877,
|
|
"learning_rate": 2.7509275588478606e-05,
|
|
"loss": 0.9552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24030128121376038,
|
|
"step": 518,
|
|
"valid_targets_mean": 16005.6,
|
|
"valid_targets_min": 15251
|
|
},
|
|
{
|
|
"epoch": 2.208955223880597,
|
|
"grad_norm": 0.020606438368493938,
|
|
"learning_rate": 2.7454147997976404e-05,
|
|
"loss": 0.9525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2878165543079376,
|
|
"step": 519,
|
|
"valid_targets_mean": 16152.1,
|
|
"valid_targets_min": 15581
|
|
},
|
|
{
|
|
"epoch": 2.213219616204691,
|
|
"grad_norm": 0.01954628017283462,
|
|
"learning_rate": 2.7398954558735272e-05,
|
|
"loss": 0.9225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442563384771347,
|
|
"step": 520,
|
|
"valid_targets_mean": 9104.9,
|
|
"valid_targets_min": 2794
|
|
},
|
|
{
|
|
"epoch": 2.2174840085287846,
|
|
"grad_norm": 0.021312052032646828,
|
|
"learning_rate": 2.7343695758325125e-05,
|
|
"loss": 0.9529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2236306220293045,
|
|
"step": 521,
|
|
"valid_targets_mean": 15920.8,
|
|
"valid_targets_min": 15022
|
|
},
|
|
{
|
|
"epoch": 2.2217484008528783,
|
|
"grad_norm": 0.021024506537541548,
|
|
"learning_rate": 2.7288372084893282e-05,
|
|
"loss": 0.9287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2794550061225891,
|
|
"step": 522,
|
|
"valid_targets_mean": 16113.2,
|
|
"valid_targets_min": 14663
|
|
},
|
|
{
|
|
"epoch": 2.2260127931769724,
|
|
"grad_norm": 0.020083960335147564,
|
|
"learning_rate": 2.7232984027160126e-05,
|
|
"loss": 0.9335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20333027839660645,
|
|
"step": 523,
|
|
"valid_targets_mean": 12710.2,
|
|
"valid_targets_min": 10493
|
|
},
|
|
{
|
|
"epoch": 2.230277185501066,
|
|
"grad_norm": 0.020099374876447283,
|
|
"learning_rate": 2.7177532074414822e-05,
|
|
"loss": 0.9721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657122015953064,
|
|
"step": 524,
|
|
"valid_targets_mean": 16065.7,
|
|
"valid_targets_min": 14847
|
|
},
|
|
{
|
|
"epoch": 2.23454157782516,
|
|
"grad_norm": 0.023043498944526702,
|
|
"learning_rate": 2.712201671651094e-05,
|
|
"loss": 0.9535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862994372844696,
|
|
"step": 525,
|
|
"valid_targets_mean": 16133.7,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 2.2388059701492535,
|
|
"grad_norm": 0.020735489009625012,
|
|
"learning_rate": 2.7066438443862205e-05,
|
|
"loss": 0.9884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20684762299060822,
|
|
"step": 526,
|
|
"valid_targets_mean": 13734.0,
|
|
"valid_targets_min": 11587
|
|
},
|
|
{
|
|
"epoch": 2.2430703624733477,
|
|
"grad_norm": 0.021814817563452084,
|
|
"learning_rate": 2.701079774743808e-05,
|
|
"loss": 0.9903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2793634533882141,
|
|
"step": 527,
|
|
"valid_targets_mean": 16159.2,
|
|
"valid_targets_min": 15175
|
|
},
|
|
{
|
|
"epoch": 2.2473347547974414,
|
|
"grad_norm": 0.02096979075914662,
|
|
"learning_rate": 2.6955095118759496e-05,
|
|
"loss": 0.9795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21442270278930664,
|
|
"step": 528,
|
|
"valid_targets_mean": 13231.1,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 2.251599147121535,
|
|
"grad_norm": 0.020119310668993386,
|
|
"learning_rate": 2.689933104989447e-05,
|
|
"loss": 0.9445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21956714987754822,
|
|
"step": 529,
|
|
"valid_targets_mean": 15414.6,
|
|
"valid_targets_min": 13757
|
|
},
|
|
{
|
|
"epoch": 2.2558635394456292,
|
|
"grad_norm": 0.022408594169135845,
|
|
"learning_rate": 2.6843506033453777e-05,
|
|
"loss": 0.9998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824423611164093,
|
|
"step": 530,
|
|
"valid_targets_mean": 16063.4,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 2.260127931769723,
|
|
"grad_norm": 0.021572264502927967,
|
|
"learning_rate": 2.6787620562586587e-05,
|
|
"loss": 0.9541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15836024284362793,
|
|
"step": 531,
|
|
"valid_targets_mean": 9366.7,
|
|
"valid_targets_min": 3116
|
|
},
|
|
{
|
|
"epoch": 2.2643923240938166,
|
|
"grad_norm": 0.018901445271205075,
|
|
"learning_rate": 2.673167513097613e-05,
|
|
"loss": 0.9342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23982523381710052,
|
|
"step": 532,
|
|
"valid_targets_mean": 16116.0,
|
|
"valid_targets_min": 15469
|
|
},
|
|
{
|
|
"epoch": 2.2686567164179103,
|
|
"grad_norm": 0.020224388563996016,
|
|
"learning_rate": 2.6675670232835297e-05,
|
|
"loss": 0.9131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26109081506729126,
|
|
"step": 533,
|
|
"valid_targets_mean": 16048.3,
|
|
"valid_targets_min": 12838
|
|
},
|
|
{
|
|
"epoch": 2.272921108742004,
|
|
"grad_norm": 0.022282015890021604,
|
|
"learning_rate": 2.661960636290231e-05,
|
|
"loss": 0.9611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1686350256204605,
|
|
"step": 534,
|
|
"valid_targets_mean": 10608.0,
|
|
"valid_targets_min": 6015
|
|
},
|
|
{
|
|
"epoch": 2.277185501066098,
|
|
"grad_norm": 0.021490525740730668,
|
|
"learning_rate": 2.6563484016436346e-05,
|
|
"loss": 0.9565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24342119693756104,
|
|
"step": 535,
|
|
"valid_targets_mean": 16171.2,
|
|
"valid_targets_min": 15227
|
|
},
|
|
{
|
|
"epoch": 2.281449893390192,
|
|
"grad_norm": 0.021075186816034656,
|
|
"learning_rate": 2.6507303689213143e-05,
|
|
"loss": 1.0201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897166609764099,
|
|
"step": 536,
|
|
"valid_targets_mean": 16170.9,
|
|
"valid_targets_min": 15568
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.019545007125288862,
|
|
"learning_rate": 2.6451065877520634e-05,
|
|
"loss": 0.9112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19772477447986603,
|
|
"step": 537,
|
|
"valid_targets_mean": 13365.8,
|
|
"valid_targets_min": 11075
|
|
},
|
|
{
|
|
"epoch": 2.2899786780383797,
|
|
"grad_norm": 0.021454221291919814,
|
|
"learning_rate": 2.639477107815455e-05,
|
|
"loss": 0.9872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626629173755646,
|
|
"step": 538,
|
|
"valid_targets_mean": 16159.4,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 2.2942430703624734,
|
|
"grad_norm": 0.021825009536238442,
|
|
"learning_rate": 2.633841978841406e-05,
|
|
"loss": 0.9489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26567715406417847,
|
|
"step": 539,
|
|
"valid_targets_mean": 16152.2,
|
|
"valid_targets_min": 14349
|
|
},
|
|
{
|
|
"epoch": 2.298507462686567,
|
|
"grad_norm": 0.020180849242492156,
|
|
"learning_rate": 2.6282012506097347e-05,
|
|
"loss": 0.9207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21605639159679413,
|
|
"step": 540,
|
|
"valid_targets_mean": 14502.9,
|
|
"valid_targets_min": 12752
|
|
},
|
|
{
|
|
"epoch": 2.302771855010661,
|
|
"grad_norm": 0.02137826384582926,
|
|
"learning_rate": 2.622554972949724e-05,
|
|
"loss": 0.9768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26718878746032715,
|
|
"step": 541,
|
|
"valid_targets_mean": 16050.8,
|
|
"valid_targets_min": 13582
|
|
},
|
|
{
|
|
"epoch": 2.307036247334755,
|
|
"grad_norm": 0.020877745208291408,
|
|
"learning_rate": 2.6169031957396778e-05,
|
|
"loss": 0.947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19049867987632751,
|
|
"step": 542,
|
|
"valid_targets_mean": 11036.0,
|
|
"valid_targets_min": 2594
|
|
},
|
|
{
|
|
"epoch": 2.3113006396588487,
|
|
"grad_norm": 0.02114282930193323,
|
|
"learning_rate": 2.611245968906482e-05,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23663903772830963,
|
|
"step": 543,
|
|
"valid_targets_mean": 15935.0,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 2.3155650319829424,
|
|
"grad_norm": 0.02122929343718855,
|
|
"learning_rate": 2.605583342425165e-05,
|
|
"loss": 0.915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644209861755371,
|
|
"step": 544,
|
|
"valid_targets_mean": 16109.6,
|
|
"valid_targets_min": 15159
|
|
},
|
|
{
|
|
"epoch": 2.319829424307036,
|
|
"grad_norm": 0.02134512383982228,
|
|
"learning_rate": 2.5999153663184546e-05,
|
|
"loss": 0.9499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17039459943771362,
|
|
"step": 545,
|
|
"valid_targets_mean": 9200.4,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 2.3240938166311302,
|
|
"grad_norm": 0.01937197035223608,
|
|
"learning_rate": 2.594242090656335e-05,
|
|
"loss": 0.9718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2464725375175476,
|
|
"step": 546,
|
|
"valid_targets_mean": 16155.6,
|
|
"valid_targets_min": 15337
|
|
},
|
|
{
|
|
"epoch": 2.328358208955224,
|
|
"grad_norm": 0.02211793927730315,
|
|
"learning_rate": 2.5885635655556075e-05,
|
|
"loss": 0.9318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2739616334438324,
|
|
"step": 547,
|
|
"valid_targets_mean": 16111.8,
|
|
"valid_targets_min": 14133
|
|
},
|
|
{
|
|
"epoch": 2.3326226012793176,
|
|
"grad_norm": 0.020647261622257083,
|
|
"learning_rate": 2.5828798411794443e-05,
|
|
"loss": 0.978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20479679107666016,
|
|
"step": 548,
|
|
"valid_targets_mean": 12415.0,
|
|
"valid_targets_min": 9393
|
|
},
|
|
{
|
|
"epoch": 2.3368869936034113,
|
|
"grad_norm": 0.02019478808008014,
|
|
"learning_rate": 2.5771909677369484e-05,
|
|
"loss": 0.9251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515043616294861,
|
|
"step": 549,
|
|
"valid_targets_mean": 16268.8,
|
|
"valid_targets_min": 15853
|
|
},
|
|
{
|
|
"epoch": 2.3411513859275055,
|
|
"grad_norm": 0.020133326550717992,
|
|
"learning_rate": 2.571496995482709e-05,
|
|
"loss": 0.9267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254985511302948,
|
|
"step": 550,
|
|
"valid_targets_mean": 16094.1,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 2.345415778251599,
|
|
"grad_norm": 0.019755528754209373,
|
|
"learning_rate": 2.565797974716357e-05,
|
|
"loss": 0.906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1945338249206543,
|
|
"step": 551,
|
|
"valid_targets_mean": 13947.7,
|
|
"valid_targets_min": 12127
|
|
},
|
|
{
|
|
"epoch": 2.349680170575693,
|
|
"grad_norm": 0.02211247299070169,
|
|
"learning_rate": 2.5600939557821205e-05,
|
|
"loss": 0.9801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859010696411133,
|
|
"step": 552,
|
|
"valid_targets_mean": 16076.7,
|
|
"valid_targets_min": 14583
|
|
},
|
|
{
|
|
"epoch": 2.3539445628997866,
|
|
"grad_norm": 0.02035999410208659,
|
|
"learning_rate": 2.5543849890683813e-05,
|
|
"loss": 0.9342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2348511815071106,
|
|
"step": 553,
|
|
"valid_targets_mean": 13212.6,
|
|
"valid_targets_min": 3238
|
|
},
|
|
{
|
|
"epoch": 2.3582089552238807,
|
|
"grad_norm": 0.02000467942529742,
|
|
"learning_rate": 2.548671125007229e-05,
|
|
"loss": 0.9399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23307286202907562,
|
|
"step": 554,
|
|
"valid_targets_mean": 15229.8,
|
|
"valid_targets_min": 13097
|
|
},
|
|
{
|
|
"epoch": 2.3624733475479744,
|
|
"grad_norm": 0.021246013580197794,
|
|
"learning_rate": 2.5429524140740155e-05,
|
|
"loss": 0.9448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713664174079895,
|
|
"step": 555,
|
|
"valid_targets_mean": 16190.5,
|
|
"valid_targets_min": 15620
|
|
},
|
|
{
|
|
"epoch": 2.366737739872068,
|
|
"grad_norm": 0.0220592439987313,
|
|
"learning_rate": 2.537228906786908e-05,
|
|
"loss": 0.9779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15300004184246063,
|
|
"step": 556,
|
|
"valid_targets_mean": 8599.8,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 2.3710021321961623,
|
|
"grad_norm": 0.019020679415237163,
|
|
"learning_rate": 2.5315006537064473e-05,
|
|
"loss": 0.9492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23588436841964722,
|
|
"step": 557,
|
|
"valid_targets_mean": 16018.4,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 2.375266524520256,
|
|
"grad_norm": 0.021006803549067758,
|
|
"learning_rate": 2.5257677054350927e-05,
|
|
"loss": 0.9436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29828566312789917,
|
|
"step": 558,
|
|
"valid_targets_mean": 16092.2,
|
|
"valid_targets_min": 12838
|
|
},
|
|
{
|
|
"epoch": 2.3795309168443497,
|
|
"grad_norm": 0.020611007975174817,
|
|
"learning_rate": 2.5200301126167857e-05,
|
|
"loss": 0.9094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17389297485351562,
|
|
"step": 559,
|
|
"valid_targets_mean": 11642.2,
|
|
"valid_targets_min": 7939
|
|
},
|
|
{
|
|
"epoch": 2.3837953091684434,
|
|
"grad_norm": 0.019917242084490513,
|
|
"learning_rate": 2.514287925936492e-05,
|
|
"loss": 0.9406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526880204677582,
|
|
"step": 560,
|
|
"valid_targets_mean": 16051.5,
|
|
"valid_targets_min": 13682
|
|
},
|
|
{
|
|
"epoch": 2.388059701492537,
|
|
"grad_norm": 0.019998710531143882,
|
|
"learning_rate": 2.5085411961197626e-05,
|
|
"loss": 0.9575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28107404708862305,
|
|
"step": 561,
|
|
"valid_targets_mean": 16121.9,
|
|
"valid_targets_min": 15255
|
|
},
|
|
{
|
|
"epoch": 2.3923240938166312,
|
|
"grad_norm": 0.020156166074275596,
|
|
"learning_rate": 2.502789973932278e-05,
|
|
"loss": 0.9462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20902875065803528,
|
|
"step": 562,
|
|
"valid_targets_mean": 13494.1,
|
|
"valid_targets_min": 10723
|
|
},
|
|
{
|
|
"epoch": 2.396588486140725,
|
|
"grad_norm": 0.022026276796829974,
|
|
"learning_rate": 2.4970343101794073e-05,
|
|
"loss": 0.9667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27942579984664917,
|
|
"step": 563,
|
|
"valid_targets_mean": 16119.4,
|
|
"valid_targets_min": 15584
|
|
},
|
|
{
|
|
"epoch": 2.4008528784648187,
|
|
"grad_norm": 0.020657884988344078,
|
|
"learning_rate": 2.4912742557057538e-05,
|
|
"loss": 0.9197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253190815448761,
|
|
"step": 564,
|
|
"valid_targets_mean": 16256.9,
|
|
"valid_targets_min": 15941
|
|
},
|
|
{
|
|
"epoch": 2.405117270788913,
|
|
"grad_norm": 0.019661374459667442,
|
|
"learning_rate": 2.485509861394708e-05,
|
|
"loss": 0.9422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21603262424468994,
|
|
"step": 565,
|
|
"valid_targets_mean": 15189.3,
|
|
"valid_targets_min": 13312
|
|
},
|
|
{
|
|
"epoch": 2.4093816631130065,
|
|
"grad_norm": 0.021771557597127292,
|
|
"learning_rate": 2.4797411781679975e-05,
|
|
"loss": 0.9338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672555446624756,
|
|
"step": 566,
|
|
"valid_targets_mean": 16077.6,
|
|
"valid_targets_min": 13863
|
|
},
|
|
{
|
|
"epoch": 2.4136460554371,
|
|
"grad_norm": 0.021082391852363925,
|
|
"learning_rate": 2.473968256985238e-05,
|
|
"loss": 0.9974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21012061834335327,
|
|
"step": 567,
|
|
"valid_targets_mean": 11164.2,
|
|
"valid_targets_min": 2935
|
|
},
|
|
{
|
|
"epoch": 2.417910447761194,
|
|
"grad_norm": 0.01921686150355562,
|
|
"learning_rate": 2.4681911488434825e-05,
|
|
"loss": 0.8965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20908506214618683,
|
|
"step": 568,
|
|
"valid_targets_mean": 15729.7,
|
|
"valid_targets_min": 15008
|
|
},
|
|
{
|
|
"epoch": 2.4221748400852876,
|
|
"grad_norm": 0.020682866980511094,
|
|
"learning_rate": 2.4624099047767702e-05,
|
|
"loss": 0.916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736762762069702,
|
|
"step": 569,
|
|
"valid_targets_mean": 16151.6,
|
|
"valid_targets_min": 13757
|
|
},
|
|
{
|
|
"epoch": 2.4264392324093818,
|
|
"grad_norm": 0.02113434842859691,
|
|
"learning_rate": 2.4566245758556787e-05,
|
|
"loss": 0.9482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16383413970470428,
|
|
"step": 570,
|
|
"valid_targets_mean": 9363.4,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 2.4307036247334755,
|
|
"grad_norm": 0.019608334782669802,
|
|
"learning_rate": 2.4508352131868664e-05,
|
|
"loss": 0.9641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23756608366966248,
|
|
"step": 571,
|
|
"valid_targets_mean": 16093.5,
|
|
"valid_targets_min": 14935
|
|
},
|
|
{
|
|
"epoch": 2.434968017057569,
|
|
"grad_norm": 0.021145329284488056,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 0.9772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283695250749588,
|
|
"step": 572,
|
|
"valid_targets_mean": 16157.8,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 2.4392324093816633,
|
|
"grad_norm": 0.02068998811225493,
|
|
"learning_rate": 2.439244591210443e-05,
|
|
"loss": 0.9338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20451420545578003,
|
|
"step": 573,
|
|
"valid_targets_mean": 12390.7,
|
|
"valid_targets_min": 9741
|
|
},
|
|
{
|
|
"epoch": 2.443496801705757,
|
|
"grad_norm": 0.019899765042694712,
|
|
"learning_rate": 2.4334434342925133e-05,
|
|
"loss": 0.9802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609851360321045,
|
|
"step": 574,
|
|
"valid_targets_mean": 16110.2,
|
|
"valid_targets_min": 14171
|
|
},
|
|
{
|
|
"epoch": 2.4477611940298507,
|
|
"grad_norm": 0.021612603402519217,
|
|
"learning_rate": 2.4276384484053227e-05,
|
|
"loss": 0.9893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28183862566947937,
|
|
"step": 575,
|
|
"valid_targets_mean": 16109.1,
|
|
"valid_targets_min": 15485
|
|
},
|
|
{
|
|
"epoch": 2.4520255863539444,
|
|
"grad_norm": 0.019343107341355845,
|
|
"learning_rate": 2.4218296848291795e-05,
|
|
"loss": 0.9611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23051133751869202,
|
|
"step": 576,
|
|
"valid_targets_mean": 14613.6,
|
|
"valid_targets_min": 12701
|
|
},
|
|
{
|
|
"epoch": 2.4562899786780386,
|
|
"grad_norm": 0.02115173734322528,
|
|
"learning_rate": 2.4160171948777603e-05,
|
|
"loss": 0.9432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26665574312210083,
|
|
"step": 577,
|
|
"valid_targets_mean": 16135.8,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 2.4605543710021323,
|
|
"grad_norm": 0.021779267424463743,
|
|
"learning_rate": 2.410201029897665e-05,
|
|
"loss": 0.9791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2305983603000641,
|
|
"step": 578,
|
|
"valid_targets_mean": 12771.2,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 2.464818763326226,
|
|
"grad_norm": 0.01947093371131984,
|
|
"learning_rate": 2.4043812412679532e-05,
|
|
"loss": 0.9538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2295411229133606,
|
|
"step": 579,
|
|
"valid_targets_mean": 14939.6,
|
|
"valid_targets_min": 13337
|
|
},
|
|
{
|
|
"epoch": 2.4690831556503197,
|
|
"grad_norm": 0.021484180698247552,
|
|
"learning_rate": 2.3985578803996985e-05,
|
|
"loss": 0.9434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26420867443084717,
|
|
"step": 580,
|
|
"valid_targets_mean": 16261.2,
|
|
"valid_targets_min": 15566
|
|
},
|
|
{
|
|
"epoch": 2.473347547974414,
|
|
"grad_norm": 0.020482800473745993,
|
|
"learning_rate": 2.392730998735529e-05,
|
|
"loss": 0.9529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16380247473716736,
|
|
"step": 581,
|
|
"valid_targets_mean": 10233.6,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 2.4776119402985075,
|
|
"grad_norm": 0.020204934117455067,
|
|
"learning_rate": 2.3869006477491755e-05,
|
|
"loss": 0.9971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24780839681625366,
|
|
"step": 582,
|
|
"valid_targets_mean": 15966.7,
|
|
"valid_targets_min": 14552
|
|
},
|
|
{
|
|
"epoch": 2.481876332622601,
|
|
"grad_norm": 0.021468970425988523,
|
|
"learning_rate": 2.381066878945017e-05,
|
|
"loss": 0.9079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2740297317504883,
|
|
"step": 583,
|
|
"valid_targets_mean": 16125.4,
|
|
"valid_targets_min": 15119
|
|
},
|
|
{
|
|
"epoch": 2.486140724946695,
|
|
"grad_norm": 0.020612754506297133,
|
|
"learning_rate": 2.3752297438576257e-05,
|
|
"loss": 0.957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17317865788936615,
|
|
"step": 584,
|
|
"valid_targets_mean": 10548.6,
|
|
"valid_targets_min": 6478
|
|
},
|
|
{
|
|
"epoch": 2.490405117270789,
|
|
"grad_norm": 0.020676279521831733,
|
|
"learning_rate": 2.3693892940513074e-05,
|
|
"loss": 0.9719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25092077255249023,
|
|
"step": 585,
|
|
"valid_targets_mean": 16169.0,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 2.4946695095948828,
|
|
"grad_norm": 0.02163998136688837,
|
|
"learning_rate": 2.3635455811196536e-05,
|
|
"loss": 0.9428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28742730617523193,
|
|
"step": 586,
|
|
"valid_targets_mean": 16154.1,
|
|
"valid_targets_min": 15660
|
|
},
|
|
{
|
|
"epoch": 2.4989339019189765,
|
|
"grad_norm": 0.0196117723077694,
|
|
"learning_rate": 2.3576986566850796e-05,
|
|
"loss": 0.9563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21040606498718262,
|
|
"step": 587,
|
|
"valid_targets_mean": 14049.0,
|
|
"valid_targets_min": 11844
|
|
},
|
|
{
|
|
"epoch": 2.50319829424307,
|
|
"grad_norm": 0.02055214475374906,
|
|
"learning_rate": 2.351848572398371e-05,
|
|
"loss": 0.9067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24326270818710327,
|
|
"step": 588,
|
|
"valid_targets_mean": 16253.8,
|
|
"valid_targets_min": 15645
|
|
},
|
|
{
|
|
"epoch": 2.5074626865671643,
|
|
"grad_norm": 0.021597749338657844,
|
|
"learning_rate": 2.3459953799382276e-05,
|
|
"loss": 0.9115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251641184091568,
|
|
"step": 589,
|
|
"valid_targets_mean": 16216.8,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 2.511727078891258,
|
|
"grad_norm": 0.0198232981267047,
|
|
"learning_rate": 2.3401391310108054e-05,
|
|
"loss": 0.9226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22280138731002808,
|
|
"step": 590,
|
|
"valid_targets_mean": 15148.3,
|
|
"valid_targets_min": 11078
|
|
},
|
|
{
|
|
"epoch": 2.5159914712153517,
|
|
"grad_norm": 0.02204337539375846,
|
|
"learning_rate": 2.3342798773492602e-05,
|
|
"loss": 0.9295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639668583869934,
|
|
"step": 591,
|
|
"valid_targets_mean": 16135.6,
|
|
"valid_targets_min": 14923
|
|
},
|
|
{
|
|
"epoch": 2.520255863539446,
|
|
"grad_norm": 0.0224011995027163,
|
|
"learning_rate": 2.328417670713294e-05,
|
|
"loss": 0.9287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819169670343399,
|
|
"step": 592,
|
|
"valid_targets_mean": 10641.2,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 2.5245202558635396,
|
|
"grad_norm": 0.019766548267904624,
|
|
"learning_rate": 2.3225525628886918e-05,
|
|
"loss": 0.9512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23753896355628967,
|
|
"step": 593,
|
|
"valid_targets_mean": 16034.8,
|
|
"valid_targets_min": 15147
|
|
},
|
|
{
|
|
"epoch": 2.5287846481876333,
|
|
"grad_norm": 0.02076031020982299,
|
|
"learning_rate": 2.3166846056868687e-05,
|
|
"loss": 0.9692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983801066875458,
|
|
"step": 594,
|
|
"valid_targets_mean": 16113.4,
|
|
"valid_targets_min": 15182
|
|
},
|
|
{
|
|
"epoch": 2.533049040511727,
|
|
"grad_norm": 0.022420739900169722,
|
|
"learning_rate": 2.31081385094441e-05,
|
|
"loss": 0.9439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13448143005371094,
|
|
"step": 595,
|
|
"valid_targets_mean": 7960.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.5373134328358207,
|
|
"grad_norm": 0.01954986247065215,
|
|
"learning_rate": 2.304940350522615e-05,
|
|
"loss": 0.9505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22509190440177917,
|
|
"step": 596,
|
|
"valid_targets_mean": 16137.4,
|
|
"valid_targets_min": 14044
|
|
},
|
|
{
|
|
"epoch": 2.541577825159915,
|
|
"grad_norm": 0.02180974404696445,
|
|
"learning_rate": 2.299064156307037e-05,
|
|
"loss": 0.9539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770749032497406,
|
|
"step": 597,
|
|
"valid_targets_mean": 16072.3,
|
|
"valid_targets_min": 13650
|
|
},
|
|
{
|
|
"epoch": 2.5458422174840085,
|
|
"grad_norm": 0.01947287723084805,
|
|
"learning_rate": 2.2931853202070275e-05,
|
|
"loss": 0.9604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20471599698066711,
|
|
"step": 598,
|
|
"valid_targets_mean": 13140.9,
|
|
"valid_targets_min": 9423
|
|
},
|
|
{
|
|
"epoch": 2.550106609808102,
|
|
"grad_norm": 0.01934844174916304,
|
|
"learning_rate": 2.2873038941552724e-05,
|
|
"loss": 0.9076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2484910488128662,
|
|
"step": 599,
|
|
"valid_targets_mean": 16083.8,
|
|
"valid_targets_min": 13369
|
|
},
|
|
{
|
|
"epoch": 2.5543710021321964,
|
|
"grad_norm": 0.020984641118317007,
|
|
"learning_rate": 2.2814199301073412e-05,
|
|
"loss": 0.9745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28177356719970703,
|
|
"step": 600,
|
|
"valid_targets_mean": 16108.1,
|
|
"valid_targets_min": 12845
|
|
},
|
|
{
|
|
"epoch": 2.55863539445629,
|
|
"grad_norm": 0.019713543345793876,
|
|
"learning_rate": 2.27553348004122e-05,
|
|
"loss": 0.9628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22127491235733032,
|
|
"step": 601,
|
|
"valid_targets_mean": 14025.5,
|
|
"valid_targets_min": 7847
|
|
},
|
|
{
|
|
"epoch": 2.5628997867803838,
|
|
"grad_norm": 0.020962274749127992,
|
|
"learning_rate": 2.2696445959568577e-05,
|
|
"loss": 0.964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28617075085639954,
|
|
"step": 602,
|
|
"valid_targets_mean": 16065.5,
|
|
"valid_targets_min": 14907
|
|
},
|
|
{
|
|
"epoch": 2.5671641791044775,
|
|
"grad_norm": 0.020739601890762033,
|
|
"learning_rate": 2.2637533298757064e-05,
|
|
"loss": 0.9549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22186589241027832,
|
|
"step": 603,
|
|
"valid_targets_mean": 13228.7,
|
|
"valid_targets_min": 1918
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.01934178591389983,
|
|
"learning_rate": 2.2578597338402567e-05,
|
|
"loss": 0.9521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2261563092470169,
|
|
"step": 604,
|
|
"valid_targets_mean": 15912.4,
|
|
"valid_targets_min": 15341
|
|
},
|
|
{
|
|
"epoch": 2.5756929637526653,
|
|
"grad_norm": 0.021237342601929672,
|
|
"learning_rate": 2.2519638599135844e-05,
|
|
"loss": 1.0041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658895254135132,
|
|
"step": 605,
|
|
"valid_targets_mean": 16110.1,
|
|
"valid_targets_min": 14286
|
|
},
|
|
{
|
|
"epoch": 2.579957356076759,
|
|
"grad_norm": 0.022450537284325344,
|
|
"learning_rate": 2.2460657601788875e-05,
|
|
"loss": 0.9527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17862333357334137,
|
|
"step": 606,
|
|
"valid_targets_mean": 9186.5,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 2.5842217484008527,
|
|
"grad_norm": 0.01987780733711434,
|
|
"learning_rate": 2.2401654867390256e-05,
|
|
"loss": 0.9434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22894319891929626,
|
|
"step": 607,
|
|
"valid_targets_mean": 16210.4,
|
|
"valid_targets_min": 15611
|
|
},
|
|
{
|
|
"epoch": 2.588486140724947,
|
|
"grad_norm": 0.020345595663217322,
|
|
"learning_rate": 2.2342630917160605e-05,
|
|
"loss": 0.9576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28595733642578125,
|
|
"step": 608,
|
|
"valid_targets_mean": 15969.8,
|
|
"valid_targets_min": 12180
|
|
},
|
|
{
|
|
"epoch": 2.5927505330490406,
|
|
"grad_norm": 0.019921569592067957,
|
|
"learning_rate": 2.2283586272507975e-05,
|
|
"loss": 0.9388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18368935585021973,
|
|
"step": 609,
|
|
"valid_targets_mean": 12055.6,
|
|
"valid_targets_min": 8433
|
|
},
|
|
{
|
|
"epoch": 2.5970149253731343,
|
|
"grad_norm": 0.019753515692936954,
|
|
"learning_rate": 2.2224521455023193e-05,
|
|
"loss": 0.9518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25831496715545654,
|
|
"step": 610,
|
|
"valid_targets_mean": 16070.4,
|
|
"valid_targets_min": 13812
|
|
},
|
|
{
|
|
"epoch": 2.6012793176972284,
|
|
"grad_norm": 0.0240567653905672,
|
|
"learning_rate": 2.216543698647534e-05,
|
|
"loss": 0.9342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293610155582428,
|
|
"step": 611,
|
|
"valid_targets_mean": 16128.1,
|
|
"valid_targets_min": 15016
|
|
},
|
|
{
|
|
"epoch": 2.605543710021322,
|
|
"grad_norm": 0.02027280547373075,
|
|
"learning_rate": 2.210633338880704e-05,
|
|
"loss": 0.9714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21246719360351562,
|
|
"step": 612,
|
|
"valid_targets_mean": 13523.2,
|
|
"valid_targets_min": 11323
|
|
},
|
|
{
|
|
"epoch": 2.609808102345416,
|
|
"grad_norm": 0.021660445658961257,
|
|
"learning_rate": 2.204721118412994e-05,
|
|
"loss": 0.996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006371259689331,
|
|
"step": 613,
|
|
"valid_targets_mean": 16097.7,
|
|
"valid_targets_min": 15129
|
|
},
|
|
{
|
|
"epoch": 2.6140724946695095,
|
|
"grad_norm": 0.02050499594983497,
|
|
"learning_rate": 2.1988070894720037e-05,
|
|
"loss": 0.9443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875359058380127,
|
|
"step": 614,
|
|
"valid_targets_mean": 16174.8,
|
|
"valid_targets_min": 15421
|
|
},
|
|
{
|
|
"epoch": 2.6183368869936032,
|
|
"grad_norm": 0.019079724506911186,
|
|
"learning_rate": 2.192891304301309e-05,
|
|
"loss": 0.9762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21451911330223083,
|
|
"step": 615,
|
|
"valid_targets_mean": 14380.2,
|
|
"valid_targets_min": 11927
|
|
},
|
|
{
|
|
"epoch": 2.6226012793176974,
|
|
"grad_norm": 0.021365015574036184,
|
|
"learning_rate": 2.18697381516e-05,
|
|
"loss": 0.9578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637201249599457,
|
|
"step": 616,
|
|
"valid_targets_mean": 16225.0,
|
|
"valid_targets_min": 15710
|
|
},
|
|
{
|
|
"epoch": 2.626865671641791,
|
|
"grad_norm": 0.022959396057696545,
|
|
"learning_rate": 2.181054674322221e-05,
|
|
"loss": 0.9297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1822773814201355,
|
|
"step": 617,
|
|
"valid_targets_mean": 10140.2,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 2.631130063965885,
|
|
"grad_norm": 0.019592001514572376,
|
|
"learning_rate": 2.1751339340767043e-05,
|
|
"loss": 0.9091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24763141572475433,
|
|
"step": 618,
|
|
"valid_targets_mean": 15976.2,
|
|
"valid_targets_min": 15194
|
|
},
|
|
{
|
|
"epoch": 2.635394456289979,
|
|
"grad_norm": 0.022676433687634837,
|
|
"learning_rate": 2.169211646726313e-05,
|
|
"loss": 0.992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986225187778473,
|
|
"step": 619,
|
|
"valid_targets_mean": 16007.5,
|
|
"valid_targets_min": 14018
|
|
},
|
|
{
|
|
"epoch": 2.6396588486140726,
|
|
"grad_norm": 0.020307466385644426,
|
|
"learning_rate": 2.163287864587576e-05,
|
|
"loss": 0.9362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14452718198299408,
|
|
"step": 620,
|
|
"valid_targets_mean": 8315.2,
|
|
"valid_targets_min": 2369
|
|
},
|
|
{
|
|
"epoch": 2.6439232409381663,
|
|
"grad_norm": 0.018943329756775956,
|
|
"learning_rate": 2.157362639990229e-05,
|
|
"loss": 0.9136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22754719853401184,
|
|
"step": 621,
|
|
"valid_targets_mean": 16077.3,
|
|
"valid_targets_min": 14036
|
|
},
|
|
{
|
|
"epoch": 2.64818763326226,
|
|
"grad_norm": 0.021647400277637306,
|
|
"learning_rate": 2.151436025276747e-05,
|
|
"loss": 0.9435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887853980064392,
|
|
"step": 622,
|
|
"valid_targets_mean": 16142.0,
|
|
"valid_targets_min": 14202
|
|
},
|
|
{
|
|
"epoch": 2.6524520255863537,
|
|
"grad_norm": 0.019428160094080122,
|
|
"learning_rate": 2.145508072801888e-05,
|
|
"loss": 0.9716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2235621213912964,
|
|
"step": 623,
|
|
"valid_targets_mean": 13619.5,
|
|
"valid_targets_min": 10539
|
|
},
|
|
{
|
|
"epoch": 2.656716417910448,
|
|
"grad_norm": 0.020950015778347146,
|
|
"learning_rate": 2.1395788349322256e-05,
|
|
"loss": 0.9739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24686001241207123,
|
|
"step": 624,
|
|
"valid_targets_mean": 16119.5,
|
|
"valid_targets_min": 14260
|
|
},
|
|
{
|
|
"epoch": 2.6609808102345416,
|
|
"grad_norm": 0.021415391285557597,
|
|
"learning_rate": 2.133648364045689e-05,
|
|
"loss": 0.9387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.280288964509964,
|
|
"step": 625,
|
|
"valid_targets_mean": 16086.8,
|
|
"valid_targets_min": 14645
|
|
},
|
|
{
|
|
"epoch": 2.6652452025586353,
|
|
"grad_norm": 0.020853173612708862,
|
|
"learning_rate": 2.1277167125310996e-05,
|
|
"loss": 0.9608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2431098222732544,
|
|
"step": 626,
|
|
"valid_targets_mean": 15166.2,
|
|
"valid_targets_min": 13667
|
|
},
|
|
{
|
|
"epoch": 2.6695095948827294,
|
|
"grad_norm": 0.020565597988398392,
|
|
"learning_rate": 2.1217839327877098e-05,
|
|
"loss": 0.9416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570705711841583,
|
|
"step": 627,
|
|
"valid_targets_mean": 16185.3,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 2.673773987206823,
|
|
"grad_norm": 0.021869989351923382,
|
|
"learning_rate": 2.1158500772247352e-05,
|
|
"loss": 0.9715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24044756591320038,
|
|
"step": 628,
|
|
"valid_targets_mean": 13055.9,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 2.678038379530917,
|
|
"grad_norm": 0.020652325317233666,
|
|
"learning_rate": 2.1099151982608985e-05,
|
|
"loss": 0.9766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2334204614162445,
|
|
"step": 629,
|
|
"valid_targets_mean": 15772.8,
|
|
"valid_targets_min": 14820
|
|
},
|
|
{
|
|
"epoch": 2.6823027718550105,
|
|
"grad_norm": 0.02147554084577331,
|
|
"learning_rate": 2.1039793483239607e-05,
|
|
"loss": 0.9912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767207622528076,
|
|
"step": 630,
|
|
"valid_targets_mean": 16128.1,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 2.6865671641791042,
|
|
"grad_norm": 0.020934678192748368,
|
|
"learning_rate": 2.0980425798502616e-05,
|
|
"loss": 0.9259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15157610177993774,
|
|
"step": 631,
|
|
"valid_targets_mean": 8067.6,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 2.6908315565031984,
|
|
"grad_norm": 0.01952986313386112,
|
|
"learning_rate": 2.092104945284255e-05,
|
|
"loss": 0.9203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22488640248775482,
|
|
"step": 632,
|
|
"valid_targets_mean": 15833.1,
|
|
"valid_targets_min": 14915
|
|
},
|
|
{
|
|
"epoch": 2.695095948827292,
|
|
"grad_norm": 0.02092183967107276,
|
|
"learning_rate": 2.0861664970780434e-05,
|
|
"loss": 0.9317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733396887779236,
|
|
"step": 633,
|
|
"valid_targets_mean": 16074.4,
|
|
"valid_targets_min": 13005
|
|
},
|
|
{
|
|
"epoch": 2.699360341151386,
|
|
"grad_norm": 0.020748495055080023,
|
|
"learning_rate": 2.08022728769092e-05,
|
|
"loss": 0.968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1905984729528427,
|
|
"step": 634,
|
|
"valid_targets_mean": 10732.6,
|
|
"valid_targets_min": 6568
|
|
},
|
|
{
|
|
"epoch": 2.70362473347548,
|
|
"grad_norm": 0.021200553910354178,
|
|
"learning_rate": 2.0742873695889005e-05,
|
|
"loss": 1.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25567522644996643,
|
|
"step": 635,
|
|
"valid_targets_mean": 15941.3,
|
|
"valid_targets_min": 13077
|
|
},
|
|
{
|
|
"epoch": 2.7078891257995736,
|
|
"grad_norm": 0.021773236066182103,
|
|
"learning_rate": 2.0683467952442626e-05,
|
|
"loss": 0.9256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763078510761261,
|
|
"step": 636,
|
|
"valid_targets_mean": 16160.9,
|
|
"valid_targets_min": 15033
|
|
},
|
|
{
|
|
"epoch": 2.7121535181236673,
|
|
"grad_norm": 0.02029683173222719,
|
|
"learning_rate": 2.0624056171350785e-05,
|
|
"loss": 0.9585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2073425054550171,
|
|
"step": 637,
|
|
"valid_targets_mean": 13388.1,
|
|
"valid_targets_min": 10981
|
|
},
|
|
{
|
|
"epoch": 2.716417910447761,
|
|
"grad_norm": 0.022284498089516092,
|
|
"learning_rate": 2.0564638877447566e-05,
|
|
"loss": 0.9635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2874143421649933,
|
|
"step": 638,
|
|
"valid_targets_mean": 16091.8,
|
|
"valid_targets_min": 14033
|
|
},
|
|
{
|
|
"epoch": 2.7206823027718547,
|
|
"grad_norm": 0.022719687987602735,
|
|
"learning_rate": 2.0505216595615742e-05,
|
|
"loss": 0.9774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29986849427223206,
|
|
"step": 639,
|
|
"valid_targets_mean": 16127.6,
|
|
"valid_targets_min": 14773
|
|
},
|
|
{
|
|
"epoch": 2.724946695095949,
|
|
"grad_norm": 0.01900979271569081,
|
|
"learning_rate": 2.044578985078215e-05,
|
|
"loss": 0.9602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22792106866836548,
|
|
"step": 640,
|
|
"valid_targets_mean": 15363.4,
|
|
"valid_targets_min": 14134
|
|
},
|
|
{
|
|
"epoch": 2.7292110874200426,
|
|
"grad_norm": 0.02312726878018071,
|
|
"learning_rate": 2.0386359167913046e-05,
|
|
"loss": 0.9659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2774304747581482,
|
|
"step": 641,
|
|
"valid_targets_mean": 16115.0,
|
|
"valid_targets_min": 14670
|
|
},
|
|
{
|
|
"epoch": 2.7334754797441363,
|
|
"grad_norm": 0.023181424000067865,
|
|
"learning_rate": 2.0326925072009485e-05,
|
|
"loss": 1.0093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1740213930606842,
|
|
"step": 642,
|
|
"valid_targets_mean": 10474.2,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 2.7377398720682304,
|
|
"grad_norm": 0.01916014456181609,
|
|
"learning_rate": 2.0267488088102657e-05,
|
|
"loss": 0.9846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21843719482421875,
|
|
"step": 643,
|
|
"valid_targets_mean": 15896.8,
|
|
"valid_targets_min": 15071
|
|
},
|
|
{
|
|
"epoch": 2.742004264392324,
|
|
"grad_norm": 0.02061734097762422,
|
|
"learning_rate": 2.0208048741249288e-05,
|
|
"loss": 0.9303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768741250038147,
|
|
"step": 644,
|
|
"valid_targets_mean": 16211.3,
|
|
"valid_targets_min": 15430
|
|
},
|
|
{
|
|
"epoch": 2.746268656716418,
|
|
"grad_norm": 0.020604886894584538,
|
|
"learning_rate": 2.014860755652695e-05,
|
|
"loss": 0.9254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15698102116584778,
|
|
"step": 645,
|
|
"valid_targets_mean": 9507.4,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 2.750533049040512,
|
|
"grad_norm": 0.019318719060371862,
|
|
"learning_rate": 2.0089165059029477e-05,
|
|
"loss": 0.9769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2272557020187378,
|
|
"step": 646,
|
|
"valid_targets_mean": 16218.2,
|
|
"valid_targets_min": 15608
|
|
},
|
|
{
|
|
"epoch": 2.7547974413646057,
|
|
"grad_norm": 0.021993531996547227,
|
|
"learning_rate": 2.0029721773862277e-05,
|
|
"loss": 0.9259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600211501121521,
|
|
"step": 647,
|
|
"valid_targets_mean": 15960.8,
|
|
"valid_targets_min": 9185
|
|
},
|
|
{
|
|
"epoch": 2.7590618336886994,
|
|
"grad_norm": 0.020245710484209232,
|
|
"learning_rate": 1.997027822613773e-05,
|
|
"loss": 0.9193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18499726057052612,
|
|
"step": 648,
|
|
"valid_targets_mean": 12939.8,
|
|
"valid_targets_min": 9861
|
|
},
|
|
{
|
|
"epoch": 2.763326226012793,
|
|
"grad_norm": 0.02015037768551208,
|
|
"learning_rate": 1.9910834940970533e-05,
|
|
"loss": 1.0207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2872370481491089,
|
|
"step": 649,
|
|
"valid_targets_mean": 16154.9,
|
|
"valid_targets_min": 15435
|
|
},
|
|
{
|
|
"epoch": 2.767590618336887,
|
|
"grad_norm": 0.021004690939682383,
|
|
"learning_rate": 1.985139244347305e-05,
|
|
"loss": 0.9149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695764899253845,
|
|
"step": 650,
|
|
"valid_targets_mean": 16160.9,
|
|
"valid_targets_min": 15099
|
|
},
|
|
{
|
|
"epoch": 2.771855010660981,
|
|
"grad_norm": 0.02056591989251542,
|
|
"learning_rate": 1.979195125875072e-05,
|
|
"loss": 0.9607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21235588192939758,
|
|
"step": 651,
|
|
"valid_targets_mean": 14259.2,
|
|
"valid_targets_min": 12433
|
|
},
|
|
{
|
|
"epoch": 2.7761194029850746,
|
|
"grad_norm": 0.02175091927504376,
|
|
"learning_rate": 1.9732511911897353e-05,
|
|
"loss": 0.9638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766125798225403,
|
|
"step": 652,
|
|
"valid_targets_mean": 16035.8,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 2.7803837953091683,
|
|
"grad_norm": 0.02148975527261959,
|
|
"learning_rate": 1.9673074927990525e-05,
|
|
"loss": 0.9501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22407793998718262,
|
|
"step": 653,
|
|
"valid_targets_mean": 12639.2,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 2.7846481876332625,
|
|
"grad_norm": 0.018987391983864843,
|
|
"learning_rate": 1.9613640832086957e-05,
|
|
"loss": 0.9165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22934819757938385,
|
|
"step": 654,
|
|
"valid_targets_mean": 16169.5,
|
|
"valid_targets_min": 15639
|
|
},
|
|
{
|
|
"epoch": 2.788912579957356,
|
|
"grad_norm": 0.020166697291842,
|
|
"learning_rate": 1.9554210149217855e-05,
|
|
"loss": 0.9248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25665080547332764,
|
|
"step": 655,
|
|
"valid_targets_mean": 16046.8,
|
|
"valid_targets_min": 13764
|
|
},
|
|
{
|
|
"epoch": 2.79317697228145,
|
|
"grad_norm": 0.021099641167832296,
|
|
"learning_rate": 1.9494783404384265e-05,
|
|
"loss": 0.9623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18255582451820374,
|
|
"step": 656,
|
|
"valid_targets_mean": 9703.1,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 2.7974413646055436,
|
|
"grad_norm": 0.019031454366234962,
|
|
"learning_rate": 1.9435361122552437e-05,
|
|
"loss": 0.913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22038620710372925,
|
|
"step": 657,
|
|
"valid_targets_mean": 16022.1,
|
|
"valid_targets_min": 14923
|
|
},
|
|
{
|
|
"epoch": 2.8017057569296373,
|
|
"grad_norm": 0.02251873009979076,
|
|
"learning_rate": 1.9375943828649215e-05,
|
|
"loss": 0.9666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816320061683655,
|
|
"step": 658,
|
|
"valid_targets_mean": 16101.4,
|
|
"valid_targets_min": 14899
|
|
},
|
|
{
|
|
"epoch": 2.8059701492537314,
|
|
"grad_norm": 0.020425465452093602,
|
|
"learning_rate": 1.9316532047557378e-05,
|
|
"loss": 0.9275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18331874907016754,
|
|
"step": 659,
|
|
"valid_targets_mean": 11431.4,
|
|
"valid_targets_min": 8642
|
|
},
|
|
{
|
|
"epoch": 2.810234541577825,
|
|
"grad_norm": 0.019711870655534557,
|
|
"learning_rate": 1.9257126304110998e-05,
|
|
"loss": 0.9282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23805047571659088,
|
|
"step": 660,
|
|
"valid_targets_mean": 16144.6,
|
|
"valid_targets_min": 15327
|
|
},
|
|
{
|
|
"epoch": 2.814498933901919,
|
|
"grad_norm": 0.020979804748688477,
|
|
"learning_rate": 1.919772712309081e-05,
|
|
"loss": 0.9405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2641265094280243,
|
|
"step": 661,
|
|
"valid_targets_mean": 16216.1,
|
|
"valid_targets_min": 15639
|
|
},
|
|
{
|
|
"epoch": 2.818763326226013,
|
|
"grad_norm": 0.02041458634575585,
|
|
"learning_rate": 1.9138335029219572e-05,
|
|
"loss": 0.9429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21489760279655457,
|
|
"step": 662,
|
|
"valid_targets_mean": 13697.8,
|
|
"valid_targets_min": 12002
|
|
},
|
|
{
|
|
"epoch": 2.8230277185501067,
|
|
"grad_norm": 0.020703989249956197,
|
|
"learning_rate": 1.9078950547157458e-05,
|
|
"loss": 0.9708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665814757347107,
|
|
"step": 663,
|
|
"valid_targets_mean": 15934.8,
|
|
"valid_targets_min": 13697
|
|
},
|
|
{
|
|
"epoch": 2.8272921108742004,
|
|
"grad_norm": 0.020906970385614034,
|
|
"learning_rate": 1.9019574201497387e-05,
|
|
"loss": 0.9881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30490222573280334,
|
|
"step": 664,
|
|
"valid_targets_mean": 16069.9,
|
|
"valid_targets_min": 14516
|
|
},
|
|
{
|
|
"epoch": 2.831556503198294,
|
|
"grad_norm": 0.021319683040076767,
|
|
"learning_rate": 1.8960206516760396e-05,
|
|
"loss": 0.9506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2197318822145462,
|
|
"step": 665,
|
|
"valid_targets_mean": 15389.6,
|
|
"valid_targets_min": 13997
|
|
},
|
|
{
|
|
"epoch": 2.835820895522388,
|
|
"grad_norm": 0.020415046250898395,
|
|
"learning_rate": 1.890084801739102e-05,
|
|
"loss": 0.9178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545510530471802,
|
|
"step": 666,
|
|
"valid_targets_mean": 16108.9,
|
|
"valid_targets_min": 15407
|
|
},
|
|
{
|
|
"epoch": 2.840085287846482,
|
|
"grad_norm": 0.02124335534592593,
|
|
"learning_rate": 1.884149922775265e-05,
|
|
"loss": 0.9602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.206266850233078,
|
|
"step": 667,
|
|
"valid_targets_mean": 11107.2,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 2.8443496801705757,
|
|
"grad_norm": 0.019987655833309255,
|
|
"learning_rate": 1.878216067212291e-05,
|
|
"loss": 0.9446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23538020253181458,
|
|
"step": 668,
|
|
"valid_targets_mean": 15820.6,
|
|
"valid_targets_min": 14631
|
|
},
|
|
{
|
|
"epoch": 2.8486140724946694,
|
|
"grad_norm": 0.02095252565156662,
|
|
"learning_rate": 1.8722832874689007e-05,
|
|
"loss": 0.9594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542591989040375,
|
|
"step": 669,
|
|
"valid_targets_mean": 16204.2,
|
|
"valid_targets_min": 15767
|
|
},
|
|
{
|
|
"epoch": 2.8528784648187635,
|
|
"grad_norm": 0.02189919928275816,
|
|
"learning_rate": 1.8663516359543123e-05,
|
|
"loss": 0.9498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358579397201538,
|
|
"step": 670,
|
|
"valid_targets_mean": 8031.1,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.020422279231803522,
|
|
"learning_rate": 1.860421165067775e-05,
|
|
"loss": 0.9461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2385379821062088,
|
|
"step": 671,
|
|
"valid_targets_mean": 16114.0,
|
|
"valid_targets_min": 14763
|
|
},
|
|
{
|
|
"epoch": 2.861407249466951,
|
|
"grad_norm": 0.023918426032924586,
|
|
"learning_rate": 1.8544919271981125e-05,
|
|
"loss": 0.9553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831515967845917,
|
|
"step": 672,
|
|
"valid_targets_mean": 16148.1,
|
|
"valid_targets_min": 14898
|
|
},
|
|
{
|
|
"epoch": 2.8656716417910446,
|
|
"grad_norm": 0.0199046239586933,
|
|
"learning_rate": 1.8485639747232535e-05,
|
|
"loss": 0.9402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1976347267627716,
|
|
"step": 673,
|
|
"valid_targets_mean": 12766.8,
|
|
"valid_targets_min": 10054
|
|
},
|
|
{
|
|
"epoch": 2.8699360341151388,
|
|
"grad_norm": 0.022861027200618684,
|
|
"learning_rate": 1.8426373600097723e-05,
|
|
"loss": 1.0261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633063793182373,
|
|
"step": 674,
|
|
"valid_targets_mean": 16072.1,
|
|
"valid_targets_min": 14097
|
|
},
|
|
{
|
|
"epoch": 2.8742004264392325,
|
|
"grad_norm": 0.021469609967434085,
|
|
"learning_rate": 1.836712135412424e-05,
|
|
"loss": 0.9279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26836735010147095,
|
|
"step": 675,
|
|
"valid_targets_mean": 16151.2,
|
|
"valid_targets_min": 14583
|
|
},
|
|
{
|
|
"epoch": 2.878464818763326,
|
|
"grad_norm": 0.020117397980433226,
|
|
"learning_rate": 1.8307883532736878e-05,
|
|
"loss": 0.9331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21362093091011047,
|
|
"step": 676,
|
|
"valid_targets_mean": 14840.1,
|
|
"valid_targets_min": 12977
|
|
},
|
|
{
|
|
"epoch": 2.88272921108742,
|
|
"grad_norm": 0.021113859541205846,
|
|
"learning_rate": 1.8248660659232964e-05,
|
|
"loss": 0.9431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28226548433303833,
|
|
"step": 677,
|
|
"valid_targets_mean": 16107.4,
|
|
"valid_targets_min": 15179
|
|
},
|
|
{
|
|
"epoch": 2.886993603411514,
|
|
"grad_norm": 0.022225328288671416,
|
|
"learning_rate": 1.8189453256777798e-05,
|
|
"loss": 0.8911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22605980932712555,
|
|
"step": 678,
|
|
"valid_targets_mean": 13364.0,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 2.8912579957356077,
|
|
"grad_norm": 0.02016428205083576,
|
|
"learning_rate": 1.8130261848399996e-05,
|
|
"loss": 0.962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205815315246582,
|
|
"step": 679,
|
|
"valid_targets_mean": 15667.0,
|
|
"valid_targets_min": 14788
|
|
},
|
|
{
|
|
"epoch": 2.8955223880597014,
|
|
"grad_norm": 0.02162263763576656,
|
|
"learning_rate": 1.8071086956986916e-05,
|
|
"loss": 0.931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868203818798065,
|
|
"step": 680,
|
|
"valid_targets_mean": 16121.1,
|
|
"valid_targets_min": 14849
|
|
},
|
|
{
|
|
"epoch": 2.8997867803837956,
|
|
"grad_norm": 0.021563119124291076,
|
|
"learning_rate": 1.8011929105279967e-05,
|
|
"loss": 0.9424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13803821802139282,
|
|
"step": 681,
|
|
"valid_targets_mean": 8441.8,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 2.9040511727078893,
|
|
"grad_norm": 0.019134782594384718,
|
|
"learning_rate": 1.795278881587007e-05,
|
|
"loss": 0.962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419804483652115,
|
|
"step": 682,
|
|
"valid_targets_mean": 16108.9,
|
|
"valid_targets_min": 14464
|
|
},
|
|
{
|
|
"epoch": 2.908315565031983,
|
|
"grad_norm": 0.021698999833651065,
|
|
"learning_rate": 1.7893666611192962e-05,
|
|
"loss": 0.9356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2692856788635254,
|
|
"step": 683,
|
|
"valid_targets_mean": 16157.2,
|
|
"valid_targets_min": 15475
|
|
},
|
|
{
|
|
"epoch": 2.9125799573560767,
|
|
"grad_norm": 0.021305612849151192,
|
|
"learning_rate": 1.783456301352467e-05,
|
|
"loss": 0.9759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15726087987422943,
|
|
"step": 684,
|
|
"valid_targets_mean": 10471.7,
|
|
"valid_targets_min": 4321
|
|
},
|
|
{
|
|
"epoch": 2.9168443496801704,
|
|
"grad_norm": 0.02030359098539184,
|
|
"learning_rate": 1.7775478544976813e-05,
|
|
"loss": 0.9818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25104421377182007,
|
|
"step": 685,
|
|
"valid_targets_mean": 16074.8,
|
|
"valid_targets_min": 14311
|
|
},
|
|
{
|
|
"epoch": 2.9211087420042645,
|
|
"grad_norm": 0.020408168224249706,
|
|
"learning_rate": 1.7716413727492035e-05,
|
|
"loss": 0.9617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28901344537734985,
|
|
"step": 686,
|
|
"valid_targets_mean": 16121.6,
|
|
"valid_targets_min": 15011
|
|
},
|
|
{
|
|
"epoch": 2.925373134328358,
|
|
"grad_norm": 0.01947158046695922,
|
|
"learning_rate": 1.7657369082839392e-05,
|
|
"loss": 0.9726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21650640666484833,
|
|
"step": 687,
|
|
"valid_targets_mean": 13182.6,
|
|
"valid_targets_min": 10691
|
|
},
|
|
{
|
|
"epoch": 2.929637526652452,
|
|
"grad_norm": 0.020647949803064832,
|
|
"learning_rate": 1.7598345132609747e-05,
|
|
"loss": 0.948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610868811607361,
|
|
"step": 688,
|
|
"valid_targets_mean": 16233.4,
|
|
"valid_targets_min": 15526
|
|
},
|
|
{
|
|
"epoch": 2.933901918976546,
|
|
"grad_norm": 0.021624570312573036,
|
|
"learning_rate": 1.7539342398211132e-05,
|
|
"loss": 0.9752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955983877182007,
|
|
"step": 689,
|
|
"valid_targets_mean": 16035.1,
|
|
"valid_targets_min": 14578
|
|
},
|
|
{
|
|
"epoch": 2.9381663113006398,
|
|
"grad_norm": 0.019542862017111657,
|
|
"learning_rate": 1.748036140086416e-05,
|
|
"loss": 0.9679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23876111209392548,
|
|
"step": 690,
|
|
"valid_targets_mean": 15261.3,
|
|
"valid_targets_min": 14188
|
|
},
|
|
{
|
|
"epoch": 2.9424307036247335,
|
|
"grad_norm": 0.019907395623217004,
|
|
"learning_rate": 1.742140266159744e-05,
|
|
"loss": 0.9401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262081503868103,
|
|
"step": 691,
|
|
"valid_targets_mean": 16066.5,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 2.946695095948827,
|
|
"grad_norm": 0.02110171751750327,
|
|
"learning_rate": 1.7362466701242943e-05,
|
|
"loss": 0.9523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21154263615608215,
|
|
"step": 692,
|
|
"valid_targets_mean": 11339.0,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 2.950959488272921,
|
|
"grad_norm": 0.019188687969013414,
|
|
"learning_rate": 1.7303554040431426e-05,
|
|
"loss": 0.9694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23878350853919983,
|
|
"step": 693,
|
|
"valid_targets_mean": 16091.2,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 2.955223880597015,
|
|
"grad_norm": 0.020988711482645778,
|
|
"learning_rate": 1.7244665199587812e-05,
|
|
"loss": 0.933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25067949295043945,
|
|
"step": 694,
|
|
"valid_targets_mean": 16194.9,
|
|
"valid_targets_min": 15360
|
|
},
|
|
{
|
|
"epoch": 2.9594882729211087,
|
|
"grad_norm": 0.020885432339642542,
|
|
"learning_rate": 1.7185800698926594e-05,
|
|
"loss": 0.9851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15313079953193665,
|
|
"step": 695,
|
|
"valid_targets_mean": 9268.4,
|
|
"valid_targets_min": 2424
|
|
},
|
|
{
|
|
"epoch": 2.9637526652452024,
|
|
"grad_norm": 0.02030014766978381,
|
|
"learning_rate": 1.7126961058447276e-05,
|
|
"loss": 0.9884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24736404418945312,
|
|
"step": 696,
|
|
"valid_targets_mean": 16054.0,
|
|
"valid_targets_min": 14671
|
|
},
|
|
{
|
|
"epoch": 2.9680170575692966,
|
|
"grad_norm": 0.020020143046120056,
|
|
"learning_rate": 1.706814679792973e-05,
|
|
"loss": 0.9672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594475746154785,
|
|
"step": 697,
|
|
"valid_targets_mean": 16155.3,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 2.9722814498933903,
|
|
"grad_norm": 0.020663835089492773,
|
|
"learning_rate": 1.7009358436929632e-05,
|
|
"loss": 0.9721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21901918947696686,
|
|
"step": 698,
|
|
"valid_targets_mean": 12411.4,
|
|
"valid_targets_min": 9914
|
|
},
|
|
{
|
|
"epoch": 2.976545842217484,
|
|
"grad_norm": 0.020299312919622725,
|
|
"learning_rate": 1.6950596494773855e-05,
|
|
"loss": 0.99,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518657445907593,
|
|
"step": 699,
|
|
"valid_targets_mean": 16180.1,
|
|
"valid_targets_min": 14626
|
|
},
|
|
{
|
|
"epoch": 2.9808102345415777,
|
|
"grad_norm": 0.020942951224960683,
|
|
"learning_rate": 1.6891861490555906e-05,
|
|
"loss": 0.9284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26724305748939514,
|
|
"step": 700,
|
|
"valid_targets_mean": 16123.3,
|
|
"valid_targets_min": 15289
|
|
},
|
|
{
|
|
"epoch": 2.9850746268656714,
|
|
"grad_norm": 0.01966588605719068,
|
|
"learning_rate": 1.683315394313132e-05,
|
|
"loss": 0.953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181733250617981,
|
|
"step": 701,
|
|
"valid_targets_mean": 14826.7,
|
|
"valid_targets_min": 13241
|
|
},
|
|
{
|
|
"epoch": 2.9893390191897655,
|
|
"grad_norm": 0.020216030645912483,
|
|
"learning_rate": 1.677447437111309e-05,
|
|
"loss": 0.9519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2879751920700073,
|
|
"step": 702,
|
|
"valid_targets_mean": 16083.4,
|
|
"valid_targets_min": 14680
|
|
},
|
|
{
|
|
"epoch": 2.9936034115138592,
|
|
"grad_norm": 0.0220896464422224,
|
|
"learning_rate": 1.671582329286707e-05,
|
|
"loss": 0.9394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22348260879516602,
|
|
"step": 703,
|
|
"valid_targets_mean": 12696.6,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 2.997867803837953,
|
|
"grad_norm": 0.019321692819932266,
|
|
"learning_rate": 1.66572012265074e-05,
|
|
"loss": 0.9485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21988409757614136,
|
|
"step": 704,
|
|
"valid_targets_mean": 15441.7,
|
|
"valid_targets_min": 14762
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.029762277325649716,
|
|
"learning_rate": 1.6598608689891953e-05,
|
|
"loss": 0.931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45550447702407837,
|
|
"step": 705,
|
|
"valid_targets_mean": 11316.6,
|
|
"valid_targets_min": 3048
|
|
},
|
|
{
|
|
"epoch": 3.0042643923240937,
|
|
"grad_norm": 0.019559360353045837,
|
|
"learning_rate": 1.654004620061773e-05,
|
|
"loss": 0.9611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23371516168117523,
|
|
"step": 706,
|
|
"valid_targets_mean": 16102.5,
|
|
"valid_targets_min": 14451
|
|
},
|
|
{
|
|
"epoch": 3.008528784648188,
|
|
"grad_norm": 0.020640856811458698,
|
|
"learning_rate": 1.6481514276016297e-05,
|
|
"loss": 0.9397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25629952549934387,
|
|
"step": 707,
|
|
"valid_targets_mean": 15997.8,
|
|
"valid_targets_min": 10621
|
|
},
|
|
{
|
|
"epoch": 3.0127931769722816,
|
|
"grad_norm": 0.01955401765745733,
|
|
"learning_rate": 1.6423013433149207e-05,
|
|
"loss": 0.9309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2040502429008484,
|
|
"step": 708,
|
|
"valid_targets_mean": 13259.4,
|
|
"valid_targets_min": 10474
|
|
},
|
|
{
|
|
"epoch": 3.0170575692963753,
|
|
"grad_norm": 0.02022462905164568,
|
|
"learning_rate": 1.636454418880347e-05,
|
|
"loss": 0.937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25201746821403503,
|
|
"step": 709,
|
|
"valid_targets_mean": 16183.6,
|
|
"valid_targets_min": 14902
|
|
},
|
|
{
|
|
"epoch": 3.021321961620469,
|
|
"grad_norm": 0.021207766944594485,
|
|
"learning_rate": 1.630610705948693e-05,
|
|
"loss": 0.9255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26892566680908203,
|
|
"step": 710,
|
|
"valid_targets_mean": 16187.1,
|
|
"valid_targets_min": 15589
|
|
},
|
|
{
|
|
"epoch": 3.025586353944563,
|
|
"grad_norm": 0.019438021140446456,
|
|
"learning_rate": 1.6247702561423753e-05,
|
|
"loss": 0.9565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20520862936973572,
|
|
"step": 711,
|
|
"valid_targets_mean": 13321.3,
|
|
"valid_targets_min": 11538
|
|
},
|
|
{
|
|
"epoch": 3.029850746268657,
|
|
"grad_norm": 0.0207994530288978,
|
|
"learning_rate": 1.6189331210549828e-05,
|
|
"loss": 0.9862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983323633670807,
|
|
"step": 712,
|
|
"valid_targets_mean": 16095.5,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 3.0341151385927505,
|
|
"grad_norm": 0.022060722218969413,
|
|
"learning_rate": 1.613099352250825e-05,
|
|
"loss": 0.9561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23267187178134918,
|
|
"step": 713,
|
|
"valid_targets_mean": 13058.9,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 3.038379530916844,
|
|
"grad_norm": 0.020137842301823836,
|
|
"learning_rate": 1.6072690012644717e-05,
|
|
"loss": 0.9613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25193697214126587,
|
|
"step": 714,
|
|
"valid_targets_mean": 15768.5,
|
|
"valid_targets_min": 14312
|
|
},
|
|
{
|
|
"epoch": 3.0426439232409384,
|
|
"grad_norm": 0.020460342700984124,
|
|
"learning_rate": 1.6014421196003022e-05,
|
|
"loss": 0.922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2384202480316162,
|
|
"step": 715,
|
|
"valid_targets_mean": 16237.3,
|
|
"valid_targets_min": 15689
|
|
},
|
|
{
|
|
"epoch": 3.046908315565032,
|
|
"grad_norm": 0.02125110452598923,
|
|
"learning_rate": 1.5956187587320468e-05,
|
|
"loss": 0.9242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16637656092643738,
|
|
"step": 716,
|
|
"valid_targets_mean": 10445.0,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 3.0511727078891258,
|
|
"grad_norm": 0.01942394704371189,
|
|
"learning_rate": 1.5897989701023355e-05,
|
|
"loss": 0.9396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23076876997947693,
|
|
"step": 717,
|
|
"valid_targets_mean": 16039.9,
|
|
"valid_targets_min": 14935
|
|
},
|
|
{
|
|
"epoch": 3.0554371002132195,
|
|
"grad_norm": 0.020114605048941418,
|
|
"learning_rate": 1.58398280512224e-05,
|
|
"loss": 0.985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28203845024108887,
|
|
"step": 718,
|
|
"valid_targets_mean": 16135.4,
|
|
"valid_targets_min": 15244
|
|
},
|
|
{
|
|
"epoch": 3.0597014925373136,
|
|
"grad_norm": 0.021185470677767115,
|
|
"learning_rate": 1.5781703151708215e-05,
|
|
"loss": 0.9407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17241691052913666,
|
|
"step": 719,
|
|
"valid_targets_mean": 10550.5,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 3.0639658848614073,
|
|
"grad_norm": 0.02078085600451077,
|
|
"learning_rate": 1.5723615515946773e-05,
|
|
"loss": 0.9821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25103282928466797,
|
|
"step": 720,
|
|
"valid_targets_mean": 16171.0,
|
|
"valid_targets_min": 15322
|
|
},
|
|
{
|
|
"epoch": 3.068230277185501,
|
|
"grad_norm": 0.021216446200774765,
|
|
"learning_rate": 1.5665565657074874e-05,
|
|
"loss": 0.9554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27320724725723267,
|
|
"step": 721,
|
|
"valid_targets_mean": 16126.6,
|
|
"valid_targets_min": 14321
|
|
},
|
|
{
|
|
"epoch": 3.0724946695095947,
|
|
"grad_norm": 0.020062746846560557,
|
|
"learning_rate": 1.560755408789558e-05,
|
|
"loss": 0.9361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21006815135478973,
|
|
"step": 722,
|
|
"valid_targets_mean": 14209.3,
|
|
"valid_targets_min": 4811
|
|
},
|
|
{
|
|
"epoch": 3.076759061833689,
|
|
"grad_norm": 0.01972703735812746,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 0.9688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26377683877944946,
|
|
"step": 723,
|
|
"valid_targets_mean": 16145.7,
|
|
"valid_targets_min": 14992
|
|
},
|
|
{
|
|
"epoch": 3.0810234541577826,
|
|
"grad_norm": 0.021400151633126443,
|
|
"learning_rate": 1.5491647868131343e-05,
|
|
"loss": 0.9759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619912922382355,
|
|
"step": 724,
|
|
"valid_targets_mean": 16240.2,
|
|
"valid_targets_min": 15444
|
|
},
|
|
{
|
|
"epoch": 3.0852878464818763,
|
|
"grad_norm": 0.019273532406766845,
|
|
"learning_rate": 1.5433754241443223e-05,
|
|
"loss": 0.9382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22774362564086914,
|
|
"step": 725,
|
|
"valid_targets_mean": 15213.2,
|
|
"valid_targets_min": 7923
|
|
},
|
|
{
|
|
"epoch": 3.08955223880597,
|
|
"grad_norm": 0.020156989385786212,
|
|
"learning_rate": 1.53759009522323e-05,
|
|
"loss": 0.9386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513734698295593,
|
|
"step": 726,
|
|
"valid_targets_mean": 16175.7,
|
|
"valid_targets_min": 15550
|
|
},
|
|
{
|
|
"epoch": 3.093816631130064,
|
|
"grad_norm": 0.020174402567532185,
|
|
"learning_rate": 1.5318088511565185e-05,
|
|
"loss": 0.9125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18984247744083405,
|
|
"step": 727,
|
|
"valid_targets_mean": 11907.9,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 3.098081023454158,
|
|
"grad_norm": 0.019059027963605334,
|
|
"learning_rate": 1.5260317430147627e-05,
|
|
"loss": 0.9247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2232477068901062,
|
|
"step": 728,
|
|
"valid_targets_mean": 15399.7,
|
|
"valid_targets_min": 7924
|
|
},
|
|
{
|
|
"epoch": 3.1023454157782515,
|
|
"grad_norm": 0.021199901535799336,
|
|
"learning_rate": 1.5202588218320024e-05,
|
|
"loss": 0.9436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27230608463287354,
|
|
"step": 729,
|
|
"valid_targets_mean": 16183.1,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 3.106609808102345,
|
|
"grad_norm": 0.0198122731994556,
|
|
"learning_rate": 1.5144901386052924e-05,
|
|
"loss": 0.913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456482857465744,
|
|
"step": 730,
|
|
"valid_targets_mean": 8420.0,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 3.1108742004264394,
|
|
"grad_norm": 0.018810061343305397,
|
|
"learning_rate": 1.5087257442942467e-05,
|
|
"loss": 0.9507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23852373659610748,
|
|
"step": 731,
|
|
"valid_targets_mean": 16101.8,
|
|
"valid_targets_min": 14746
|
|
},
|
|
{
|
|
"epoch": 3.115138592750533,
|
|
"grad_norm": 0.021960534980512286,
|
|
"learning_rate": 1.502965689820593e-05,
|
|
"loss": 0.9694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948272228240967,
|
|
"step": 732,
|
|
"valid_targets_mean": 16053.0,
|
|
"valid_targets_min": 14923
|
|
},
|
|
{
|
|
"epoch": 3.1194029850746268,
|
|
"grad_norm": 0.021047121184910658,
|
|
"learning_rate": 1.4972100260677222e-05,
|
|
"loss": 0.9736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18313106894493103,
|
|
"step": 733,
|
|
"valid_targets_mean": 12094.4,
|
|
"valid_targets_min": 7134
|
|
},
|
|
{
|
|
"epoch": 3.1236673773987205,
|
|
"grad_norm": 0.019889769891331704,
|
|
"learning_rate": 1.4914588038802383e-05,
|
|
"loss": 0.972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584397792816162,
|
|
"step": 734,
|
|
"valid_targets_mean": 16056.4,
|
|
"valid_targets_min": 14506
|
|
},
|
|
{
|
|
"epoch": 3.1279317697228146,
|
|
"grad_norm": 0.02127039373589383,
|
|
"learning_rate": 1.4857120740635084e-05,
|
|
"loss": 0.9675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978874444961548,
|
|
"step": 735,
|
|
"valid_targets_mean": 16067.7,
|
|
"valid_targets_min": 14066
|
|
},
|
|
{
|
|
"epoch": 3.1321961620469083,
|
|
"grad_norm": 0.01986824184293774,
|
|
"learning_rate": 1.4799698873832153e-05,
|
|
"loss": 0.9578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2371843159198761,
|
|
"step": 736,
|
|
"valid_targets_mean": 14349.0,
|
|
"valid_targets_min": 12106
|
|
},
|
|
{
|
|
"epoch": 3.136460554371002,
|
|
"grad_norm": 0.02068971641646746,
|
|
"learning_rate": 1.4742322945649073e-05,
|
|
"loss": 0.9341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733715772628784,
|
|
"step": 737,
|
|
"valid_targets_mean": 16183.7,
|
|
"valid_targets_min": 15677
|
|
},
|
|
{
|
|
"epoch": 3.140724946695096,
|
|
"grad_norm": 0.019670473446908452,
|
|
"learning_rate": 1.4684993462935532e-05,
|
|
"loss": 0.9026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21542948484420776,
|
|
"step": 738,
|
|
"valid_targets_mean": 13350.6,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 3.14498933901919,
|
|
"grad_norm": 0.018911330772559985,
|
|
"learning_rate": 1.462771093213092e-05,
|
|
"loss": 0.9335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22063422203063965,
|
|
"step": 739,
|
|
"valid_targets_mean": 15596.1,
|
|
"valid_targets_min": 14280
|
|
},
|
|
{
|
|
"epoch": 3.1492537313432836,
|
|
"grad_norm": 0.02028751378135386,
|
|
"learning_rate": 1.4570475859259856e-05,
|
|
"loss": 0.9449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27420079708099365,
|
|
"step": 740,
|
|
"valid_targets_mean": 16051.7,
|
|
"valid_targets_min": 14236
|
|
},
|
|
{
|
|
"epoch": 3.1535181236673773,
|
|
"grad_norm": 0.021291291888318374,
|
|
"learning_rate": 1.4513288749927714e-05,
|
|
"loss": 0.9009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15627431869506836,
|
|
"step": 741,
|
|
"valid_targets_mean": 9511.2,
|
|
"valid_targets_min": 2107
|
|
},
|
|
{
|
|
"epoch": 3.1577825159914714,
|
|
"grad_norm": 0.019547483791197535,
|
|
"learning_rate": 1.4456150109316192e-05,
|
|
"loss": 0.9466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23859982192516327,
|
|
"step": 742,
|
|
"valid_targets_mean": 16030.4,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 3.162046908315565,
|
|
"grad_norm": 0.02061225955203012,
|
|
"learning_rate": 1.4399060442178798e-05,
|
|
"loss": 0.9034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2745538353919983,
|
|
"step": 743,
|
|
"valid_targets_mean": 16150.1,
|
|
"valid_targets_min": 15499
|
|
},
|
|
{
|
|
"epoch": 3.166311300639659,
|
|
"grad_norm": 0.020435108312234166,
|
|
"learning_rate": 1.4342020252836437e-05,
|
|
"loss": 0.9529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18290576338768005,
|
|
"step": 744,
|
|
"valid_targets_mean": 10618.1,
|
|
"valid_targets_min": 6216
|
|
},
|
|
{
|
|
"epoch": 3.1705756929637525,
|
|
"grad_norm": 0.018951121031385104,
|
|
"learning_rate": 1.4285030045172913e-05,
|
|
"loss": 0.962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25193238258361816,
|
|
"step": 745,
|
|
"valid_targets_mean": 16102.1,
|
|
"valid_targets_min": 13878
|
|
},
|
|
{
|
|
"epoch": 3.1748400852878467,
|
|
"grad_norm": 0.020575264360088823,
|
|
"learning_rate": 1.422809032263052e-05,
|
|
"loss": 0.9705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2647131681442261,
|
|
"step": 746,
|
|
"valid_targets_mean": 16179.4,
|
|
"valid_targets_min": 15497
|
|
},
|
|
{
|
|
"epoch": 3.1791044776119404,
|
|
"grad_norm": 0.019167641238050828,
|
|
"learning_rate": 1.4171201588205566e-05,
|
|
"loss": 0.9624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20981387794017792,
|
|
"step": 747,
|
|
"valid_targets_mean": 13688.9,
|
|
"valid_targets_min": 11230
|
|
},
|
|
{
|
|
"epoch": 3.183368869936034,
|
|
"grad_norm": 0.02030836214916242,
|
|
"learning_rate": 1.4114364344443935e-05,
|
|
"loss": 0.9324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25591540336608887,
|
|
"step": 748,
|
|
"valid_targets_mean": 16142.8,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 3.1876332622601278,
|
|
"grad_norm": 0.020045456783813,
|
|
"learning_rate": 1.4057579093436653e-05,
|
|
"loss": 0.9236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760125398635864,
|
|
"step": 749,
|
|
"valid_targets_mean": 16200.9,
|
|
"valid_targets_min": 15645
|
|
},
|
|
{
|
|
"epoch": 3.191897654584222,
|
|
"grad_norm": 0.018862889515297524,
|
|
"learning_rate": 1.400084633681546e-05,
|
|
"loss": 0.9414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22148478031158447,
|
|
"step": 750,
|
|
"valid_targets_mean": 15740.9,
|
|
"valid_targets_min": 14751
|
|
},
|
|
{
|
|
"epoch": 3.1961620469083156,
|
|
"grad_norm": 0.020049668184691838,
|
|
"learning_rate": 1.3944166575748355e-05,
|
|
"loss": 0.9207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560426592826843,
|
|
"step": 751,
|
|
"valid_targets_mean": 16142.4,
|
|
"valid_targets_min": 15278
|
|
},
|
|
{
|
|
"epoch": 3.2004264392324093,
|
|
"grad_norm": 0.02043807155115061,
|
|
"learning_rate": 1.3887540310935187e-05,
|
|
"loss": 0.9306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18204426765441895,
|
|
"step": 752,
|
|
"valid_targets_mean": 10657.5,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 3.204690831556503,
|
|
"grad_norm": 0.01837358022810376,
|
|
"learning_rate": 1.3830968042603226e-05,
|
|
"loss": 0.939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22368046641349792,
|
|
"step": 753,
|
|
"valid_targets_mean": 15654.6,
|
|
"valid_targets_min": 14814
|
|
},
|
|
{
|
|
"epoch": 3.208955223880597,
|
|
"grad_norm": 0.02103338712856932,
|
|
"learning_rate": 1.3774450270502762e-05,
|
|
"loss": 0.9164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778072953224182,
|
|
"step": 754,
|
|
"valid_targets_mean": 16166.6,
|
|
"valid_targets_min": 14884
|
|
},
|
|
{
|
|
"epoch": 3.213219616204691,
|
|
"grad_norm": 0.020458295134725118,
|
|
"learning_rate": 1.3717987493902656e-05,
|
|
"loss": 0.9475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427389681339264,
|
|
"step": 755,
|
|
"valid_targets_mean": 8417.1,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 3.2174840085287846,
|
|
"grad_norm": 0.019545845584103497,
|
|
"learning_rate": 1.3661580211585947e-05,
|
|
"loss": 0.9485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23285233974456787,
|
|
"step": 756,
|
|
"valid_targets_mean": 16097.5,
|
|
"valid_targets_min": 15461
|
|
},
|
|
{
|
|
"epoch": 3.2217484008528783,
|
|
"grad_norm": 0.021455978467988995,
|
|
"learning_rate": 1.3605228921845457e-05,
|
|
"loss": 0.9508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29971325397491455,
|
|
"step": 757,
|
|
"valid_targets_mean": 16102.5,
|
|
"valid_targets_min": 14171
|
|
},
|
|
{
|
|
"epoch": 3.2260127931769724,
|
|
"grad_norm": 0.0212416901058517,
|
|
"learning_rate": 1.3548934122479373e-05,
|
|
"loss": 0.9631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18999812006950378,
|
|
"step": 758,
|
|
"valid_targets_mean": 11966.1,
|
|
"valid_targets_min": 9134
|
|
},
|
|
{
|
|
"epoch": 3.230277185501066,
|
|
"grad_norm": 0.020962627862589466,
|
|
"learning_rate": 1.349269631078686e-05,
|
|
"loss": 0.9647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594973146915436,
|
|
"step": 759,
|
|
"valid_targets_mean": 16099.2,
|
|
"valid_targets_min": 14077
|
|
},
|
|
{
|
|
"epoch": 3.23454157782516,
|
|
"grad_norm": 0.020266831571937163,
|
|
"learning_rate": 1.3436515983563659e-05,
|
|
"loss": 0.9643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27094706892967224,
|
|
"step": 760,
|
|
"valid_targets_mean": 16103.6,
|
|
"valid_targets_min": 15499
|
|
},
|
|
{
|
|
"epoch": 3.2388059701492535,
|
|
"grad_norm": 0.020546420753316537,
|
|
"learning_rate": 1.3380393637097692e-05,
|
|
"loss": 0.9227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20723867416381836,
|
|
"step": 761,
|
|
"valid_targets_mean": 14182.6,
|
|
"valid_targets_min": 12356
|
|
},
|
|
{
|
|
"epoch": 3.2430703624733477,
|
|
"grad_norm": 0.02016114795094157,
|
|
"learning_rate": 1.3324329767164708e-05,
|
|
"loss": 0.9221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620246112346649,
|
|
"step": 762,
|
|
"valid_targets_mean": 16131.8,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 3.2473347547974414,
|
|
"grad_norm": 0.02069490676279116,
|
|
"learning_rate": 1.3268324869023878e-05,
|
|
"loss": 0.9529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23302558064460754,
|
|
"step": 763,
|
|
"valid_targets_mean": 13000.4,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 3.251599147121535,
|
|
"grad_norm": 0.01928188374180539,
|
|
"learning_rate": 1.3212379437413421e-05,
|
|
"loss": 0.9712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205386757850647,
|
|
"step": 764,
|
|
"valid_targets_mean": 15649.5,
|
|
"valid_targets_min": 14511
|
|
},
|
|
{
|
|
"epoch": 3.2558635394456292,
|
|
"grad_norm": 0.02139527859861826,
|
|
"learning_rate": 1.3156493966546236e-05,
|
|
"loss": 0.9603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754514515399933,
|
|
"step": 765,
|
|
"valid_targets_mean": 16113.0,
|
|
"valid_targets_min": 14930
|
|
},
|
|
{
|
|
"epoch": 3.260127931769723,
|
|
"grad_norm": 0.020953637682970046,
|
|
"learning_rate": 1.3100668950105534e-05,
|
|
"loss": 0.9835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18471094965934753,
|
|
"step": 766,
|
|
"valid_targets_mean": 10404.6,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 3.2643923240938166,
|
|
"grad_norm": 0.018822628343564865,
|
|
"learning_rate": 1.3044904881240507e-05,
|
|
"loss": 0.9247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2188999354839325,
|
|
"step": 767,
|
|
"valid_targets_mean": 15959.5,
|
|
"valid_targets_min": 15144
|
|
},
|
|
{
|
|
"epoch": 3.2686567164179103,
|
|
"grad_norm": 0.021965226604917044,
|
|
"learning_rate": 1.2989202252561926e-05,
|
|
"loss": 0.9447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28226226568222046,
|
|
"step": 768,
|
|
"valid_targets_mean": 16181.0,
|
|
"valid_targets_min": 15653
|
|
},
|
|
{
|
|
"epoch": 3.272921108742004,
|
|
"grad_norm": 0.02015600537269892,
|
|
"learning_rate": 1.2933561556137806e-05,
|
|
"loss": 0.9331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20127402245998383,
|
|
"step": 769,
|
|
"valid_targets_mean": 12095.2,
|
|
"valid_targets_min": 9046
|
|
},
|
|
{
|
|
"epoch": 3.277185501066098,
|
|
"grad_norm": 0.019128620055925853,
|
|
"learning_rate": 1.2877983283489062e-05,
|
|
"loss": 0.9398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24238646030426025,
|
|
"step": 770,
|
|
"valid_targets_mean": 16164.4,
|
|
"valid_targets_min": 15399
|
|
},
|
|
{
|
|
"epoch": 3.281449893390192,
|
|
"grad_norm": 0.02045613390934546,
|
|
"learning_rate": 1.2822467925585186e-05,
|
|
"loss": 0.963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2777571678161621,
|
|
"step": 771,
|
|
"valid_targets_mean": 16167.9,
|
|
"valid_targets_min": 15258
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.019626700406359533,
|
|
"learning_rate": 1.2767015972839879e-05,
|
|
"loss": 0.8984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19640398025512695,
|
|
"step": 772,
|
|
"valid_targets_mean": 13513.8,
|
|
"valid_targets_min": 11397
|
|
},
|
|
{
|
|
"epoch": 3.2899786780383797,
|
|
"grad_norm": 0.020143524668441715,
|
|
"learning_rate": 1.2711627915106728e-05,
|
|
"loss": 0.9716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799719572067261,
|
|
"step": 773,
|
|
"valid_targets_mean": 15956.7,
|
|
"valid_targets_min": 8340
|
|
},
|
|
{
|
|
"epoch": 3.2942430703624734,
|
|
"grad_norm": 0.019963523788075515,
|
|
"learning_rate": 1.2656304241674877e-05,
|
|
"loss": 0.892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27263718843460083,
|
|
"step": 774,
|
|
"valid_targets_mean": 16204.5,
|
|
"valid_targets_min": 15660
|
|
},
|
|
{
|
|
"epoch": 3.298507462686567,
|
|
"grad_norm": 0.01875409178177124,
|
|
"learning_rate": 1.2601045441264734e-05,
|
|
"loss": 0.913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20908676087856293,
|
|
"step": 775,
|
|
"valid_targets_mean": 15602.8,
|
|
"valid_targets_min": 14377
|
|
},
|
|
{
|
|
"epoch": 3.302771855010661,
|
|
"grad_norm": 0.020132591957437246,
|
|
"learning_rate": 1.2545852002023599e-05,
|
|
"loss": 0.9326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283974826335907,
|
|
"step": 776,
|
|
"valid_targets_mean": 16037.9,
|
|
"valid_targets_min": 14742
|
|
},
|
|
{
|
|
"epoch": 3.307036247334755,
|
|
"grad_norm": 0.021024498975620712,
|
|
"learning_rate": 1.2490724411521406e-05,
|
|
"loss": 0.9356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1926518827676773,
|
|
"step": 777,
|
|
"valid_targets_mean": 11420.1,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 3.3113006396588487,
|
|
"grad_norm": 0.019110612258239058,
|
|
"learning_rate": 1.243566315674637e-05,
|
|
"loss": 0.9564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21391458809375763,
|
|
"step": 778,
|
|
"valid_targets_mean": 15799.8,
|
|
"valid_targets_min": 15064
|
|
},
|
|
{
|
|
"epoch": 3.3155650319829424,
|
|
"grad_norm": 0.018962714768236386,
|
|
"learning_rate": 1.238066872410073e-05,
|
|
"loss": 0.8946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711735963821411,
|
|
"step": 779,
|
|
"valid_targets_mean": 16161.2,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 3.319829424307036,
|
|
"grad_norm": 0.020710916375997993,
|
|
"learning_rate": 1.2325741599396418e-05,
|
|
"loss": 0.9249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12771998345851898,
|
|
"step": 780,
|
|
"valid_targets_mean": 7314.3,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 3.3240938166311302,
|
|
"grad_norm": 0.02007816436914826,
|
|
"learning_rate": 1.2270882267850765e-05,
|
|
"loss": 1.0139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571679353713989,
|
|
"step": 781,
|
|
"valid_targets_mean": 16054.6,
|
|
"valid_targets_min": 14616
|
|
},
|
|
{
|
|
"epoch": 3.328358208955224,
|
|
"grad_norm": 0.020467017892201563,
|
|
"learning_rate": 1.2216091214082248e-05,
|
|
"loss": 0.9181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25981515645980835,
|
|
"step": 782,
|
|
"valid_targets_mean": 16100.8,
|
|
"valid_targets_min": 13818
|
|
},
|
|
{
|
|
"epoch": 3.3326226012793176,
|
|
"grad_norm": 0.019645814193523924,
|
|
"learning_rate": 1.2161368922106192e-05,
|
|
"loss": 0.9517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1756778508424759,
|
|
"step": 783,
|
|
"valid_targets_mean": 11651.5,
|
|
"valid_targets_min": 8118
|
|
},
|
|
{
|
|
"epoch": 3.3368869936034113,
|
|
"grad_norm": 0.0208673022474899,
|
|
"learning_rate": 1.2106715875330475e-05,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559047043323517,
|
|
"step": 784,
|
|
"valid_targets_mean": 16147.8,
|
|
"valid_targets_min": 15193
|
|
},
|
|
{
|
|
"epoch": 3.3411513859275055,
|
|
"grad_norm": 0.019735067258807592,
|
|
"learning_rate": 1.2052132556551275e-05,
|
|
"loss": 0.9361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26459941267967224,
|
|
"step": 785,
|
|
"valid_targets_mean": 16188.1,
|
|
"valid_targets_min": 15688
|
|
},
|
|
{
|
|
"epoch": 3.345415778251599,
|
|
"grad_norm": 0.019408634126746732,
|
|
"learning_rate": 1.1997619447948814e-05,
|
|
"loss": 0.9895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22436955571174622,
|
|
"step": 786,
|
|
"valid_targets_mean": 14265.9,
|
|
"valid_targets_min": 11910
|
|
},
|
|
{
|
|
"epoch": 3.349680170575693,
|
|
"grad_norm": 0.019846185122479976,
|
|
"learning_rate": 1.1943177031083094e-05,
|
|
"loss": 0.9411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632370889186859,
|
|
"step": 787,
|
|
"valid_targets_mean": 16058.8,
|
|
"valid_targets_min": 13190
|
|
},
|
|
{
|
|
"epoch": 3.3539445628997866,
|
|
"grad_norm": 0.020508200102526596,
|
|
"learning_rate": 1.1888805786889621e-05,
|
|
"loss": 0.9493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23721584677696228,
|
|
"step": 788,
|
|
"valid_targets_mean": 13427.5,
|
|
"valid_targets_min": 3746
|
|
},
|
|
{
|
|
"epoch": 3.3582089552238807,
|
|
"grad_norm": 0.018674874147048322,
|
|
"learning_rate": 1.183450619567518e-05,
|
|
"loss": 0.9194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251160204410553,
|
|
"step": 789,
|
|
"valid_targets_mean": 15649.3,
|
|
"valid_targets_min": 14765
|
|
},
|
|
{
|
|
"epoch": 3.3624733475479744,
|
|
"grad_norm": 0.021128529340567925,
|
|
"learning_rate": 1.1780278737113581e-05,
|
|
"loss": 0.9753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26453351974487305,
|
|
"step": 790,
|
|
"valid_targets_mean": 15818.5,
|
|
"valid_targets_min": 6594
|
|
},
|
|
{
|
|
"epoch": 3.366737739872068,
|
|
"grad_norm": 0.019691629946780503,
|
|
"learning_rate": 1.1726123890241439e-05,
|
|
"loss": 0.9271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17516183853149414,
|
|
"step": 791,
|
|
"valid_targets_mean": 9972.9,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 3.3710021321961623,
|
|
"grad_norm": 0.018873515728585213,
|
|
"learning_rate": 1.1672042133453925e-05,
|
|
"loss": 0.9368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23451797664165497,
|
|
"step": 792,
|
|
"valid_targets_mean": 15801.9,
|
|
"valid_targets_min": 14891
|
|
},
|
|
{
|
|
"epoch": 3.375266524520256,
|
|
"grad_norm": 0.021268825080824835,
|
|
"learning_rate": 1.1618033944500527e-05,
|
|
"loss": 0.9327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055174946784973,
|
|
"step": 793,
|
|
"valid_targets_mean": 16113.9,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 3.3795309168443497,
|
|
"grad_norm": 0.022084498960663015,
|
|
"learning_rate": 1.1564099800480864e-05,
|
|
"loss": 0.9769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184430330991745,
|
|
"step": 794,
|
|
"valid_targets_mean": 11436.6,
|
|
"valid_targets_min": 6394
|
|
},
|
|
{
|
|
"epoch": 3.3837953091684434,
|
|
"grad_norm": 0.019318749753370997,
|
|
"learning_rate": 1.151024017784045e-05,
|
|
"loss": 0.9255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23337864875793457,
|
|
"step": 795,
|
|
"valid_targets_mean": 16223.6,
|
|
"valid_targets_min": 15149
|
|
},
|
|
{
|
|
"epoch": 3.388059701492537,
|
|
"grad_norm": 0.020540299050258195,
|
|
"learning_rate": 1.1456455552366488e-05,
|
|
"loss": 0.9635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902466952800751,
|
|
"step": 796,
|
|
"valid_targets_mean": 16165.1,
|
|
"valid_targets_min": 14797
|
|
},
|
|
{
|
|
"epoch": 3.3923240938166312,
|
|
"grad_norm": 0.01924464902979827,
|
|
"learning_rate": 1.1402746399183671e-05,
|
|
"loss": 0.9412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21659094095230103,
|
|
"step": 797,
|
|
"valid_targets_mean": 13885.1,
|
|
"valid_targets_min": 11752
|
|
},
|
|
{
|
|
"epoch": 3.396588486140725,
|
|
"grad_norm": 0.02088841295660991,
|
|
"learning_rate": 1.1349113192749986e-05,
|
|
"loss": 0.9696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26330822706222534,
|
|
"step": 798,
|
|
"valid_targets_mean": 16094.8,
|
|
"valid_targets_min": 14248
|
|
},
|
|
{
|
|
"epoch": 3.4008528784648187,
|
|
"grad_norm": 0.020800320646272165,
|
|
"learning_rate": 1.1295556406852488e-05,
|
|
"loss": 0.9394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26885247230529785,
|
|
"step": 799,
|
|
"valid_targets_mean": 16118.3,
|
|
"valid_targets_min": 15163
|
|
},
|
|
{
|
|
"epoch": 3.405117270788913,
|
|
"grad_norm": 0.02117454657052615,
|
|
"learning_rate": 1.1242076514603201e-05,
|
|
"loss": 1.001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2230033576488495,
|
|
"step": 800,
|
|
"valid_targets_mean": 14451.2,
|
|
"valid_targets_min": 11507
|
|
},
|
|
{
|
|
"epoch": 3.4093816631130065,
|
|
"grad_norm": 0.021203671082902713,
|
|
"learning_rate": 1.1188673988434831e-05,
|
|
"loss": 0.9959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2879166603088379,
|
|
"step": 801,
|
|
"valid_targets_mean": 16170.4,
|
|
"valid_targets_min": 15404
|
|
},
|
|
{
|
|
"epoch": 3.4136460554371,
|
|
"grad_norm": 0.020704595370065665,
|
|
"learning_rate": 1.1135349300096667e-05,
|
|
"loss": 0.9193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18880903720855713,
|
|
"step": 802,
|
|
"valid_targets_mean": 10868.8,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 3.417910447761194,
|
|
"grad_norm": 0.018932518685680893,
|
|
"learning_rate": 1.1082102920650397e-05,
|
|
"loss": 0.9419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2400081902742386,
|
|
"step": 803,
|
|
"valid_targets_mean": 16137.1,
|
|
"valid_targets_min": 15192
|
|
},
|
|
{
|
|
"epoch": 3.4221748400852876,
|
|
"grad_norm": 0.01910239986499641,
|
|
"learning_rate": 1.102893532046593e-05,
|
|
"loss": 0.9506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542757987976074,
|
|
"step": 804,
|
|
"valid_targets_mean": 16125.9,
|
|
"valid_targets_min": 14896
|
|
},
|
|
{
|
|
"epoch": 3.4264392324093818,
|
|
"grad_norm": 0.020128621415935497,
|
|
"learning_rate": 1.0975846969217258e-05,
|
|
"loss": 0.9381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14620298147201538,
|
|
"step": 805,
|
|
"valid_targets_mean": 8239.0,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 3.4307036247334755,
|
|
"grad_norm": 0.019574762648429465,
|
|
"learning_rate": 1.092283833587829e-05,
|
|
"loss": 0.9764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24655970931053162,
|
|
"step": 806,
|
|
"valid_targets_mean": 16052.1,
|
|
"valid_targets_min": 14898
|
|
},
|
|
{
|
|
"epoch": 3.434968017057569,
|
|
"grad_norm": 0.02150053961059598,
|
|
"learning_rate": 1.086990988871873e-05,
|
|
"loss": 0.9624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27228182554244995,
|
|
"step": 807,
|
|
"valid_targets_mean": 16088.6,
|
|
"valid_targets_min": 12845
|
|
},
|
|
{
|
|
"epoch": 3.4392324093816633,
|
|
"grad_norm": 0.020950604276474195,
|
|
"learning_rate": 1.0817062095299929e-05,
|
|
"loss": 0.9822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17617779970169067,
|
|
"step": 808,
|
|
"valid_targets_mean": 11074.7,
|
|
"valid_targets_min": 7631
|
|
},
|
|
{
|
|
"epoch": 3.443496801705757,
|
|
"grad_norm": 0.020122965765599604,
|
|
"learning_rate": 1.0764295422470755e-05,
|
|
"loss": 0.9359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270962119102478,
|
|
"step": 809,
|
|
"valid_targets_mean": 16123.7,
|
|
"valid_targets_min": 13599
|
|
},
|
|
{
|
|
"epoch": 3.4477611940298507,
|
|
"grad_norm": 0.02182424544376422,
|
|
"learning_rate": 1.0711610336363477e-05,
|
|
"loss": 0.9468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863156795501709,
|
|
"step": 810,
|
|
"valid_targets_mean": 16065.3,
|
|
"valid_targets_min": 14127
|
|
},
|
|
{
|
|
"epoch": 3.4520255863539444,
|
|
"grad_norm": 0.018653464933918357,
|
|
"learning_rate": 1.065900730238961e-05,
|
|
"loss": 0.9287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21440692245960236,
|
|
"step": 811,
|
|
"valid_targets_mean": 14763.3,
|
|
"valid_targets_min": 12853
|
|
},
|
|
{
|
|
"epoch": 3.4562899786780386,
|
|
"grad_norm": 0.020293838947214524,
|
|
"learning_rate": 1.0606486785235879e-05,
|
|
"loss": 0.9311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27220091223716736,
|
|
"step": 812,
|
|
"valid_targets_mean": 16119.9,
|
|
"valid_targets_min": 15078
|
|
},
|
|
{
|
|
"epoch": 3.4605543710021323,
|
|
"grad_norm": 0.020242908816244364,
|
|
"learning_rate": 1.0554049248860045e-05,
|
|
"loss": 0.9716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23688478767871857,
|
|
"step": 813,
|
|
"valid_targets_mean": 12901.8,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 3.464818763326226,
|
|
"grad_norm": 0.01964303931108298,
|
|
"learning_rate": 1.0501695156486819e-05,
|
|
"loss": 0.9785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22774483263492584,
|
|
"step": 814,
|
|
"valid_targets_mean": 15541.4,
|
|
"valid_targets_min": 10715
|
|
},
|
|
{
|
|
"epoch": 3.4690831556503197,
|
|
"grad_norm": 0.02060931287913502,
|
|
"learning_rate": 1.0449424970603796e-05,
|
|
"loss": 0.9339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24571114778518677,
|
|
"step": 815,
|
|
"valid_targets_mean": 16257.2,
|
|
"valid_targets_min": 15824
|
|
},
|
|
{
|
|
"epoch": 3.473347547974414,
|
|
"grad_norm": 0.0195710889476689,
|
|
"learning_rate": 1.0397239152957356e-05,
|
|
"loss": 0.9401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16588979959487915,
|
|
"step": 816,
|
|
"valid_targets_mean": 9123.7,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 3.4776119402985075,
|
|
"grad_norm": 0.01920736159583621,
|
|
"learning_rate": 1.034513816454858e-05,
|
|
"loss": 0.956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23873871564865112,
|
|
"step": 817,
|
|
"valid_targets_mean": 16090.6,
|
|
"valid_targets_min": 14985
|
|
},
|
|
{
|
|
"epoch": 3.481876332622601,
|
|
"grad_norm": 0.0199800308659551,
|
|
"learning_rate": 1.0293122465629186e-05,
|
|
"loss": 0.935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27036336064338684,
|
|
"step": 818,
|
|
"valid_targets_mean": 16081.2,
|
|
"valid_targets_min": 14097
|
|
},
|
|
{
|
|
"epoch": 3.486140724946695,
|
|
"grad_norm": 0.01883425148287786,
|
|
"learning_rate": 1.0241192515697432e-05,
|
|
"loss": 0.914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18650996685028076,
|
|
"step": 819,
|
|
"valid_targets_mean": 12321.2,
|
|
"valid_targets_min": 7293
|
|
},
|
|
{
|
|
"epoch": 3.490405117270789,
|
|
"grad_norm": 0.01986352652136667,
|
|
"learning_rate": 1.0189348773494135e-05,
|
|
"loss": 0.9476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24127691984176636,
|
|
"step": 820,
|
|
"valid_targets_mean": 16181.2,
|
|
"valid_targets_min": 15513
|
|
},
|
|
{
|
|
"epoch": 3.4946695095948828,
|
|
"grad_norm": 0.020981497459513828,
|
|
"learning_rate": 1.0137591696998514e-05,
|
|
"loss": 0.9648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29827940464019775,
|
|
"step": 821,
|
|
"valid_targets_mean": 16039.6,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 3.4989339019189765,
|
|
"grad_norm": 0.020018223937657058,
|
|
"learning_rate": 1.0085921743424225e-05,
|
|
"loss": 0.9335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2078550159931183,
|
|
"step": 822,
|
|
"valid_targets_mean": 13888.0,
|
|
"valid_targets_min": 11700
|
|
},
|
|
{
|
|
"epoch": 3.50319829424307,
|
|
"grad_norm": 0.02018218893986049,
|
|
"learning_rate": 1.0034339369215288e-05,
|
|
"loss": 0.9902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262828528881073,
|
|
"step": 823,
|
|
"valid_targets_mean": 16164.1,
|
|
"valid_targets_min": 14675
|
|
},
|
|
{
|
|
"epoch": 3.5074626865671643,
|
|
"grad_norm": 0.020707242715184745,
|
|
"learning_rate": 9.982845030042068e-06,
|
|
"loss": 0.9751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3144455552101135,
|
|
"step": 824,
|
|
"valid_targets_mean": 16052.5,
|
|
"valid_targets_min": 14989
|
|
},
|
|
{
|
|
"epoch": 3.511727078891258,
|
|
"grad_norm": 0.018499758772752756,
|
|
"learning_rate": 9.931439180797237e-06,
|
|
"loss": 0.9474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20766568183898926,
|
|
"step": 825,
|
|
"valid_targets_mean": 14316.3,
|
|
"valid_targets_min": 12271
|
|
},
|
|
{
|
|
"epoch": 3.5159914712153517,
|
|
"grad_norm": 0.02041237831255458,
|
|
"learning_rate": 9.880122275591752e-06,
|
|
"loss": 0.9752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27884429693222046,
|
|
"step": 826,
|
|
"valid_targets_mean": 16242.4,
|
|
"valid_targets_min": 15869
|
|
},
|
|
{
|
|
"epoch": 3.520255863539446,
|
|
"grad_norm": 0.021242047654551812,
|
|
"learning_rate": 9.828894767750865e-06,
|
|
"loss": 0.9951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20700660347938538,
|
|
"step": 827,
|
|
"valid_targets_mean": 10522.8,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 3.5245202558635396,
|
|
"grad_norm": 0.019754255375376287,
|
|
"learning_rate": 9.777757109810102e-06,
|
|
"loss": 0.9427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23735305666923523,
|
|
"step": 828,
|
|
"valid_targets_mean": 16110.6,
|
|
"valid_targets_min": 15428
|
|
},
|
|
{
|
|
"epoch": 3.5287846481876333,
|
|
"grad_norm": 0.020572431545494818,
|
|
"learning_rate": 9.726709753511275e-06,
|
|
"loss": 0.9853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27254074811935425,
|
|
"step": 829,
|
|
"valid_targets_mean": 16118.3,
|
|
"valid_targets_min": 15077
|
|
},
|
|
{
|
|
"epoch": 3.533049040511727,
|
|
"grad_norm": 0.020498214069708507,
|
|
"learning_rate": 9.675753149798474e-06,
|
|
"loss": 0.9585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161421000957489,
|
|
"step": 830,
|
|
"valid_targets_mean": 9637.7,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 3.5373134328358207,
|
|
"grad_norm": 0.018789195583520317,
|
|
"learning_rate": 9.624887748814118e-06,
|
|
"loss": 0.9482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22719332575798035,
|
|
"step": 831,
|
|
"valid_targets_mean": 16272.5,
|
|
"valid_targets_min": 15785
|
|
},
|
|
{
|
|
"epoch": 3.541577825159915,
|
|
"grad_norm": 0.020000689076038462,
|
|
"learning_rate": 9.574113999894909e-06,
|
|
"loss": 0.9594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673766613006592,
|
|
"step": 832,
|
|
"valid_targets_mean": 16194.5,
|
|
"valid_targets_min": 14909
|
|
},
|
|
{
|
|
"epoch": 3.5458422174840085,
|
|
"grad_norm": 0.019467256273518134,
|
|
"learning_rate": 9.523432351567979e-06,
|
|
"loss": 0.9474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20064501464366913,
|
|
"step": 833,
|
|
"valid_targets_mean": 12234.2,
|
|
"valid_targets_min": 9109
|
|
},
|
|
{
|
|
"epoch": 3.550106609808102,
|
|
"grad_norm": 0.02017314899849259,
|
|
"learning_rate": 9.472843251546792e-06,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30065226554870605,
|
|
"step": 834,
|
|
"valid_targets_mean": 16072.3,
|
|
"valid_targets_min": 15406
|
|
},
|
|
{
|
|
"epoch": 3.5543710021321964,
|
|
"grad_norm": 0.0202857190330068,
|
|
"learning_rate": 9.422347146727294e-06,
|
|
"loss": 0.9308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27644020318984985,
|
|
"step": 835,
|
|
"valid_targets_mean": 16105.0,
|
|
"valid_targets_min": 14780
|
|
},
|
|
{
|
|
"epoch": 3.55863539445629,
|
|
"grad_norm": 0.01974022645738321,
|
|
"learning_rate": 9.371944483183912e-06,
|
|
"loss": 0.9466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22801238298416138,
|
|
"step": 836,
|
|
"valid_targets_mean": 13700.1,
|
|
"valid_targets_min": 11435
|
|
},
|
|
{
|
|
"epoch": 3.5628997867803838,
|
|
"grad_norm": 0.02014572597409784,
|
|
"learning_rate": 9.321635706165635e-06,
|
|
"loss": 0.973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532466650009155,
|
|
"step": 837,
|
|
"valid_targets_mean": 16206.3,
|
|
"valid_targets_min": 15650
|
|
},
|
|
{
|
|
"epoch": 3.5671641791044775,
|
|
"grad_norm": 0.020803074778654864,
|
|
"learning_rate": 9.271421260092075e-06,
|
|
"loss": 0.9861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24805572628974915,
|
|
"step": 838,
|
|
"valid_targets_mean": 12914.5,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.018958609021885127,
|
|
"learning_rate": 9.221301588549519e-06,
|
|
"loss": 0.96,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24226129055023193,
|
|
"step": 839,
|
|
"valid_targets_mean": 15462.1,
|
|
"valid_targets_min": 14516
|
|
},
|
|
{
|
|
"epoch": 3.5756929637526653,
|
|
"grad_norm": 0.020590777221351386,
|
|
"learning_rate": 9.171277134287057e-06,
|
|
"loss": 0.96,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752473056316376,
|
|
"step": 840,
|
|
"valid_targets_mean": 16130.3,
|
|
"valid_targets_min": 14758
|
|
},
|
|
{
|
|
"epoch": 3.579957356076759,
|
|
"grad_norm": 0.019420349708176462,
|
|
"learning_rate": 9.121348339212634e-06,
|
|
"loss": 0.9666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752808392047882,
|
|
"step": 841,
|
|
"valid_targets_mean": 11204.8,
|
|
"valid_targets_min": 3042
|
|
},
|
|
{
|
|
"epoch": 3.5842217484008527,
|
|
"grad_norm": 0.018420891295295335,
|
|
"learning_rate": 9.07151564438916e-06,
|
|
"loss": 0.9547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264341115951538,
|
|
"step": 842,
|
|
"valid_targets_mean": 15806.7,
|
|
"valid_targets_min": 14924
|
|
},
|
|
{
|
|
"epoch": 3.588486140724947,
|
|
"grad_norm": 0.02089734050245462,
|
|
"learning_rate": 9.021779490030611e-06,
|
|
"loss": 0.9472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28249192237854004,
|
|
"step": 843,
|
|
"valid_targets_mean": 16184.8,
|
|
"valid_targets_min": 15502
|
|
},
|
|
{
|
|
"epoch": 3.5927505330490406,
|
|
"grad_norm": 0.02102325158900612,
|
|
"learning_rate": 8.972140315498119e-06,
|
|
"loss": 0.9016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1559985876083374,
|
|
"step": 844,
|
|
"valid_targets_mean": 9700.5,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 3.5970149253731343,
|
|
"grad_norm": 0.019290844738156256,
|
|
"learning_rate": 8.922598559296154e-06,
|
|
"loss": 0.9277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22611713409423828,
|
|
"step": 845,
|
|
"valid_targets_mean": 16157.3,
|
|
"valid_targets_min": 14383
|
|
},
|
|
{
|
|
"epoch": 3.6012793176972284,
|
|
"grad_norm": 0.02037241229580003,
|
|
"learning_rate": 8.873154659068582e-06,
|
|
"loss": 0.9082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590547204017639,
|
|
"step": 846,
|
|
"valid_targets_mean": 16124.6,
|
|
"valid_targets_min": 14573
|
|
},
|
|
{
|
|
"epoch": 3.605543710021322,
|
|
"grad_norm": 0.02048239115833522,
|
|
"learning_rate": 8.823809051594816e-06,
|
|
"loss": 0.9583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129451036453247,
|
|
"step": 847,
|
|
"valid_targets_mean": 13583.2,
|
|
"valid_targets_min": 11966
|
|
},
|
|
{
|
|
"epoch": 3.609808102345416,
|
|
"grad_norm": 0.02038944990419857,
|
|
"learning_rate": 8.774562172785988e-06,
|
|
"loss": 0.9791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27725669741630554,
|
|
"step": 848,
|
|
"valid_targets_mean": 16088.6,
|
|
"valid_targets_min": 14847
|
|
},
|
|
{
|
|
"epoch": 3.6140724946695095,
|
|
"grad_norm": 0.02078026052152238,
|
|
"learning_rate": 8.725414457681063e-06,
|
|
"loss": 0.96,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855116128921509,
|
|
"step": 849,
|
|
"valid_targets_mean": 16144.4,
|
|
"valid_targets_min": 15605
|
|
},
|
|
{
|
|
"epoch": 3.6183368869936032,
|
|
"grad_norm": 0.01903593864572435,
|
|
"learning_rate": 8.676366340443017e-06,
|
|
"loss": 0.9507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22052185237407684,
|
|
"step": 850,
|
|
"valid_targets_mean": 15525.0,
|
|
"valid_targets_min": 14101
|
|
},
|
|
{
|
|
"epoch": 3.6226012793176974,
|
|
"grad_norm": 0.020386355383629807,
|
|
"learning_rate": 8.627418254355e-06,
|
|
"loss": 0.9236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551213502883911,
|
|
"step": 851,
|
|
"valid_targets_mean": 16130.1,
|
|
"valid_targets_min": 14780
|
|
},
|
|
{
|
|
"epoch": 3.626865671641791,
|
|
"grad_norm": 0.020239773796026896,
|
|
"learning_rate": 8.578570631816474e-06,
|
|
"loss": 0.9156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18237504363059998,
|
|
"step": 852,
|
|
"valid_targets_mean": 10837.0,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 3.631130063965885,
|
|
"grad_norm": 0.017935364354212308,
|
|
"learning_rate": 8.529823904339472e-06,
|
|
"loss": 0.9393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24889986217021942,
|
|
"step": 853,
|
|
"valid_targets_mean": 16013.2,
|
|
"valid_targets_min": 15259
|
|
},
|
|
{
|
|
"epoch": 3.635394456289979,
|
|
"grad_norm": 0.019854601110972397,
|
|
"learning_rate": 8.481178502544684e-06,
|
|
"loss": 0.8842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533286511898041,
|
|
"step": 854,
|
|
"valid_targets_mean": 16151.4,
|
|
"valid_targets_min": 14218
|
|
},
|
|
{
|
|
"epoch": 3.6396588486140726,
|
|
"grad_norm": 0.020072256120860107,
|
|
"learning_rate": 8.43263485615774e-06,
|
|
"loss": 0.9821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15175075829029083,
|
|
"step": 855,
|
|
"valid_targets_mean": 8557.2,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 3.6439232409381663,
|
|
"grad_norm": 0.019325938318480583,
|
|
"learning_rate": 8.384193394005372e-06,
|
|
"loss": 0.9679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23528137803077698,
|
|
"step": 856,
|
|
"valid_targets_mean": 16036.3,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 3.64818763326226,
|
|
"grad_norm": 0.02045058830526576,
|
|
"learning_rate": 8.33585454401161e-06,
|
|
"loss": 0.9588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27999258041381836,
|
|
"step": 857,
|
|
"valid_targets_mean": 16106.5,
|
|
"valid_targets_min": 15419
|
|
},
|
|
{
|
|
"epoch": 3.6524520255863537,
|
|
"grad_norm": 0.019319659140186823,
|
|
"learning_rate": 8.287618733194073e-06,
|
|
"loss": 0.9591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19412197172641754,
|
|
"step": 858,
|
|
"valid_targets_mean": 12434.2,
|
|
"valid_targets_min": 9526
|
|
},
|
|
{
|
|
"epoch": 3.656716417910448,
|
|
"grad_norm": 0.01919009913355325,
|
|
"learning_rate": 8.239486387660096e-06,
|
|
"loss": 0.9259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25990206003189087,
|
|
"step": 859,
|
|
"valid_targets_mean": 16052.4,
|
|
"valid_targets_min": 14573
|
|
},
|
|
{
|
|
"epoch": 3.6609808102345416,
|
|
"grad_norm": 0.020264528668666418,
|
|
"learning_rate": 8.191457932603052e-06,
|
|
"loss": 0.9368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28368881344795227,
|
|
"step": 860,
|
|
"valid_targets_mean": 16189.9,
|
|
"valid_targets_min": 15318
|
|
},
|
|
{
|
|
"epoch": 3.6652452025586353,
|
|
"grad_norm": 0.019059097041754538,
|
|
"learning_rate": 8.143533792298545e-06,
|
|
"loss": 0.9942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24183300137519836,
|
|
"step": 861,
|
|
"valid_targets_mean": 14834.9,
|
|
"valid_targets_min": 13156
|
|
},
|
|
{
|
|
"epoch": 3.6695095948827294,
|
|
"grad_norm": 0.020381818453641103,
|
|
"learning_rate": 8.095714390100698e-06,
|
|
"loss": 0.9661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25934678316116333,
|
|
"step": 862,
|
|
"valid_targets_mean": 16086.7,
|
|
"valid_targets_min": 13688
|
|
},
|
|
{
|
|
"epoch": 3.673773987206823,
|
|
"grad_norm": 0.02054291413284647,
|
|
"learning_rate": 8.048000148438375e-06,
|
|
"loss": 0.9552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24759866297245026,
|
|
"step": 863,
|
|
"valid_targets_mean": 13057.0,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 3.678038379530917,
|
|
"grad_norm": 0.01888334538021628,
|
|
"learning_rate": 8.000391488811485e-06,
|
|
"loss": 0.9442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22527824342250824,
|
|
"step": 864,
|
|
"valid_targets_mean": 15543.2,
|
|
"valid_targets_min": 14356
|
|
},
|
|
{
|
|
"epoch": 3.6823027718550105,
|
|
"grad_norm": 0.020680553291352273,
|
|
"learning_rate": 7.952888831787215e-06,
|
|
"loss": 0.961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27160200476646423,
|
|
"step": 865,
|
|
"valid_targets_mean": 16168.2,
|
|
"valid_targets_min": 14583
|
|
},
|
|
{
|
|
"epoch": 3.6865671641791042,
|
|
"grad_norm": 0.020143536516145092,
|
|
"learning_rate": 7.905492596996391e-06,
|
|
"loss": 0.9308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1685619354248047,
|
|
"step": 866,
|
|
"valid_targets_mean": 9412.8,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 3.6908315565031984,
|
|
"grad_norm": 0.02017630476084744,
|
|
"learning_rate": 7.858203203129668e-06,
|
|
"loss": 0.9857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24850547313690186,
|
|
"step": 867,
|
|
"valid_targets_mean": 16077.5,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 3.695095948827292,
|
|
"grad_norm": 0.020842638039927433,
|
|
"learning_rate": 7.811021067933919e-06,
|
|
"loss": 0.9381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560654282569885,
|
|
"step": 868,
|
|
"valid_targets_mean": 16130.3,
|
|
"valid_targets_min": 14950
|
|
},
|
|
{
|
|
"epoch": 3.699360341151386,
|
|
"grad_norm": 0.019791620307250967,
|
|
"learning_rate": 7.763946608208504e-06,
|
|
"loss": 0.9353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19826537370681763,
|
|
"step": 869,
|
|
"valid_targets_mean": 11080.4,
|
|
"valid_targets_min": 7791
|
|
},
|
|
{
|
|
"epoch": 3.70362473347548,
|
|
"grad_norm": 0.019809253707309758,
|
|
"learning_rate": 7.716980239801588e-06,
|
|
"loss": 0.9603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2749443054199219,
|
|
"step": 870,
|
|
"valid_targets_mean": 15998.9,
|
|
"valid_targets_min": 14869
|
|
},
|
|
{
|
|
"epoch": 3.7078891257995736,
|
|
"grad_norm": 0.021168406664035903,
|
|
"learning_rate": 7.670122377606495e-06,
|
|
"loss": 0.9597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967666685581207,
|
|
"step": 871,
|
|
"valid_targets_mean": 16137.7,
|
|
"valid_targets_min": 15406
|
|
},
|
|
{
|
|
"epoch": 3.7121535181236673,
|
|
"grad_norm": 0.02021495361450913,
|
|
"learning_rate": 7.623373435557988e-06,
|
|
"loss": 0.95,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20782756805419922,
|
|
"step": 872,
|
|
"valid_targets_mean": 14003.7,
|
|
"valid_targets_min": 11758
|
|
},
|
|
{
|
|
"epoch": 3.716417910447761,
|
|
"grad_norm": 0.019088949478737544,
|
|
"learning_rate": 7.5767338266286775e-06,
|
|
"loss": 0.917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613958716392517,
|
|
"step": 873,
|
|
"valid_targets_mean": 16199.3,
|
|
"valid_targets_min": 14847
|
|
},
|
|
{
|
|
"epoch": 3.7206823027718547,
|
|
"grad_norm": 0.021108531094344254,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 0.963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27362334728240967,
|
|
"step": 874,
|
|
"valid_targets_mean": 16180.5,
|
|
"valid_targets_min": 15478
|
|
},
|
|
{
|
|
"epoch": 3.724946695095949,
|
|
"grad_norm": 0.0192267162640742,
|
|
"learning_rate": 7.483784255185249e-06,
|
|
"loss": 0.9698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244693636894226,
|
|
"step": 875,
|
|
"valid_targets_mean": 14706.2,
|
|
"valid_targets_min": 13322
|
|
},
|
|
{
|
|
"epoch": 3.7292110874200426,
|
|
"grad_norm": 0.020433696920117674,
|
|
"learning_rate": 7.437475113772632e-06,
|
|
"loss": 0.9524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28148141503334045,
|
|
"step": 876,
|
|
"valid_targets_mean": 16151.3,
|
|
"valid_targets_min": 15159
|
|
},
|
|
{
|
|
"epoch": 3.7334754797441363,
|
|
"grad_norm": 0.02032245296801021,
|
|
"learning_rate": 7.391276947674932e-06,
|
|
"loss": 0.9219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19244670867919922,
|
|
"step": 877,
|
|
"valid_targets_mean": 11038.8,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 3.7377398720682304,
|
|
"grad_norm": 0.019253702894391343,
|
|
"learning_rate": 7.345190164999307e-06,
|
|
"loss": 0.9269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22072041034698486,
|
|
"step": 878,
|
|
"valid_targets_mean": 15985.2,
|
|
"valid_targets_min": 15543
|
|
},
|
|
{
|
|
"epoch": 3.742004264392324,
|
|
"grad_norm": 0.020490208668798148,
|
|
"learning_rate": 7.299215172868947e-06,
|
|
"loss": 0.9434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27287909388542175,
|
|
"step": 879,
|
|
"valid_targets_mean": 16111.8,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 3.746268656716418,
|
|
"grad_norm": 0.01982248525086593,
|
|
"learning_rate": 7.2533523774194865e-06,
|
|
"loss": 0.9286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15191233158111572,
|
|
"step": 880,
|
|
"valid_targets_mean": 8807.7,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 3.750533049040512,
|
|
"grad_norm": 0.01822655065634172,
|
|
"learning_rate": 7.2076021837954616e-06,
|
|
"loss": 0.8957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22693949937820435,
|
|
"step": 881,
|
|
"valid_targets_mean": 16221.5,
|
|
"valid_targets_min": 15770
|
|
},
|
|
{
|
|
"epoch": 3.7547974413646057,
|
|
"grad_norm": 0.020255236120271018,
|
|
"learning_rate": 7.161964996146689e-06,
|
|
"loss": 0.9361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288006067276001,
|
|
"step": 882,
|
|
"valid_targets_mean": 16108.4,
|
|
"valid_targets_min": 15388
|
|
},
|
|
{
|
|
"epoch": 3.7590618336886994,
|
|
"grad_norm": 0.01965959450607481,
|
|
"learning_rate": 7.116441217624708e-06,
|
|
"loss": 0.933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1873670220375061,
|
|
"step": 883,
|
|
"valid_targets_mean": 11916.7,
|
|
"valid_targets_min": 7811
|
|
},
|
|
{
|
|
"epoch": 3.763326226012793,
|
|
"grad_norm": 0.019525027193076777,
|
|
"learning_rate": 7.071031250379228e-06,
|
|
"loss": 0.9234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24530890583992004,
|
|
"step": 884,
|
|
"valid_targets_mean": 16183.9,
|
|
"valid_targets_min": 15441
|
|
},
|
|
{
|
|
"epoch": 3.767590618336887,
|
|
"grad_norm": 0.020417120031424443,
|
|
"learning_rate": 7.0257354955545466e-06,
|
|
"loss": 0.9491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808264493942261,
|
|
"step": 885,
|
|
"valid_targets_mean": 16157.8,
|
|
"valid_targets_min": 14943
|
|
},
|
|
{
|
|
"epoch": 3.771855010660981,
|
|
"grad_norm": 0.019665052828138012,
|
|
"learning_rate": 6.980554353286066e-06,
|
|
"loss": 0.922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21257588267326355,
|
|
"step": 886,
|
|
"valid_targets_mean": 14326.7,
|
|
"valid_targets_min": 12870
|
|
},
|
|
{
|
|
"epoch": 3.7761194029850746,
|
|
"grad_norm": 0.019642816611163974,
|
|
"learning_rate": 6.935488222696676e-06,
|
|
"loss": 0.9766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281036376953125,
|
|
"step": 887,
|
|
"valid_targets_mean": 16141.5,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 3.7803837953091683,
|
|
"grad_norm": 0.020851435860208885,
|
|
"learning_rate": 6.890537501893302e-06,
|
|
"loss": 0.9367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22510194778442383,
|
|
"step": 888,
|
|
"valid_targets_mean": 12642.1,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 3.7846481876332625,
|
|
"grad_norm": 0.01977506323122634,
|
|
"learning_rate": 6.845702587963352e-06,
|
|
"loss": 0.9764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2417793720960617,
|
|
"step": 889,
|
|
"valid_targets_mean": 15270.5,
|
|
"valid_targets_min": 10955
|
|
},
|
|
{
|
|
"epoch": 3.788912579957356,
|
|
"grad_norm": 0.020096940705365253,
|
|
"learning_rate": 6.800983876971192e-06,
|
|
"loss": 0.9061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2774786353111267,
|
|
"step": 890,
|
|
"valid_targets_mean": 15973.0,
|
|
"valid_targets_min": 13369
|
|
},
|
|
{
|
|
"epoch": 3.79317697228145,
|
|
"grad_norm": 0.020525329343428763,
|
|
"learning_rate": 6.756381763954718e-06,
|
|
"loss": 0.9528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1698097586631775,
|
|
"step": 891,
|
|
"valid_targets_mean": 10128.0,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 3.7974413646055436,
|
|
"grad_norm": 0.01906533468878749,
|
|
"learning_rate": 6.7118966429217645e-06,
|
|
"loss": 0.9508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205936461687088,
|
|
"step": 892,
|
|
"valid_targets_mean": 15906.9,
|
|
"valid_targets_min": 15116
|
|
},
|
|
{
|
|
"epoch": 3.8017057569296373,
|
|
"grad_norm": 0.02121754206910809,
|
|
"learning_rate": 6.667528906846714e-06,
|
|
"loss": 0.9593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.297063946723938,
|
|
"step": 893,
|
|
"valid_targets_mean": 16108.9,
|
|
"valid_targets_min": 14544
|
|
},
|
|
{
|
|
"epoch": 3.8059701492537314,
|
|
"grad_norm": 0.020562388781868623,
|
|
"learning_rate": 6.623278947666974e-06,
|
|
"loss": 0.9727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18242833018302917,
|
|
"step": 894,
|
|
"valid_targets_mean": 10886.4,
|
|
"valid_targets_min": 6901
|
|
},
|
|
{
|
|
"epoch": 3.810234541577825,
|
|
"grad_norm": 0.019321978116372093,
|
|
"learning_rate": 6.579147156279538e-06,
|
|
"loss": 0.96,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.237883523106575,
|
|
"step": 895,
|
|
"valid_targets_mean": 16169.3,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 3.814498933901919,
|
|
"grad_norm": 0.021415482486533147,
|
|
"learning_rate": 6.535133922537513e-06,
|
|
"loss": 0.9462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559899687767029,
|
|
"step": 896,
|
|
"valid_targets_mean": 16193.8,
|
|
"valid_targets_min": 15454
|
|
},
|
|
{
|
|
"epoch": 3.818763326226013,
|
|
"grad_norm": 0.01985727408960623,
|
|
"learning_rate": 6.491239635246709e-06,
|
|
"loss": 0.9479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2019175887107849,
|
|
"step": 897,
|
|
"valid_targets_mean": 13087.5,
|
|
"valid_targets_min": 11112
|
|
},
|
|
{
|
|
"epoch": 3.8230277185501067,
|
|
"grad_norm": 0.0202018257617995,
|
|
"learning_rate": 6.447464682162143e-06,
|
|
"loss": 0.9591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563667297363281,
|
|
"step": 898,
|
|
"valid_targets_mean": 16222.2,
|
|
"valid_targets_min": 15811
|
|
},
|
|
{
|
|
"epoch": 3.8272921108742004,
|
|
"grad_norm": 0.020602664208626786,
|
|
"learning_rate": 6.403809449984704e-06,
|
|
"loss": 0.9464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27878546714782715,
|
|
"step": 899,
|
|
"valid_targets_mean": 16182.1,
|
|
"valid_targets_min": 15653
|
|
},
|
|
{
|
|
"epoch": 3.831556503198294,
|
|
"grad_norm": 0.018885260838559275,
|
|
"learning_rate": 6.3602743243576405e-06,
|
|
"loss": 0.9644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2239798903465271,
|
|
"step": 900,
|
|
"valid_targets_mean": 15295.7,
|
|
"valid_targets_min": 13928
|
|
},
|
|
{
|
|
"epoch": 3.835820895522388,
|
|
"grad_norm": 0.01936876358366128,
|
|
"learning_rate": 6.316859689863222e-06,
|
|
"loss": 0.9318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25518763065338135,
|
|
"step": 901,
|
|
"valid_targets_mean": 16097.6,
|
|
"valid_targets_min": 14311
|
|
},
|
|
{
|
|
"epoch": 3.840085287846482,
|
|
"grad_norm": 0.02056947824325605,
|
|
"learning_rate": 6.273565930019316e-06,
|
|
"loss": 0.9599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18020279705524445,
|
|
"step": 902,
|
|
"valid_targets_mean": 9883.0,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 3.8443496801705757,
|
|
"grad_norm": 0.019379436106792863,
|
|
"learning_rate": 6.230393427276e-06,
|
|
"loss": 0.9317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22614163160324097,
|
|
"step": 903,
|
|
"valid_targets_mean": 16042.1,
|
|
"valid_targets_min": 15095
|
|
},
|
|
{
|
|
"epoch": 3.8486140724946694,
|
|
"grad_norm": 0.01985940006281854,
|
|
"learning_rate": 6.187342563012198e-06,
|
|
"loss": 0.9178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546388506889343,
|
|
"step": 904,
|
|
"valid_targets_mean": 16200.4,
|
|
"valid_targets_min": 15209
|
|
},
|
|
{
|
|
"epoch": 3.8528784648187635,
|
|
"grad_norm": 0.020858167990603856,
|
|
"learning_rate": 6.144413717532269e-06,
|
|
"loss": 0.9229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12831124663352966,
|
|
"step": 905,
|
|
"valid_targets_mean": 7844.1,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.020299734316331298,
|
|
"learning_rate": 6.1016072700627106e-06,
|
|
"loss": 0.9632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21661680936813354,
|
|
"step": 906,
|
|
"valid_targets_mean": 16204.3,
|
|
"valid_targets_min": 15024
|
|
},
|
|
{
|
|
"epoch": 3.861407249466951,
|
|
"grad_norm": 0.020970201362039045,
|
|
"learning_rate": 6.058923598748756e-06,
|
|
"loss": 0.934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23938333988189697,
|
|
"step": 907,
|
|
"valid_targets_mean": 16184.8,
|
|
"valid_targets_min": 15327
|
|
},
|
|
{
|
|
"epoch": 3.8656716417910446,
|
|
"grad_norm": 0.0199014864871677,
|
|
"learning_rate": 6.016363080651066e-06,
|
|
"loss": 0.9581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21135063469409943,
|
|
"step": 908,
|
|
"valid_targets_mean": 12468.2,
|
|
"valid_targets_min": 9516
|
|
},
|
|
{
|
|
"epoch": 3.8699360341151388,
|
|
"grad_norm": 0.019922682556435267,
|
|
"learning_rate": 5.973926091742386e-06,
|
|
"loss": 0.9557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579787075519562,
|
|
"step": 909,
|
|
"valid_targets_mean": 15898.5,
|
|
"valid_targets_min": 10543
|
|
},
|
|
{
|
|
"epoch": 3.8742004264392325,
|
|
"grad_norm": 0.020752840332169897,
|
|
"learning_rate": 5.931613006904196e-06,
|
|
"loss": 0.9072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25384050607681274,
|
|
"step": 910,
|
|
"valid_targets_mean": 16247.6,
|
|
"valid_targets_min": 15822
|
|
},
|
|
{
|
|
"epoch": 3.878464818763326,
|
|
"grad_norm": 0.018141803261086352,
|
|
"learning_rate": 5.889424199923473e-06,
|
|
"loss": 0.925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20717021822929382,
|
|
"step": 911,
|
|
"valid_targets_mean": 15147.2,
|
|
"valid_targets_min": 13818
|
|
},
|
|
{
|
|
"epoch": 3.88272921108742,
|
|
"grad_norm": 0.020676204789153763,
|
|
"learning_rate": 5.847360043489318e-06,
|
|
"loss": 0.9779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605384588241577,
|
|
"step": 912,
|
|
"valid_targets_mean": 16017.9,
|
|
"valid_targets_min": 12845
|
|
},
|
|
{
|
|
"epoch": 3.886993603411514,
|
|
"grad_norm": 0.021030128039929593,
|
|
"learning_rate": 5.805420909189683e-06,
|
|
"loss": 0.9203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22057777643203735,
|
|
"step": 913,
|
|
"valid_targets_mean": 13012.9,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 3.8912579957356077,
|
|
"grad_norm": 0.018687977471461496,
|
|
"learning_rate": 5.7636071675081076e-06,
|
|
"loss": 0.947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2274516224861145,
|
|
"step": 914,
|
|
"valid_targets_mean": 15339.4,
|
|
"valid_targets_min": 13991
|
|
},
|
|
{
|
|
"epoch": 3.8955223880597014,
|
|
"grad_norm": 0.020972566548263225,
|
|
"learning_rate": 5.721919187820431e-06,
|
|
"loss": 0.9495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706483006477356,
|
|
"step": 915,
|
|
"valid_targets_mean": 16154.9,
|
|
"valid_targets_min": 14656
|
|
},
|
|
{
|
|
"epoch": 3.8997867803837956,
|
|
"grad_norm": 0.021602443624440373,
|
|
"learning_rate": 5.6803573383915265e-06,
|
|
"loss": 0.9422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16196952760219574,
|
|
"step": 916,
|
|
"valid_targets_mean": 8931.5,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 3.9040511727078893,
|
|
"grad_norm": 0.019327614272166865,
|
|
"learning_rate": 5.638921986372064e-06,
|
|
"loss": 0.9628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21917498111724854,
|
|
"step": 917,
|
|
"valid_targets_mean": 16214.6,
|
|
"valid_targets_min": 15477
|
|
},
|
|
{
|
|
"epoch": 3.908315565031983,
|
|
"grad_norm": 0.020310064086023465,
|
|
"learning_rate": 5.5976134977952315e-06,
|
|
"loss": 0.9607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687813937664032,
|
|
"step": 918,
|
|
"valid_targets_mean": 16166.4,
|
|
"valid_targets_min": 15189
|
|
},
|
|
{
|
|
"epoch": 3.9125799573560767,
|
|
"grad_norm": 0.01887915647256864,
|
|
"learning_rate": 5.556432237573564e-06,
|
|
"loss": 0.9402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18910843133926392,
|
|
"step": 919,
|
|
"valid_targets_mean": 11884.9,
|
|
"valid_targets_min": 7361
|
|
},
|
|
{
|
|
"epoch": 3.9168443496801704,
|
|
"grad_norm": 0.018925951684479053,
|
|
"learning_rate": 5.5153785694956416e-06,
|
|
"loss": 0.9093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22159405052661896,
|
|
"step": 920,
|
|
"valid_targets_mean": 16231.0,
|
|
"valid_targets_min": 15518
|
|
},
|
|
{
|
|
"epoch": 3.9211087420042645,
|
|
"grad_norm": 0.02050913800726315,
|
|
"learning_rate": 5.474452856222942e-06,
|
|
"loss": 0.9866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31141167879104614,
|
|
"step": 921,
|
|
"valid_targets_mean": 16215.7,
|
|
"valid_targets_min": 15798
|
|
},
|
|
{
|
|
"epoch": 3.925373134328358,
|
|
"grad_norm": 0.0195014685651669,
|
|
"learning_rate": 5.433655459286611e-06,
|
|
"loss": 0.9387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2009022831916809,
|
|
"step": 922,
|
|
"valid_targets_mean": 13356.7,
|
|
"valid_targets_min": 4627
|
|
},
|
|
{
|
|
"epoch": 3.929637526652452,
|
|
"grad_norm": 0.0201673913176739,
|
|
"learning_rate": 5.392986739084238e-06,
|
|
"loss": 0.97,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672134041786194,
|
|
"step": 923,
|
|
"valid_targets_mean": 16151.2,
|
|
"valid_targets_min": 15399
|
|
},
|
|
{
|
|
"epoch": 3.933901918976546,
|
|
"grad_norm": 0.020206273649843613,
|
|
"learning_rate": 5.352447054876755e-06,
|
|
"loss": 0.9456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632061243057251,
|
|
"step": 924,
|
|
"valid_targets_mean": 16230.8,
|
|
"valid_targets_min": 15333
|
|
},
|
|
{
|
|
"epoch": 3.9381663113006398,
|
|
"grad_norm": 0.018925170452307166,
|
|
"learning_rate": 5.31203676478516e-06,
|
|
"loss": 0.9408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22370021045207977,
|
|
"step": 925,
|
|
"valid_targets_mean": 14914.4,
|
|
"valid_targets_min": 13230
|
|
},
|
|
{
|
|
"epoch": 3.9424307036247335,
|
|
"grad_norm": 0.020110536031052215,
|
|
"learning_rate": 5.271756225787434e-06,
|
|
"loss": 0.9533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2864748239517212,
|
|
"step": 926,
|
|
"valid_targets_mean": 16142.2,
|
|
"valid_targets_min": 15503
|
|
},
|
|
{
|
|
"epoch": 3.946695095948827,
|
|
"grad_norm": 0.021286420212468706,
|
|
"learning_rate": 5.231605793715348e-06,
|
|
"loss": 0.9716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2008804976940155,
|
|
"step": 927,
|
|
"valid_targets_mean": 10368.6,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 3.950959488272921,
|
|
"grad_norm": 0.018356137372037225,
|
|
"learning_rate": 5.191585823251335e-06,
|
|
"loss": 0.9227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2315056324005127,
|
|
"step": 928,
|
|
"valid_targets_mean": 15598.7,
|
|
"valid_targets_min": 12180
|
|
},
|
|
{
|
|
"epoch": 3.955223880597015,
|
|
"grad_norm": 0.02062783659527409,
|
|
"learning_rate": 5.151696667925348e-06,
|
|
"loss": 0.9642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28864267468452454,
|
|
"step": 929,
|
|
"valid_targets_mean": 15863.9,
|
|
"valid_targets_min": 8340
|
|
},
|
|
{
|
|
"epoch": 3.9594882729211087,
|
|
"grad_norm": 0.02039134516059958,
|
|
"learning_rate": 5.111938680111732e-06,
|
|
"loss": 0.9275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15077480673789978,
|
|
"step": 930,
|
|
"valid_targets_mean": 8380.9,
|
|
"valid_targets_min": 2596
|
|
},
|
|
{
|
|
"epoch": 3.9637526652452024,
|
|
"grad_norm": 0.01771269354999458,
|
|
"learning_rate": 5.072312211026125e-06,
|
|
"loss": 0.9212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22803837060928345,
|
|
"step": 931,
|
|
"valid_targets_mean": 16135.7,
|
|
"valid_targets_min": 15176
|
|
},
|
|
{
|
|
"epoch": 3.9680170575692966,
|
|
"grad_norm": 0.020261584510011244,
|
|
"learning_rate": 5.032817610722369e-06,
|
|
"loss": 0.9158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24781104922294617,
|
|
"step": 932,
|
|
"valid_targets_mean": 16246.9,
|
|
"valid_targets_min": 15461
|
|
},
|
|
{
|
|
"epoch": 3.9722814498933903,
|
|
"grad_norm": 0.019669964350953445,
|
|
"learning_rate": 4.993455228089366e-06,
|
|
"loss": 0.918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18308615684509277,
|
|
"step": 933,
|
|
"valid_targets_mean": 12434.4,
|
|
"valid_targets_min": 8827
|
|
},
|
|
{
|
|
"epoch": 3.976545842217484,
|
|
"grad_norm": 0.020217888109120497,
|
|
"learning_rate": 4.954225410848048e-06,
|
|
"loss": 0.9985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676087021827698,
|
|
"step": 934,
|
|
"valid_targets_mean": 16065.4,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 3.9808102345415777,
|
|
"grad_norm": 0.01983288467320496,
|
|
"learning_rate": 4.915128505548284e-06,
|
|
"loss": 0.9323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623956799507141,
|
|
"step": 935,
|
|
"valid_targets_mean": 16177.4,
|
|
"valid_targets_min": 14992
|
|
},
|
|
{
|
|
"epoch": 3.9850746268656714,
|
|
"grad_norm": 0.018765559818772336,
|
|
"learning_rate": 4.8761648575658145e-06,
|
|
"loss": 0.9186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20636151731014252,
|
|
"step": 936,
|
|
"valid_targets_mean": 14474.0,
|
|
"valid_targets_min": 12286
|
|
},
|
|
{
|
|
"epoch": 3.9893390191897655,
|
|
"grad_norm": 0.021606738450663753,
|
|
"learning_rate": 4.837334811099217e-06,
|
|
"loss": 0.971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978854775428772,
|
|
"step": 937,
|
|
"valid_targets_mean": 16000.5,
|
|
"valid_targets_min": 14018
|
|
},
|
|
{
|
|
"epoch": 3.9936034115138592,
|
|
"grad_norm": 0.019914481855883022,
|
|
"learning_rate": 4.7986387091668365e-06,
|
|
"loss": 0.9662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23050665855407715,
|
|
"step": 938,
|
|
"valid_targets_mean": 13337.4,
|
|
"valid_targets_min": 1770
|
|
},
|
|
{
|
|
"epoch": 3.997867803837953,
|
|
"grad_norm": 0.01785249742537866,
|
|
"learning_rate": 4.760076893603791e-06,
|
|
"loss": 0.9051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22730733454227448,
|
|
"step": 939,
|
|
"valid_targets_mean": 15949.0,
|
|
"valid_targets_min": 15004
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.03133689173754819,
|
|
"learning_rate": 4.721649705058926e-06,
|
|
"loss": 0.9355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41851842403411865,
|
|
"step": 940,
|
|
"valid_targets_mean": 11365.5,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 4.004264392324094,
|
|
"grad_norm": 0.01851198991302267,
|
|
"learning_rate": 4.683357482991819e-06,
|
|
"loss": 0.9041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23511791229248047,
|
|
"step": 941,
|
|
"valid_targets_mean": 15964.9,
|
|
"valid_targets_min": 15165
|
|
},
|
|
{
|
|
"epoch": 4.008528784648187,
|
|
"grad_norm": 0.020708177935980277,
|
|
"learning_rate": 4.645200565669776e-06,
|
|
"loss": 0.9725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29803574085235596,
|
|
"step": 942,
|
|
"valid_targets_mean": 16060.5,
|
|
"valid_targets_min": 13650
|
|
},
|
|
{
|
|
"epoch": 4.0127931769722816,
|
|
"grad_norm": 0.019002529440224743,
|
|
"learning_rate": 4.607179290164823e-06,
|
|
"loss": 0.9361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19187182188034058,
|
|
"step": 943,
|
|
"valid_targets_mean": 13377.0,
|
|
"valid_targets_min": 9636
|
|
},
|
|
{
|
|
"epoch": 4.017057569296376,
|
|
"grad_norm": 0.019382554256365987,
|
|
"learning_rate": 4.569293992350783e-06,
|
|
"loss": 0.9393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630158066749573,
|
|
"step": 944,
|
|
"valid_targets_mean": 16148.1,
|
|
"valid_targets_min": 14583
|
|
},
|
|
{
|
|
"epoch": 4.021321961620469,
|
|
"grad_norm": 0.02003293645282662,
|
|
"learning_rate": 4.531545006900244e-06,
|
|
"loss": 0.9269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27318400144577026,
|
|
"step": 945,
|
|
"valid_targets_mean": 16143.1,
|
|
"valid_targets_min": 15394
|
|
},
|
|
{
|
|
"epoch": 4.025586353944563,
|
|
"grad_norm": 0.018330575208918284,
|
|
"learning_rate": 4.493932667281646e-06,
|
|
"loss": 0.9443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20896703004837036,
|
|
"step": 946,
|
|
"valid_targets_mean": 14745.2,
|
|
"valid_targets_min": 12492
|
|
},
|
|
{
|
|
"epoch": 4.029850746268656,
|
|
"grad_norm": 0.019251349642787213,
|
|
"learning_rate": 4.456457305756321e-06,
|
|
"loss": 0.9698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675608992576599,
|
|
"step": 947,
|
|
"valid_targets_mean": 16077.5,
|
|
"valid_targets_min": 15241
|
|
},
|
|
{
|
|
"epoch": 4.0341151385927505,
|
|
"grad_norm": 0.020144404823965494,
|
|
"learning_rate": 4.419119253375557e-06,
|
|
"loss": 0.9356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502109408378601,
|
|
"step": 948,
|
|
"valid_targets_mean": 12985.3,
|
|
"valid_targets_min": 2610
|
|
},
|
|
{
|
|
"epoch": 4.038379530916845,
|
|
"grad_norm": 0.01901521226096702,
|
|
"learning_rate": 4.381918839977675e-06,
|
|
"loss": 0.9429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22387802600860596,
|
|
"step": 949,
|
|
"valid_targets_mean": 15468.7,
|
|
"valid_targets_min": 14270
|
|
},
|
|
{
|
|
"epoch": 4.042643923240938,
|
|
"grad_norm": 0.02034896091974211,
|
|
"learning_rate": 4.344856394185122e-06,
|
|
"loss": 0.9609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930688261985779,
|
|
"step": 950,
|
|
"valid_targets_mean": 16203.9,
|
|
"valid_targets_min": 15461
|
|
},
|
|
{
|
|
"epoch": 4.046908315565032,
|
|
"grad_norm": 0.02041439591537469,
|
|
"learning_rate": 4.307932243401538e-06,
|
|
"loss": 0.9833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17511054873466492,
|
|
"step": 951,
|
|
"valid_targets_mean": 9410.5,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 4.051172707889126,
|
|
"grad_norm": 0.01881302505377758,
|
|
"learning_rate": 4.271146713808927e-06,
|
|
"loss": 0.912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2191697061061859,
|
|
"step": 952,
|
|
"valid_targets_mean": 15749.8,
|
|
"valid_targets_min": 13757
|
|
},
|
|
{
|
|
"epoch": 4.0554371002132195,
|
|
"grad_norm": 0.020791304361025123,
|
|
"learning_rate": 4.234500130364698e-06,
|
|
"loss": 0.9888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2906889319419861,
|
|
"step": 953,
|
|
"valid_targets_mean": 16113.4,
|
|
"valid_targets_min": 15022
|
|
},
|
|
{
|
|
"epoch": 4.059701492537314,
|
|
"grad_norm": 0.019114805676627586,
|
|
"learning_rate": 4.197992816798851e-06,
|
|
"loss": 0.9374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15856197476387024,
|
|
"step": 954,
|
|
"valid_targets_mean": 10371.4,
|
|
"valid_targets_min": 5470
|
|
},
|
|
{
|
|
"epoch": 4.063965884861407,
|
|
"grad_norm": 0.019271598505117338,
|
|
"learning_rate": 4.161625095611101e-06,
|
|
"loss": 0.9553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697962522506714,
|
|
"step": 955,
|
|
"valid_targets_mean": 16006.1,
|
|
"valid_targets_min": 14672
|
|
},
|
|
{
|
|
"epoch": 4.068230277185501,
|
|
"grad_norm": 0.020279689337150263,
|
|
"learning_rate": 4.125397288068007e-06,
|
|
"loss": 0.9419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26132532954216003,
|
|
"step": 956,
|
|
"valid_targets_mean": 16058.7,
|
|
"valid_targets_min": 14985
|
|
},
|
|
{
|
|
"epoch": 4.072494669509595,
|
|
"grad_norm": 0.019535721768024136,
|
|
"learning_rate": 4.089309714200187e-06,
|
|
"loss": 0.9605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20936521887779236,
|
|
"step": 957,
|
|
"valid_targets_mean": 13024.5,
|
|
"valid_targets_min": 10278
|
|
},
|
|
{
|
|
"epoch": 4.076759061833688,
|
|
"grad_norm": 0.01988899415587063,
|
|
"learning_rate": 4.0533626927994185e-06,
|
|
"loss": 0.9685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2595033645629883,
|
|
"step": 958,
|
|
"valid_targets_mean": 16155.5,
|
|
"valid_targets_min": 14780
|
|
},
|
|
{
|
|
"epoch": 4.081023454157783,
|
|
"grad_norm": 0.020518116171747425,
|
|
"learning_rate": 4.017556541415888e-06,
|
|
"loss": 0.9602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741844058036804,
|
|
"step": 959,
|
|
"valid_targets_mean": 16128.8,
|
|
"valid_targets_min": 15284
|
|
},
|
|
{
|
|
"epoch": 4.085287846481877,
|
|
"grad_norm": 0.019373297288481858,
|
|
"learning_rate": 3.981891576355352e-06,
|
|
"loss": 0.9746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20977728068828583,
|
|
"step": 960,
|
|
"valid_targets_mean": 14693.7,
|
|
"valid_targets_min": 12216
|
|
},
|
|
{
|
|
"epoch": 4.08955223880597,
|
|
"grad_norm": 0.02034551415714717,
|
|
"learning_rate": 3.946368112676346e-06,
|
|
"loss": 0.8977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720681428909302,
|
|
"step": 961,
|
|
"valid_targets_mean": 16112.5,
|
|
"valid_targets_min": 15147
|
|
},
|
|
{
|
|
"epoch": 4.093816631130064,
|
|
"grad_norm": 0.020256333944331892,
|
|
"learning_rate": 3.9109864641874166e-06,
|
|
"loss": 0.9132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1881796419620514,
|
|
"step": 962,
|
|
"valid_targets_mean": 11331.5,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 4.098081023454157,
|
|
"grad_norm": 0.01880104633804974,
|
|
"learning_rate": 3.875746943444316e-06,
|
|
"loss": 0.9337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23153170943260193,
|
|
"step": 963,
|
|
"valid_targets_mean": 15332.2,
|
|
"valid_targets_min": 13890
|
|
},
|
|
{
|
|
"epoch": 4.1023454157782515,
|
|
"grad_norm": 0.02094933826031041,
|
|
"learning_rate": 3.840649861747278e-06,
|
|
"loss": 0.9623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670443058013916,
|
|
"step": 964,
|
|
"valid_targets_mean": 16179.3,
|
|
"valid_targets_min": 15431
|
|
},
|
|
{
|
|
"epoch": 4.106609808102346,
|
|
"grad_norm": 0.020134098860842903,
|
|
"learning_rate": 3.8056955291382667e-06,
|
|
"loss": 0.9776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1415679156780243,
|
|
"step": 965,
|
|
"valid_targets_mean": 8255.3,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 4.110874200426439,
|
|
"grad_norm": 0.018321105716060028,
|
|
"learning_rate": 3.7708842543981928e-06,
|
|
"loss": 0.9506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24137112498283386,
|
|
"step": 966,
|
|
"valid_targets_mean": 16137.9,
|
|
"valid_targets_min": 14908
|
|
},
|
|
{
|
|
"epoch": 4.115138592750533,
|
|
"grad_norm": 0.020316004716552306,
|
|
"learning_rate": 3.736216345044237e-06,
|
|
"loss": 0.9295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848181426525116,
|
|
"step": 967,
|
|
"valid_targets_mean": 16079.9,
|
|
"valid_targets_min": 13812
|
|
},
|
|
{
|
|
"epoch": 4.119402985074627,
|
|
"grad_norm": 0.019550064678395988,
|
|
"learning_rate": 3.7016921073271084e-06,
|
|
"loss": 0.9707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20551353693008423,
|
|
"step": 968,
|
|
"valid_targets_mean": 12475.4,
|
|
"valid_targets_min": 8221
|
|
},
|
|
{
|
|
"epoch": 4.1236673773987205,
|
|
"grad_norm": 0.019910875975105293,
|
|
"learning_rate": 3.6673118462283453e-06,
|
|
"loss": 0.9428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24119476974010468,
|
|
"step": 969,
|
|
"valid_targets_mean": 16200.9,
|
|
"valid_targets_min": 15736
|
|
},
|
|
{
|
|
"epoch": 4.127931769722815,
|
|
"grad_norm": 0.020936656726602422,
|
|
"learning_rate": 3.6330758654576227e-06,
|
|
"loss": 0.9321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27154016494750977,
|
|
"step": 970,
|
|
"valid_targets_mean": 16181.5,
|
|
"valid_targets_min": 14773
|
|
},
|
|
{
|
|
"epoch": 4.132196162046908,
|
|
"grad_norm": 0.0193752126978359,
|
|
"learning_rate": 3.598984467450055e-06,
|
|
"loss": 0.9721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22747036814689636,
|
|
"step": 971,
|
|
"valid_targets_mean": 14347.8,
|
|
"valid_targets_min": 12960
|
|
},
|
|
{
|
|
"epoch": 4.136460554371002,
|
|
"grad_norm": 0.019805545736292503,
|
|
"learning_rate": 3.565037953363546e-06,
|
|
"loss": 0.9688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719014883041382,
|
|
"step": 972,
|
|
"valid_targets_mean": 16163.3,
|
|
"valid_targets_min": 15618
|
|
},
|
|
{
|
|
"epoch": 4.140724946695096,
|
|
"grad_norm": 0.020140326435135344,
|
|
"learning_rate": 3.5312366230761154e-06,
|
|
"loss": 0.9698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22768893837928772,
|
|
"step": 973,
|
|
"valid_targets_mean": 13224.1,
|
|
"valid_targets_min": 3308
|
|
},
|
|
{
|
|
"epoch": 4.144989339019189,
|
|
"grad_norm": 0.018780227480591393,
|
|
"learning_rate": 3.497580775183258e-06,
|
|
"loss": 0.9134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2152576446533203,
|
|
"step": 974,
|
|
"valid_targets_mean": 15624.3,
|
|
"valid_targets_min": 14317
|
|
},
|
|
{
|
|
"epoch": 4.149253731343284,
|
|
"grad_norm": 0.020098552793916362,
|
|
"learning_rate": 3.464070706995295e-06,
|
|
"loss": 0.9683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29875028133392334,
|
|
"step": 975,
|
|
"valid_targets_mean": 16016.7,
|
|
"valid_targets_min": 14349
|
|
},
|
|
{
|
|
"epoch": 4.153518123667378,
|
|
"grad_norm": 0.021203612334944222,
|
|
"learning_rate": 3.4307067145347417e-06,
|
|
"loss": 0.9597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16463643312454224,
|
|
"step": 976,
|
|
"valid_targets_mean": 8657.0,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 4.157782515991471,
|
|
"grad_norm": 0.018538554727998147,
|
|
"learning_rate": 3.397489092533739e-06,
|
|
"loss": 0.9736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24350497126579285,
|
|
"step": 977,
|
|
"valid_targets_mean": 16030.3,
|
|
"valid_targets_min": 15557
|
|
},
|
|
{
|
|
"epoch": 4.162046908315565,
|
|
"grad_norm": 0.020448226619210357,
|
|
"learning_rate": 3.364418134431371e-06,
|
|
"loss": 0.9737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28747472167015076,
|
|
"step": 978,
|
|
"valid_targets_mean": 16065.1,
|
|
"valid_targets_min": 15187
|
|
},
|
|
{
|
|
"epoch": 4.166311300639659,
|
|
"grad_norm": 0.020782494759939336,
|
|
"learning_rate": 3.331494132371149e-06,
|
|
"loss": 0.9251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17768849432468414,
|
|
"step": 979,
|
|
"valid_targets_mean": 10658.4,
|
|
"valid_targets_min": 5140
|
|
},
|
|
{
|
|
"epoch": 4.1705756929637525,
|
|
"grad_norm": 0.019615478508686855,
|
|
"learning_rate": 3.2987173771983816e-06,
|
|
"loss": 0.9493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24159884452819824,
|
|
"step": 980,
|
|
"valid_targets_mean": 16103.1,
|
|
"valid_targets_min": 14573
|
|
},
|
|
{
|
|
"epoch": 4.174840085287847,
|
|
"grad_norm": 0.020701605551052646,
|
|
"learning_rate": 3.266088158457634e-06,
|
|
"loss": 0.9226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901107370853424,
|
|
"step": 981,
|
|
"valid_targets_mean": 16209.4,
|
|
"valid_targets_min": 15512
|
|
},
|
|
{
|
|
"epoch": 4.17910447761194,
|
|
"grad_norm": 0.019091127444802635,
|
|
"learning_rate": 3.233606764390147e-06,
|
|
"loss": 0.9389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21149133145809174,
|
|
"step": 982,
|
|
"valid_targets_mean": 14157.6,
|
|
"valid_targets_min": 10318
|
|
},
|
|
{
|
|
"epoch": 4.183368869936034,
|
|
"grad_norm": 0.019407388103521186,
|
|
"learning_rate": 3.2012734819313127e-06,
|
|
"loss": 0.9029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528495490550995,
|
|
"step": 983,
|
|
"valid_targets_mean": 16177.3,
|
|
"valid_targets_min": 15537
|
|
},
|
|
{
|
|
"epoch": 4.187633262260128,
|
|
"grad_norm": 0.020179313653401046,
|
|
"learning_rate": 3.1690885967081187e-06,
|
|
"loss": 0.9382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716379165649414,
|
|
"step": 984,
|
|
"valid_targets_mean": 16195.2,
|
|
"valid_targets_min": 15190
|
|
},
|
|
{
|
|
"epoch": 4.1918976545842215,
|
|
"grad_norm": 0.01916574265436273,
|
|
"learning_rate": 3.1370523930366393e-06,
|
|
"loss": 0.938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21267694234848022,
|
|
"step": 985,
|
|
"valid_targets_mean": 15314.6,
|
|
"valid_targets_min": 13641
|
|
},
|
|
{
|
|
"epoch": 4.196162046908316,
|
|
"grad_norm": 0.020043703144088876,
|
|
"learning_rate": 3.105165153919525e-06,
|
|
"loss": 0.9926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3024527430534363,
|
|
"step": 986,
|
|
"valid_targets_mean": 16018.2,
|
|
"valid_targets_min": 12845
|
|
},
|
|
{
|
|
"epoch": 4.20042643923241,
|
|
"grad_norm": 0.020254227267025506,
|
|
"learning_rate": 3.073427161043492e-06,
|
|
"loss": 0.9609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856076717376709,
|
|
"step": 987,
|
|
"valid_targets_mean": 10659.9,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 4.204690831556503,
|
|
"grad_norm": 0.018878251074499083,
|
|
"learning_rate": 3.0418386947768463e-06,
|
|
"loss": 0.9578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2362675815820694,
|
|
"step": 988,
|
|
"valid_targets_mean": 15743.0,
|
|
"valid_targets_min": 15003
|
|
},
|
|
{
|
|
"epoch": 4.208955223880597,
|
|
"grad_norm": 0.01990760246769576,
|
|
"learning_rate": 3.01040003416698e-06,
|
|
"loss": 0.994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868589162826538,
|
|
"step": 989,
|
|
"valid_targets_mean": 16090.3,
|
|
"valid_targets_min": 14502
|
|
},
|
|
{
|
|
"epoch": 4.21321961620469,
|
|
"grad_norm": 0.019676727478791903,
|
|
"learning_rate": 2.97911145693796e-06,
|
|
"loss": 0.9038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564481556415558,
|
|
"step": 990,
|
|
"valid_targets_mean": 8862.3,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 4.217484008528785,
|
|
"grad_norm": 0.018744007057569526,
|
|
"learning_rate": 2.947973239488009e-06,
|
|
"loss": 0.958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25635114312171936,
|
|
"step": 991,
|
|
"valid_targets_mean": 15967.7,
|
|
"valid_targets_min": 13388
|
|
},
|
|
{
|
|
"epoch": 4.221748400852879,
|
|
"grad_norm": 0.021332341929546018,
|
|
"learning_rate": 2.91698565688711e-06,
|
|
"loss": 0.985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885449528694153,
|
|
"step": 992,
|
|
"valid_targets_mean": 16077.7,
|
|
"valid_targets_min": 13943
|
|
},
|
|
{
|
|
"epoch": 4.226012793176972,
|
|
"grad_norm": 0.020157224661857626,
|
|
"learning_rate": 2.886148982874566e-06,
|
|
"loss": 0.906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1708824783563614,
|
|
"step": 993,
|
|
"valid_targets_mean": 12403.5,
|
|
"valid_targets_min": 10756
|
|
},
|
|
{
|
|
"epoch": 4.230277185501066,
|
|
"grad_norm": 0.01918973375733235,
|
|
"learning_rate": 2.8554634898565668e-06,
|
|
"loss": 0.947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27376455068588257,
|
|
"step": 994,
|
|
"valid_targets_mean": 16102.2,
|
|
"valid_targets_min": 15327
|
|
},
|
|
{
|
|
"epoch": 4.23454157782516,
|
|
"grad_norm": 0.020178299434711753,
|
|
"learning_rate": 2.824929448903806e-06,
|
|
"loss": 0.9081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676343321800232,
|
|
"step": 995,
|
|
"valid_targets_mean": 16181.3,
|
|
"valid_targets_min": 14817
|
|
},
|
|
{
|
|
"epoch": 4.2388059701492535,
|
|
"grad_norm": 0.018129015337045525,
|
|
"learning_rate": 2.794547129749059e-06,
|
|
"loss": 0.9204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2107650637626648,
|
|
"step": 996,
|
|
"valid_targets_mean": 14244.8,
|
|
"valid_targets_min": 11995
|
|
},
|
|
{
|
|
"epoch": 4.243070362473348,
|
|
"grad_norm": 0.019779689302189523,
|
|
"learning_rate": 2.7643168007848255e-06,
|
|
"loss": 0.9191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537640333175659,
|
|
"step": 997,
|
|
"valid_targets_mean": 16111.2,
|
|
"valid_targets_min": 15227
|
|
},
|
|
{
|
|
"epoch": 4.247334754797441,
|
|
"grad_norm": 0.02050204589685071,
|
|
"learning_rate": 2.734238729060956e-06,
|
|
"loss": 0.9416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19075614213943481,
|
|
"step": 998,
|
|
"valid_targets_mean": 13332.4,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 4.251599147121535,
|
|
"grad_norm": 0.019120076847851688,
|
|
"learning_rate": 2.7043131802822653e-06,
|
|
"loss": 0.9347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2312447428703308,
|
|
"step": 999,
|
|
"valid_targets_mean": 15897.9,
|
|
"valid_targets_min": 14742
|
|
},
|
|
{
|
|
"epoch": 4.255863539445629,
|
|
"grad_norm": 0.022391333648321896,
|
|
"learning_rate": 2.674540418806222e-06,
|
|
"loss": 1.0074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841699719429016,
|
|
"step": 1000,
|
|
"valid_targets_mean": 16178.8,
|
|
"valid_targets_min": 15355
|
|
},
|
|
{
|
|
"epoch": 4.2601279317697225,
|
|
"grad_norm": 0.02076874296787263,
|
|
"learning_rate": 2.6449207076405857e-06,
|
|
"loss": 0.9552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1751863956451416,
|
|
"step": 1001,
|
|
"valid_targets_mean": 9174.9,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 4.264392324093817,
|
|
"grad_norm": 0.01830283877088007,
|
|
"learning_rate": 2.6154543084411035e-06,
|
|
"loss": 0.9345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22500377893447876,
|
|
"step": 1002,
|
|
"valid_targets_mean": 16115.2,
|
|
"valid_targets_min": 15329
|
|
},
|
|
{
|
|
"epoch": 4.268656716417911,
|
|
"grad_norm": 0.021275644853210155,
|
|
"learning_rate": 2.5861414815091834e-06,
|
|
"loss": 0.948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887324094772339,
|
|
"step": 1003,
|
|
"valid_targets_mean": 16110.2,
|
|
"valid_targets_min": 15078
|
|
},
|
|
{
|
|
"epoch": 4.272921108742004,
|
|
"grad_norm": 0.018977062937362458,
|
|
"learning_rate": 2.5569824857895987e-06,
|
|
"loss": 0.8978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514780968427658,
|
|
"step": 1004,
|
|
"valid_targets_mean": 10638.2,
|
|
"valid_targets_min": 5783
|
|
},
|
|
{
|
|
"epoch": 4.277185501066098,
|
|
"grad_norm": 0.01927458600962972,
|
|
"learning_rate": 2.5279775788682083e-06,
|
|
"loss": 0.943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25429248809814453,
|
|
"step": 1005,
|
|
"valid_targets_mean": 16120.9,
|
|
"valid_targets_min": 14989
|
|
},
|
|
{
|
|
"epoch": 4.281449893390192,
|
|
"grad_norm": 0.020481333757097736,
|
|
"learning_rate": 2.499127016969671e-06,
|
|
"loss": 0.912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2850632667541504,
|
|
"step": 1006,
|
|
"valid_targets_mean": 16069.5,
|
|
"valid_targets_min": 13650
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.01873842417783604,
|
|
"learning_rate": 2.4704310549551934e-06,
|
|
"loss": 0.976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23339056968688965,
|
|
"step": 1007,
|
|
"valid_targets_mean": 15073.2,
|
|
"valid_targets_min": 13082
|
|
},
|
|
{
|
|
"epoch": 4.28997867803838,
|
|
"grad_norm": 0.019609226784137238,
|
|
"learning_rate": 2.441889946320266e-06,
|
|
"loss": 0.9143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24752402305603027,
|
|
"step": 1008,
|
|
"valid_targets_mean": 16160.9,
|
|
"valid_targets_min": 15423
|
|
},
|
|
{
|
|
"epoch": 4.294243070362473,
|
|
"grad_norm": 0.021297488256979004,
|
|
"learning_rate": 2.4135039431924233e-06,
|
|
"loss": 0.9752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32217937707901,
|
|
"step": 1009,
|
|
"valid_targets_mean": 16106.0,
|
|
"valid_targets_min": 15008
|
|
},
|
|
{
|
|
"epoch": 4.298507462686567,
|
|
"grad_norm": 0.018730018918134136,
|
|
"learning_rate": 2.3852732963290426e-06,
|
|
"loss": 0.9009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21674010157585144,
|
|
"step": 1010,
|
|
"valid_targets_mean": 15384.8,
|
|
"valid_targets_min": 14002
|
|
},
|
|
{
|
|
"epoch": 4.302771855010661,
|
|
"grad_norm": 0.01948387792165474,
|
|
"learning_rate": 2.3571982551150853e-06,
|
|
"loss": 0.9494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27372682094573975,
|
|
"step": 1011,
|
|
"valid_targets_mean": 16193.6,
|
|
"valid_targets_min": 15132
|
|
},
|
|
{
|
|
"epoch": 4.3070362473347545,
|
|
"grad_norm": 0.01999266350523194,
|
|
"learning_rate": 2.329279067560937e-06,
|
|
"loss": 0.9415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18281897902488708,
|
|
"step": 1012,
|
|
"valid_targets_mean": 11232.1,
|
|
"valid_targets_min": 2660
|
|
},
|
|
{
|
|
"epoch": 4.311300639658849,
|
|
"grad_norm": 0.018821194189866906,
|
|
"learning_rate": 2.301515980300182e-06,
|
|
"loss": 0.914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.207330584526062,
|
|
"step": 1013,
|
|
"valid_targets_mean": 15675.3,
|
|
"valid_targets_min": 14290
|
|
},
|
|
{
|
|
"epoch": 4.315565031982943,
|
|
"grad_norm": 0.021789786099465195,
|
|
"learning_rate": 2.2739092385874527e-06,
|
|
"loss": 0.9829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30179405212402344,
|
|
"step": 1014,
|
|
"valid_targets_mean": 15975.0,
|
|
"valid_targets_min": 12845
|
|
},
|
|
{
|
|
"epoch": 4.319829424307036,
|
|
"grad_norm": 0.019915079668746928,
|
|
"learning_rate": 2.2464590862962443e-06,
|
|
"loss": 0.9811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15248794853687286,
|
|
"step": 1015,
|
|
"valid_targets_mean": 9898.7,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 4.32409381663113,
|
|
"grad_norm": 0.018051755482265043,
|
|
"learning_rate": 2.219165765916769e-06,
|
|
"loss": 0.9265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23914167284965515,
|
|
"step": 1016,
|
|
"valid_targets_mean": 16139.6,
|
|
"valid_targets_min": 15284
|
|
},
|
|
{
|
|
"epoch": 4.3283582089552235,
|
|
"grad_norm": 0.019715851628171518,
|
|
"learning_rate": 2.192029518553798e-06,
|
|
"loss": 0.9487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26288068294525146,
|
|
"step": 1017,
|
|
"valid_targets_mean": 16168.2,
|
|
"valid_targets_min": 15185
|
|
},
|
|
{
|
|
"epoch": 4.332622601279318,
|
|
"grad_norm": 0.01929542912894581,
|
|
"learning_rate": 2.165050583924566e-06,
|
|
"loss": 0.9403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16973882913589478,
|
|
"step": 1018,
|
|
"valid_targets_mean": 10906.1,
|
|
"valid_targets_min": 8025
|
|
},
|
|
{
|
|
"epoch": 4.336886993603412,
|
|
"grad_norm": 0.01864997162138354,
|
|
"learning_rate": 2.1382292003566163e-06,
|
|
"loss": 0.9313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570291757583618,
|
|
"step": 1019,
|
|
"valid_targets_mean": 16032.0,
|
|
"valid_targets_min": 13562
|
|
},
|
|
{
|
|
"epoch": 4.341151385927505,
|
|
"grad_norm": 0.021617010416131465,
|
|
"learning_rate": 2.1115656047857213e-06,
|
|
"loss": 0.9402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757907807826996,
|
|
"step": 1020,
|
|
"valid_targets_mean": 16194.7,
|
|
"valid_targets_min": 15499
|
|
},
|
|
{
|
|
"epoch": 4.345415778251599,
|
|
"grad_norm": 0.018869530948784012,
|
|
"learning_rate": 2.0850600327537806e-06,
|
|
"loss": 0.9478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22190511226654053,
|
|
"step": 1021,
|
|
"valid_targets_mean": 14970.5,
|
|
"valid_targets_min": 12944
|
|
},
|
|
{
|
|
"epoch": 4.349680170575693,
|
|
"grad_norm": 0.020682322360513206,
|
|
"learning_rate": 2.058712718406719e-06,
|
|
"loss": 0.9257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609671950340271,
|
|
"step": 1022,
|
|
"valid_targets_mean": 16127.3,
|
|
"valid_targets_min": 14336
|
|
},
|
|
{
|
|
"epoch": 4.353944562899787,
|
|
"grad_norm": 0.01925113797376703,
|
|
"learning_rate": 2.032523894492471e-06,
|
|
"loss": 0.9224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20982250571250916,
|
|
"step": 1023,
|
|
"valid_targets_mean": 13785.0,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 4.358208955223881,
|
|
"grad_norm": 0.01824307429681632,
|
|
"learning_rate": 2.0064937923588634e-06,
|
|
"loss": 0.9277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22576871514320374,
|
|
"step": 1024,
|
|
"valid_targets_mean": 15343.4,
|
|
"valid_targets_min": 13862
|
|
},
|
|
{
|
|
"epoch": 4.362473347547974,
|
|
"grad_norm": 0.02103756151736428,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 0.9579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3052058219909668,
|
|
"step": 1025,
|
|
"valid_targets_mean": 16129.2,
|
|
"valid_targets_min": 15310
|
|
},
|
|
{
|
|
"epoch": 4.366737739872068,
|
|
"grad_norm": 0.01918587674521911,
|
|
"learning_rate": 1.954910671812298e-06,
|
|
"loss": 0.9024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1667296141386032,
|
|
"step": 1026,
|
|
"valid_targets_mean": 10905.8,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 4.371002132196162,
|
|
"grad_norm": 0.01807443588082017,
|
|
"learning_rate": 1.9293581090762894e-06,
|
|
"loss": 0.9121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2204025685787201,
|
|
"step": 1027,
|
|
"valid_targets_mean": 16147.0,
|
|
"valid_targets_min": 15444
|
|
},
|
|
{
|
|
"epoch": 4.3752665245202556,
|
|
"grad_norm": 0.01985421432149515,
|
|
"learning_rate": 1.9039651794708058e-06,
|
|
"loss": 0.9259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27803587913513184,
|
|
"step": 1028,
|
|
"valid_targets_mean": 16182.4,
|
|
"valid_targets_min": 15490
|
|
},
|
|
{
|
|
"epoch": 4.37953091684435,
|
|
"grad_norm": 0.01931159567366713,
|
|
"learning_rate": 1.8787321073128817e-06,
|
|
"loss": 0.9253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1606777161359787,
|
|
"step": 1029,
|
|
"valid_targets_mean": 11006.8,
|
|
"valid_targets_min": 6654
|
|
},
|
|
{
|
|
"epoch": 4.383795309168444,
|
|
"grad_norm": 0.018889975647310046,
|
|
"learning_rate": 1.8536591155073958e-06,
|
|
"loss": 0.9854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571524381637573,
|
|
"step": 1030,
|
|
"valid_targets_mean": 16116.6,
|
|
"valid_targets_min": 14077
|
|
},
|
|
{
|
|
"epoch": 4.388059701492537,
|
|
"grad_norm": 0.020761191841908284,
|
|
"learning_rate": 1.8287464255451181e-06,
|
|
"loss": 0.9597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302034854888916,
|
|
"step": 1031,
|
|
"valid_targets_mean": 16144.4,
|
|
"valid_targets_min": 15289
|
|
},
|
|
{
|
|
"epoch": 4.392324093816631,
|
|
"grad_norm": 0.020152254319662017,
|
|
"learning_rate": 1.803994257500714e-06,
|
|
"loss": 1.0041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2143762707710266,
|
|
"step": 1032,
|
|
"valid_targets_mean": 12591.1,
|
|
"valid_targets_min": 9857
|
|
},
|
|
{
|
|
"epoch": 4.396588486140725,
|
|
"grad_norm": 0.019142802883513072,
|
|
"learning_rate": 1.7794028300308474e-06,
|
|
"loss": 0.9168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27181658148765564,
|
|
"step": 1033,
|
|
"valid_targets_mean": 16063.5,
|
|
"valid_targets_min": 14192
|
|
},
|
|
{
|
|
"epoch": 4.400852878464819,
|
|
"grad_norm": 0.020039791831845007,
|
|
"learning_rate": 1.7549723603722003e-06,
|
|
"loss": 0.9227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607421576976776,
|
|
"step": 1034,
|
|
"valid_targets_mean": 16118.1,
|
|
"valid_targets_min": 14102
|
|
},
|
|
{
|
|
"epoch": 4.405117270788913,
|
|
"grad_norm": 0.018909456709987973,
|
|
"learning_rate": 1.730703064339605e-06,
|
|
"loss": 0.9453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21211911737918854,
|
|
"step": 1035,
|
|
"valid_targets_mean": 15246.7,
|
|
"valid_targets_min": 13804
|
|
},
|
|
{
|
|
"epoch": 4.409381663113006,
|
|
"grad_norm": 0.020129469635513125,
|
|
"learning_rate": 1.7065951563241022e-06,
|
|
"loss": 0.9425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25638172030448914,
|
|
"step": 1036,
|
|
"valid_targets_mean": 16167.9,
|
|
"valid_targets_min": 15474
|
|
},
|
|
{
|
|
"epoch": 4.4136460554371,
|
|
"grad_norm": 0.020631503694230564,
|
|
"learning_rate": 1.682648849291051e-06,
|
|
"loss": 0.9398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18718141317367554,
|
|
"step": 1037,
|
|
"valid_targets_mean": 11739.2,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 4.417910447761194,
|
|
"grad_norm": 0.018940950759025323,
|
|
"learning_rate": 1.6588643547782579e-06,
|
|
"loss": 0.9232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23882335424423218,
|
|
"step": 1038,
|
|
"valid_targets_mean": 16085.4,
|
|
"valid_targets_min": 14218
|
|
},
|
|
{
|
|
"epoch": 4.422174840085288,
|
|
"grad_norm": 0.019559604988065877,
|
|
"learning_rate": 1.6352418828941052e-06,
|
|
"loss": 0.9529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2685803174972534,
|
|
"step": 1039,
|
|
"valid_targets_mean": 16194.3,
|
|
"valid_targets_min": 15703
|
|
},
|
|
{
|
|
"epoch": 4.426439232409382,
|
|
"grad_norm": 0.01978701031765758,
|
|
"learning_rate": 1.6117816423156952e-06,
|
|
"loss": 0.9388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1499197632074356,
|
|
"step": 1040,
|
|
"valid_targets_mean": 9103.2,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 4.430703624733475,
|
|
"grad_norm": 0.01789055519871019,
|
|
"learning_rate": 1.5884838402870029e-06,
|
|
"loss": 0.9216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22271327674388885,
|
|
"step": 1041,
|
|
"valid_targets_mean": 16154.8,
|
|
"valid_targets_min": 15284
|
|
},
|
|
{
|
|
"epoch": 4.434968017057569,
|
|
"grad_norm": 0.01990516856201002,
|
|
"learning_rate": 1.5653486826170384e-06,
|
|
"loss": 0.9428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25900232791900635,
|
|
"step": 1042,
|
|
"valid_targets_mean": 16143.0,
|
|
"valid_targets_min": 14127
|
|
},
|
|
{
|
|
"epoch": 4.439232409381663,
|
|
"grad_norm": 0.019195007428096437,
|
|
"learning_rate": 1.5423763736780583e-06,
|
|
"loss": 0.9062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19074153900146484,
|
|
"step": 1043,
|
|
"valid_targets_mean": 11563.5,
|
|
"valid_targets_min": 9108
|
|
},
|
|
{
|
|
"epoch": 4.443496801705757,
|
|
"grad_norm": 0.019733640159102155,
|
|
"learning_rate": 1.5195671164037173e-06,
|
|
"loss": 0.9436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24881459772586823,
|
|
"step": 1044,
|
|
"valid_targets_mean": 16244.1,
|
|
"valid_targets_min": 15406
|
|
},
|
|
{
|
|
"epoch": 4.447761194029851,
|
|
"grad_norm": 0.019176783532983285,
|
|
"learning_rate": 1.496921112287315e-06,
|
|
"loss": 0.9237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26078590750694275,
|
|
"step": 1045,
|
|
"valid_targets_mean": 16166.8,
|
|
"valid_targets_min": 14849
|
|
},
|
|
{
|
|
"epoch": 4.452025586353945,
|
|
"grad_norm": 0.018649964679026326,
|
|
"learning_rate": 1.4744385613799894e-06,
|
|
"loss": 0.9395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22432667016983032,
|
|
"step": 1046,
|
|
"valid_targets_mean": 14877.0,
|
|
"valid_targets_min": 13736
|
|
},
|
|
{
|
|
"epoch": 4.456289978678038,
|
|
"grad_norm": 0.01969549945940577,
|
|
"learning_rate": 1.4521196622889644e-06,
|
|
"loss": 0.9501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25626927614212036,
|
|
"step": 1047,
|
|
"valid_targets_mean": 16187.7,
|
|
"valid_targets_min": 15499
|
|
},
|
|
{
|
|
"epoch": 4.460554371002132,
|
|
"grad_norm": 0.020359676147203066,
|
|
"learning_rate": 1.4299646121757892e-06,
|
|
"loss": 0.9075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2075168788433075,
|
|
"step": 1048,
|
|
"valid_targets_mean": 12783.7,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 4.464818763326226,
|
|
"grad_norm": 0.0183011272714233,
|
|
"learning_rate": 1.4079736067545912e-06,
|
|
"loss": 0.9608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2400706559419632,
|
|
"step": 1049,
|
|
"valid_targets_mean": 15870.5,
|
|
"valid_targets_min": 13973
|
|
},
|
|
{
|
|
"epoch": 4.46908315565032,
|
|
"grad_norm": 0.020666342751524014,
|
|
"learning_rate": 1.3861468402903634e-06,
|
|
"loss": 0.9823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28990721702575684,
|
|
"step": 1050,
|
|
"valid_targets_mean": 15983.3,
|
|
"valid_targets_min": 12845
|
|
},
|
|
{
|
|
"epoch": 4.473347547974414,
|
|
"grad_norm": 0.020282319388245627,
|
|
"learning_rate": 1.3644845055972322e-06,
|
|
"loss": 0.934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163664773106575,
|
|
"step": 1051,
|
|
"valid_targets_mean": 9544.9,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 4.477611940298507,
|
|
"grad_norm": 0.018975224835480333,
|
|
"learning_rate": 1.3429867940367626e-06,
|
|
"loss": 0.9506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24026456475257874,
|
|
"step": 1052,
|
|
"valid_targets_mean": 16061.4,
|
|
"valid_targets_min": 15168
|
|
},
|
|
{
|
|
"epoch": 4.481876332622601,
|
|
"grad_norm": 0.019871663045816706,
|
|
"learning_rate": 1.321653895516264e-06,
|
|
"loss": 0.968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27119681239128113,
|
|
"step": 1053,
|
|
"valid_targets_mean": 16133.0,
|
|
"valid_targets_min": 14239
|
|
},
|
|
{
|
|
"epoch": 4.486140724946695,
|
|
"grad_norm": 0.020292806622639034,
|
|
"learning_rate": 1.3004859984871199e-06,
|
|
"loss": 1.0012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.190711110830307,
|
|
"step": 1054,
|
|
"valid_targets_mean": 10429.0,
|
|
"valid_targets_min": 6343
|
|
},
|
|
{
|
|
"epoch": 4.490405117270789,
|
|
"grad_norm": 0.01925811074420303,
|
|
"learning_rate": 1.279483289943102e-06,
|
|
"loss": 0.9865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2431420087814331,
|
|
"step": 1055,
|
|
"valid_targets_mean": 16202.6,
|
|
"valid_targets_min": 15483
|
|
},
|
|
{
|
|
"epoch": 4.494669509594883,
|
|
"grad_norm": 0.02048172840568444,
|
|
"learning_rate": 1.2586459554187558e-06,
|
|
"loss": 0.9097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681732773780823,
|
|
"step": 1056,
|
|
"valid_targets_mean": 15992.3,
|
|
"valid_targets_min": 10621
|
|
},
|
|
{
|
|
"epoch": 4.498933901918977,
|
|
"grad_norm": 0.01869103341150067,
|
|
"learning_rate": 1.2379741789877175e-06,
|
|
"loss": 0.9149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19013798236846924,
|
|
"step": 1057,
|
|
"valid_targets_mean": 13706.4,
|
|
"valid_targets_min": 11685
|
|
},
|
|
{
|
|
"epoch": 4.50319829424307,
|
|
"grad_norm": 0.02050350206169562,
|
|
"learning_rate": 1.2174681432611245e-06,
|
|
"loss": 0.9601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26107659935951233,
|
|
"step": 1058,
|
|
"valid_targets_mean": 16188.1,
|
|
"valid_targets_min": 15357
|
|
},
|
|
{
|
|
"epoch": 4.507462686567164,
|
|
"grad_norm": 0.021194636999938173,
|
|
"learning_rate": 1.1971280293859811e-06,
|
|
"loss": 1.0011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289431631565094,
|
|
"step": 1059,
|
|
"valid_targets_mean": 16093.3,
|
|
"valid_targets_min": 15232
|
|
},
|
|
{
|
|
"epoch": 4.5117270788912585,
|
|
"grad_norm": 0.018570568768941257,
|
|
"learning_rate": 1.17695401704357e-06,
|
|
"loss": 0.929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2056170105934143,
|
|
"step": 1060,
|
|
"valid_targets_mean": 15146.8,
|
|
"valid_targets_min": 13915
|
|
},
|
|
{
|
|
"epoch": 4.515991471215352,
|
|
"grad_norm": 0.01972023742632219,
|
|
"learning_rate": 1.1569462844478552e-06,
|
|
"loss": 0.9201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711913585662842,
|
|
"step": 1061,
|
|
"valid_targets_mean": 16124.9,
|
|
"valid_targets_min": 14851
|
|
},
|
|
{
|
|
"epoch": 4.520255863539446,
|
|
"grad_norm": 0.020203371249801778,
|
|
"learning_rate": 1.1371050083439107e-06,
|
|
"loss": 0.9441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1843033730983734,
|
|
"step": 1062,
|
|
"valid_targets_mean": 10570.6,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 4.524520255863539,
|
|
"grad_norm": 0.01910978437098148,
|
|
"learning_rate": 1.1174303640063622e-06,
|
|
"loss": 0.9635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2432161271572113,
|
|
"step": 1063,
|
|
"valid_targets_mean": 15839.4,
|
|
"valid_targets_min": 14739
|
|
},
|
|
{
|
|
"epoch": 4.528784648187633,
|
|
"grad_norm": 0.020043800214089625,
|
|
"learning_rate": 1.097922525237849e-06,
|
|
"loss": 0.9141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833130359649658,
|
|
"step": 1064,
|
|
"valid_targets_mean": 16141.2,
|
|
"valid_targets_min": 15070
|
|
},
|
|
{
|
|
"epoch": 4.533049040511727,
|
|
"grad_norm": 0.01924196595731442,
|
|
"learning_rate": 1.078581664367455e-06,
|
|
"loss": 0.9233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16265374422073364,
|
|
"step": 1065,
|
|
"valid_targets_mean": 10046.9,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 4.537313432835821,
|
|
"grad_norm": 0.017901738299217932,
|
|
"learning_rate": 1.0594079522492274e-06,
|
|
"loss": 0.8955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2368486076593399,
|
|
"step": 1066,
|
|
"valid_targets_mean": 16090.2,
|
|
"valid_targets_min": 14758
|
|
},
|
|
{
|
|
"epoch": 4.541577825159915,
|
|
"grad_norm": 0.020072088984568293,
|
|
"learning_rate": 1.040401558260633e-06,
|
|
"loss": 0.9603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26347705721855164,
|
|
"step": 1067,
|
|
"valid_targets_mean": 16155.9,
|
|
"valid_targets_min": 15611
|
|
},
|
|
{
|
|
"epoch": 4.545842217484008,
|
|
"grad_norm": 0.020401643506958222,
|
|
"learning_rate": 1.0215626503010911e-06,
|
|
"loss": 0.9685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20076048374176025,
|
|
"step": 1068,
|
|
"valid_targets_mean": 12652.3,
|
|
"valid_targets_min": 10383
|
|
},
|
|
{
|
|
"epoch": 4.550106609808102,
|
|
"grad_norm": 0.019771204825658407,
|
|
"learning_rate": 1.002891394790475e-06,
|
|
"loss": 0.9547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534538805484772,
|
|
"step": 1069,
|
|
"valid_targets_mean": 16164.8,
|
|
"valid_targets_min": 15522
|
|
},
|
|
{
|
|
"epoch": 4.554371002132196,
|
|
"grad_norm": 0.020585702696383327,
|
|
"learning_rate": 9.843879566676273e-07,
|
|
"loss": 0.93,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665542662143707,
|
|
"step": 1070,
|
|
"valid_targets_mean": 16160.1,
|
|
"valid_targets_min": 15414
|
|
},
|
|
{
|
|
"epoch": 4.55863539445629,
|
|
"grad_norm": 0.01904138327369789,
|
|
"learning_rate": 9.660524993889386e-07,
|
|
"loss": 0.9423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2007942795753479,
|
|
"step": 1071,
|
|
"valid_targets_mean": 14538.0,
|
|
"valid_targets_min": 12255
|
|
},
|
|
{
|
|
"epoch": 4.562899786780384,
|
|
"grad_norm": 0.019279753795483478,
|
|
"learning_rate": 9.478851849268733e-07,
|
|
"loss": 0.9473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665225863456726,
|
|
"step": 1072,
|
|
"valid_targets_mean": 16128.0,
|
|
"valid_targets_min": 14869
|
|
},
|
|
{
|
|
"epoch": 4.567164179104478,
|
|
"grad_norm": 0.020350788855639124,
|
|
"learning_rate": 9.298861737685527e-07,
|
|
"loss": 0.9172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.229437917470932,
|
|
"step": 1073,
|
|
"valid_targets_mean": 13209.4,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.017765257678619778,
|
|
"learning_rate": 9.120556249143341e-07,
|
|
"loss": 0.9386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21569302678108215,
|
|
"step": 1074,
|
|
"valid_targets_mean": 15446.7,
|
|
"valid_targets_min": 14286
|
|
},
|
|
{
|
|
"epoch": 4.575692963752665,
|
|
"grad_norm": 0.020613563127185975,
|
|
"learning_rate": 8.943936958763988e-07,
|
|
"loss": 0.989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3179331421852112,
|
|
"step": 1075,
|
|
"valid_targets_mean": 16157.8,
|
|
"valid_targets_min": 15528
|
|
},
|
|
{
|
|
"epoch": 4.5799573560767595,
|
|
"grad_norm": 0.02087569224306535,
|
|
"learning_rate": 8.769005426773836e-07,
|
|
"loss": 0.9401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14700192213058472,
|
|
"step": 1076,
|
|
"valid_targets_mean": 8403.1,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 4.584221748400853,
|
|
"grad_norm": 0.018520911236622303,
|
|
"learning_rate": 8.595763198489714e-07,
|
|
"loss": 0.9527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23321984708309174,
|
|
"step": 1077,
|
|
"valid_targets_mean": 16010.0,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 4.588486140724947,
|
|
"grad_norm": 0.019872942849071977,
|
|
"learning_rate": 8.42421180430546e-07,
|
|
"loss": 0.9199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27338290214538574,
|
|
"step": 1078,
|
|
"valid_targets_mean": 16116.7,
|
|
"valid_targets_min": 14372
|
|
},
|
|
{
|
|
"epoch": 4.59275053304904,
|
|
"grad_norm": 0.02026651371770839,
|
|
"learning_rate": 8.254352759678386e-07,
|
|
"loss": 0.9501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18547531962394714,
|
|
"step": 1079,
|
|
"valid_targets_mean": 11327.2,
|
|
"valid_targets_min": 7544
|
|
},
|
|
{
|
|
"epoch": 4.597014925373134,
|
|
"grad_norm": 0.021061414699155047,
|
|
"learning_rate": 8.086187565115877e-07,
|
|
"loss": 0.9811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594509720802307,
|
|
"step": 1080,
|
|
"valid_targets_mean": 16154.1,
|
|
"valid_targets_min": 14044
|
|
},
|
|
{
|
|
"epoch": 4.601279317697228,
|
|
"grad_norm": 0.019936994908473966,
|
|
"learning_rate": 7.919717706162067e-07,
|
|
"loss": 0.956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651141881942749,
|
|
"step": 1081,
|
|
"valid_targets_mean": 16193.1,
|
|
"valid_targets_min": 15417
|
|
},
|
|
{
|
|
"epoch": 4.605543710021322,
|
|
"grad_norm": 0.018655647302865105,
|
|
"learning_rate": 7.754944653384777e-07,
|
|
"loss": 0.9403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.194203719496727,
|
|
"step": 1082,
|
|
"valid_targets_mean": 13500.2,
|
|
"valid_targets_min": 10645
|
|
},
|
|
{
|
|
"epoch": 4.609808102345416,
|
|
"grad_norm": 0.019440997879308208,
|
|
"learning_rate": 7.591869862362534e-07,
|
|
"loss": 0.9279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2430172562599182,
|
|
"step": 1083,
|
|
"valid_targets_mean": 16048.1,
|
|
"valid_targets_min": 12838
|
|
},
|
|
{
|
|
"epoch": 4.61407249466951,
|
|
"grad_norm": 0.020223103989993937,
|
|
"learning_rate": 7.430494773671682e-07,
|
|
"loss": 0.9205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28715401887893677,
|
|
"step": 1084,
|
|
"valid_targets_mean": 16187.6,
|
|
"valid_targets_min": 15019
|
|
},
|
|
{
|
|
"epoch": 4.618336886993603,
|
|
"grad_norm": 0.018473704359525063,
|
|
"learning_rate": 7.270820812873714e-07,
|
|
"loss": 0.9367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21766062080860138,
|
|
"step": 1085,
|
|
"valid_targets_mean": 14992.5,
|
|
"valid_targets_min": 13439
|
|
},
|
|
{
|
|
"epoch": 4.622601279317697,
|
|
"grad_norm": 0.01954316155484859,
|
|
"learning_rate": 7.112849390502563e-07,
|
|
"loss": 0.9374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627962529659271,
|
|
"step": 1086,
|
|
"valid_targets_mean": 16168.8,
|
|
"valid_targets_min": 15193
|
|
},
|
|
{
|
|
"epoch": 4.6268656716417915,
|
|
"grad_norm": 0.019716506778305956,
|
|
"learning_rate": 6.956581902052306e-07,
|
|
"loss": 0.9438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18966548144817352,
|
|
"step": 1087,
|
|
"valid_targets_mean": 11818.3,
|
|
"valid_targets_min": 2133
|
|
},
|
|
{
|
|
"epoch": 4.631130063965885,
|
|
"grad_norm": 0.018270338085755972,
|
|
"learning_rate": 6.802019727964593e-07,
|
|
"loss": 0.9198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21619939804077148,
|
|
"step": 1088,
|
|
"valid_targets_mean": 15997.7,
|
|
"valid_targets_min": 15335
|
|
},
|
|
{
|
|
"epoch": 4.635394456289979,
|
|
"grad_norm": 0.020273657335507574,
|
|
"learning_rate": 6.64916423361679e-07,
|
|
"loss": 0.9602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28256916999816895,
|
|
"step": 1089,
|
|
"valid_targets_mean": 16124.8,
|
|
"valid_targets_min": 15255
|
|
},
|
|
{
|
|
"epoch": 4.639658848614072,
|
|
"grad_norm": 0.02004335471610223,
|
|
"learning_rate": 6.498016769309567e-07,
|
|
"loss": 0.9438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14430440962314606,
|
|
"step": 1090,
|
|
"valid_targets_mean": 8173.4,
|
|
"valid_targets_min": 2524
|
|
},
|
|
{
|
|
"epoch": 4.643923240938166,
|
|
"grad_norm": 0.018072612217830734,
|
|
"learning_rate": 6.348578670255224e-07,
|
|
"loss": 0.9438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22493374347686768,
|
|
"step": 1091,
|
|
"valid_targets_mean": 16221.3,
|
|
"valid_targets_min": 15341
|
|
},
|
|
{
|
|
"epoch": 4.6481876332622605,
|
|
"grad_norm": 0.01996854950596962,
|
|
"learning_rate": 6.200851256565799e-07,
|
|
"loss": 0.93,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259277880191803,
|
|
"step": 1092,
|
|
"valid_targets_mean": 15863.2,
|
|
"valid_targets_min": 6539
|
|
},
|
|
{
|
|
"epoch": 4.652452025586354,
|
|
"grad_norm": 0.01911766089967093,
|
|
"learning_rate": 6.054835833241357e-07,
|
|
"loss": 0.9133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19015288352966309,
|
|
"step": 1093,
|
|
"valid_targets_mean": 12974.5,
|
|
"valid_targets_min": 4811
|
|
},
|
|
{
|
|
"epoch": 4.656716417910448,
|
|
"grad_norm": 0.01816663792518451,
|
|
"learning_rate": 5.910533690158593e-07,
|
|
"loss": 0.9378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24068477749824524,
|
|
"step": 1094,
|
|
"valid_targets_mean": 16194.9,
|
|
"valid_targets_min": 15219
|
|
},
|
|
{
|
|
"epoch": 4.660980810234541,
|
|
"grad_norm": 0.019139983297351965,
|
|
"learning_rate": 5.767946102059307e-07,
|
|
"loss": 0.9382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753167748451233,
|
|
"step": 1095,
|
|
"valid_targets_mean": 16172.6,
|
|
"valid_targets_min": 15677
|
|
},
|
|
{
|
|
"epoch": 4.665245202558635,
|
|
"grad_norm": 0.019339773326163007,
|
|
"learning_rate": 5.627074328539173e-07,
|
|
"loss": 0.9273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20613351464271545,
|
|
"step": 1096,
|
|
"valid_targets_mean": 13807.2,
|
|
"valid_targets_min": 12058
|
|
},
|
|
{
|
|
"epoch": 4.669509594882729,
|
|
"grad_norm": 0.019460027415027836,
|
|
"learning_rate": 5.487919614036741e-07,
|
|
"loss": 0.9688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2606773376464844,
|
|
"step": 1097,
|
|
"valid_targets_mean": 16124.8,
|
|
"valid_targets_min": 14861
|
|
},
|
|
{
|
|
"epoch": 4.673773987206823,
|
|
"grad_norm": 0.020324225941773796,
|
|
"learning_rate": 5.350483187822231e-07,
|
|
"loss": 0.9727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24414348602294922,
|
|
"step": 1098,
|
|
"valid_targets_mean": 12730.3,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 4.678038379530917,
|
|
"grad_norm": 0.019423523837182856,
|
|
"learning_rate": 5.214766263986848e-07,
|
|
"loss": 0.9788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24133114516735077,
|
|
"step": 1099,
|
|
"valid_targets_mean": 15872.8,
|
|
"valid_targets_min": 15084
|
|
},
|
|
{
|
|
"epoch": 4.682302771855011,
|
|
"grad_norm": 0.019885956518777954,
|
|
"learning_rate": 5.080770041431926e-07,
|
|
"loss": 0.9122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603484094142914,
|
|
"step": 1100,
|
|
"valid_targets_mean": 16111.3,
|
|
"valid_targets_min": 14235
|
|
},
|
|
{
|
|
"epoch": 4.686567164179104,
|
|
"grad_norm": 0.01989379354114257,
|
|
"learning_rate": 4.948495703858492e-07,
|
|
"loss": 0.9063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14753714203834534,
|
|
"step": 1101,
|
|
"valid_targets_mean": 8982.3,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 4.690831556503198,
|
|
"grad_norm": 0.01808512262773157,
|
|
"learning_rate": 4.81794441975667e-07,
|
|
"loss": 0.9449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23836557567119598,
|
|
"step": 1102,
|
|
"valid_targets_mean": 16088.1,
|
|
"valid_targets_min": 15116
|
|
},
|
|
{
|
|
"epoch": 4.6950959488272925,
|
|
"grad_norm": 0.01962494985245019,
|
|
"learning_rate": 4.689117342395388e-07,
|
|
"loss": 0.9137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24984899163246155,
|
|
"step": 1103,
|
|
"valid_targets_mean": 16186.2,
|
|
"valid_targets_min": 14984
|
|
},
|
|
{
|
|
"epoch": 4.699360341151386,
|
|
"grad_norm": 0.01874005204782074,
|
|
"learning_rate": 4.5620156098122204e-07,
|
|
"loss": 0.9474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17979282140731812,
|
|
"step": 1104,
|
|
"valid_targets_mean": 10895.8,
|
|
"valid_targets_min": 5729
|
|
},
|
|
{
|
|
"epoch": 4.70362473347548,
|
|
"grad_norm": 0.01891868980357014,
|
|
"learning_rate": 4.4366403448033334e-07,
|
|
"loss": 0.9853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624548375606537,
|
|
"step": 1105,
|
|
"valid_targets_mean": 16050.8,
|
|
"valid_targets_min": 14032
|
|
},
|
|
{
|
|
"epoch": 4.707889125799573,
|
|
"grad_norm": 0.019866757310248034,
|
|
"learning_rate": 4.3129926549136057e-07,
|
|
"loss": 0.952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27252164483070374,
|
|
"step": 1106,
|
|
"valid_targets_mean": 16138.9,
|
|
"valid_targets_min": 15064
|
|
},
|
|
{
|
|
"epoch": 4.712153518123667,
|
|
"grad_norm": 0.018339878817270696,
|
|
"learning_rate": 4.191073632426701e-07,
|
|
"loss": 0.9062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2066860944032669,
|
|
"step": 1107,
|
|
"valid_targets_mean": 13648.0,
|
|
"valid_targets_min": 10758
|
|
},
|
|
{
|
|
"epoch": 4.7164179104477615,
|
|
"grad_norm": 0.01992180529264341,
|
|
"learning_rate": 4.0708843543555643e-07,
|
|
"loss": 0.938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675975561141968,
|
|
"step": 1108,
|
|
"valid_targets_mean": 16005.7,
|
|
"valid_targets_min": 10520
|
|
},
|
|
{
|
|
"epoch": 4.720682302771855,
|
|
"grad_norm": 0.020441769288833504,
|
|
"learning_rate": 3.95242588243292e-07,
|
|
"loss": 0.9483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877139747142792,
|
|
"step": 1109,
|
|
"valid_targets_mean": 16109.6,
|
|
"valid_targets_min": 15264
|
|
},
|
|
{
|
|
"epoch": 4.724946695095949,
|
|
"grad_norm": 0.01887396259047416,
|
|
"learning_rate": 3.8356992631017e-07,
|
|
"loss": 0.9529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21346427500247955,
|
|
"step": 1110,
|
|
"valid_targets_mean": 15018.7,
|
|
"valid_targets_min": 12859
|
|
},
|
|
{
|
|
"epoch": 4.729211087420042,
|
|
"grad_norm": 0.020186572819731935,
|
|
"learning_rate": 3.720705527506008e-07,
|
|
"loss": 0.9468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2794426679611206,
|
|
"step": 1111,
|
|
"valid_targets_mean": 16085.5,
|
|
"valid_targets_min": 14577
|
|
},
|
|
{
|
|
"epoch": 4.733475479744136,
|
|
"grad_norm": 0.020432184520846743,
|
|
"learning_rate": 3.60744569148197e-07,
|
|
"loss": 0.9987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19069266319274902,
|
|
"step": 1112,
|
|
"valid_targets_mean": 11420.5,
|
|
"valid_targets_min": 2315
|
|
},
|
|
{
|
|
"epoch": 4.73773987206823,
|
|
"grad_norm": 0.01835824934065439,
|
|
"learning_rate": 3.4959207555485873e-07,
|
|
"loss": 0.9824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2288331538438797,
|
|
"step": 1113,
|
|
"valid_targets_mean": 15814.1,
|
|
"valid_targets_min": 14487
|
|
},
|
|
{
|
|
"epoch": 4.742004264392325,
|
|
"grad_norm": 0.021240434421145087,
|
|
"learning_rate": 3.3861317048992317e-07,
|
|
"loss": 0.9831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2972368896007538,
|
|
"step": 1114,
|
|
"valid_targets_mean": 16147.9,
|
|
"valid_targets_min": 15209
|
|
},
|
|
{
|
|
"epoch": 4.746268656716418,
|
|
"grad_norm": 0.018790523093357544,
|
|
"learning_rate": 3.278079509392562e-07,
|
|
"loss": 0.9042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16758659482002258,
|
|
"step": 1115,
|
|
"valid_targets_mean": 9700.2,
|
|
"valid_targets_min": 3271
|
|
},
|
|
{
|
|
"epoch": 4.750533049040512,
|
|
"grad_norm": 0.018296665063432437,
|
|
"learning_rate": 3.171765123544224e-07,
|
|
"loss": 0.9414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2303854525089264,
|
|
"step": 1116,
|
|
"valid_targets_mean": 16104.6,
|
|
"valid_targets_min": 12745
|
|
},
|
|
{
|
|
"epoch": 4.754797441364605,
|
|
"grad_norm": 0.01991994086798728,
|
|
"learning_rate": 3.06718948651834e-07,
|
|
"loss": 0.952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27704036235809326,
|
|
"step": 1117,
|
|
"valid_targets_mean": 16112.6,
|
|
"valid_targets_min": 15188
|
|
},
|
|
{
|
|
"epoch": 4.759061833688699,
|
|
"grad_norm": 0.018481143054226407,
|
|
"learning_rate": 2.964353522119168e-07,
|
|
"loss": 0.9351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.211697518825531,
|
|
"step": 1118,
|
|
"valid_targets_mean": 13389.1,
|
|
"valid_targets_min": 11734
|
|
},
|
|
{
|
|
"epoch": 4.7633262260127935,
|
|
"grad_norm": 0.019671892059046594,
|
|
"learning_rate": 2.863258138783032e-07,
|
|
"loss": 0.981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26838481426239014,
|
|
"step": 1119,
|
|
"valid_targets_mean": 16131.8,
|
|
"valid_targets_min": 15229
|
|
},
|
|
{
|
|
"epoch": 4.767590618336887,
|
|
"grad_norm": 0.019157277597305473,
|
|
"learning_rate": 2.7639042295702245e-07,
|
|
"loss": 0.9363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27001243829727173,
|
|
"step": 1120,
|
|
"valid_targets_mean": 16105.2,
|
|
"valid_targets_min": 15170
|
|
},
|
|
{
|
|
"epoch": 4.771855010660981,
|
|
"grad_norm": 0.018242412531535034,
|
|
"learning_rate": 2.666292672157056e-07,
|
|
"loss": 0.9119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21758460998535156,
|
|
"step": 1121,
|
|
"valid_targets_mean": 15246.4,
|
|
"valid_targets_min": 14124
|
|
},
|
|
{
|
|
"epoch": 4.776119402985074,
|
|
"grad_norm": 0.020813732525062703,
|
|
"learning_rate": 2.570424328828325e-07,
|
|
"loss": 0.9536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542939782142639,
|
|
"step": 1122,
|
|
"valid_targets_mean": 16168.6,
|
|
"valid_targets_min": 15654
|
|
},
|
|
{
|
|
"epoch": 4.780383795309168,
|
|
"grad_norm": 0.01985472258180318,
|
|
"learning_rate": 2.4763000464694377e-07,
|
|
"loss": 0.9341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21324247121810913,
|
|
"step": 1123,
|
|
"valid_targets_mean": 13085.1,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 4.7846481876332625,
|
|
"grad_norm": 0.01934632563097119,
|
|
"learning_rate": 2.383920656559102e-07,
|
|
"loss": 0.9646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24674351513385773,
|
|
"step": 1124,
|
|
"valid_targets_mean": 15750.4,
|
|
"valid_targets_min": 10516
|
|
},
|
|
{
|
|
"epoch": 4.788912579957356,
|
|
"grad_norm": 0.019893834259669278,
|
|
"learning_rate": 2.2932869751619568e-07,
|
|
"loss": 0.9336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27269020676612854,
|
|
"step": 1125,
|
|
"valid_targets_mean": 16109.5,
|
|
"valid_targets_min": 14773
|
|
},
|
|
{
|
|
"epoch": 4.79317697228145,
|
|
"grad_norm": 0.020137323098385278,
|
|
"learning_rate": 2.2043998029212643e-07,
|
|
"loss": 0.9712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19210724532604218,
|
|
"step": 1126,
|
|
"valid_targets_mean": 10349.0,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 4.797441364605544,
|
|
"grad_norm": 0.01867119479415427,
|
|
"learning_rate": 2.1172599250519398e-07,
|
|
"loss": 0.9318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21557371318340302,
|
|
"step": 1127,
|
|
"valid_targets_mean": 15912.2,
|
|
"valid_targets_min": 14744
|
|
},
|
|
{
|
|
"epoch": 4.801705756929637,
|
|
"grad_norm": 0.02085324777319541,
|
|
"learning_rate": 2.0318681113336013e-07,
|
|
"loss": 0.9642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25642406940460205,
|
|
"step": 1128,
|
|
"valid_targets_mean": 16088.1,
|
|
"valid_targets_min": 14742
|
|
},
|
|
{
|
|
"epoch": 4.8059701492537314,
|
|
"grad_norm": 0.0189902025256666,
|
|
"learning_rate": 1.9482251161037302e-07,
|
|
"loss": 0.9337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.169912189245224,
|
|
"step": 1129,
|
|
"valid_targets_mean": 9958.0,
|
|
"valid_targets_min": 5451
|
|
},
|
|
{
|
|
"epoch": 4.810234541577826,
|
|
"grad_norm": 0.019534886876350417,
|
|
"learning_rate": 1.866331678251032e-07,
|
|
"loss": 0.9735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26306942105293274,
|
|
"step": 1130,
|
|
"valid_targets_mean": 15974.4,
|
|
"valid_targets_min": 13514
|
|
},
|
|
{
|
|
"epoch": 4.814498933901919,
|
|
"grad_norm": 0.020285145946563297,
|
|
"learning_rate": 1.7861885212088869e-07,
|
|
"loss": 0.922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26303473114967346,
|
|
"step": 1131,
|
|
"valid_targets_mean": 15980.6,
|
|
"valid_targets_min": 13689
|
|
},
|
|
{
|
|
"epoch": 4.818763326226013,
|
|
"grad_norm": 0.019241129313043646,
|
|
"learning_rate": 1.7077963529490204e-07,
|
|
"loss": 0.9333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1944514811038971,
|
|
"step": 1132,
|
|
"valid_targets_mean": 13361.1,
|
|
"valid_targets_min": 10864
|
|
},
|
|
{
|
|
"epoch": 4.823027718550106,
|
|
"grad_norm": 0.018820130288281688,
|
|
"learning_rate": 1.6311558659751535e-07,
|
|
"loss": 0.9287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24629515409469604,
|
|
"step": 1133,
|
|
"valid_targets_mean": 16133.1,
|
|
"valid_targets_min": 14415
|
|
},
|
|
{
|
|
"epoch": 4.8272921108742,
|
|
"grad_norm": 0.019832879695033907,
|
|
"learning_rate": 1.5562677373169855e-07,
|
|
"loss": 0.9744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28646355867385864,
|
|
"step": 1134,
|
|
"valid_targets_mean": 15896.3,
|
|
"valid_targets_min": 10715
|
|
},
|
|
{
|
|
"epoch": 4.8315565031982945,
|
|
"grad_norm": 0.0197343452469959,
|
|
"learning_rate": 1.483132628524131e-07,
|
|
"loss": 0.9637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2155962884426117,
|
|
"step": 1135,
|
|
"valid_targets_mean": 14483.6,
|
|
"valid_targets_min": 4321
|
|
},
|
|
{
|
|
"epoch": 4.835820895522388,
|
|
"grad_norm": 0.02042300437316941,
|
|
"learning_rate": 1.4117511856603262e-07,
|
|
"loss": 0.9862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28481847047805786,
|
|
"step": 1136,
|
|
"valid_targets_mean": 16111.5,
|
|
"valid_targets_min": 15323
|
|
},
|
|
{
|
|
"epoch": 4.840085287846482,
|
|
"grad_norm": 0.01940284155294373,
|
|
"learning_rate": 1.342124039297721e-07,
|
|
"loss": 0.9634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1985420286655426,
|
|
"step": 1137,
|
|
"valid_targets_mean": 10730.5,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 4.844349680170575,
|
|
"grad_norm": 0.018779740346132966,
|
|
"learning_rate": 1.2742518045112396e-07,
|
|
"loss": 0.9631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23396380245685577,
|
|
"step": 1138,
|
|
"valid_targets_mean": 15437.0,
|
|
"valid_targets_min": 13372
|
|
},
|
|
{
|
|
"epoch": 4.848614072494669,
|
|
"grad_norm": 0.02075751282079989,
|
|
"learning_rate": 1.2081350808732518e-07,
|
|
"loss": 0.9634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29876869916915894,
|
|
"step": 1139,
|
|
"valid_targets_mean": 16123.2,
|
|
"valid_targets_min": 15429
|
|
},
|
|
{
|
|
"epoch": 4.8528784648187635,
|
|
"grad_norm": 0.02026897878825485,
|
|
"learning_rate": 1.143774452448243e-07,
|
|
"loss": 0.9629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15858730673789978,
|
|
"step": 1140,
|
|
"valid_targets_mean": 9889.2,
|
|
"valid_targets_min": 5133
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.018650243641402803,
|
|
"learning_rate": 1.0811704877875528e-07,
|
|
"loss": 0.9311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23967748880386353,
|
|
"step": 1141,
|
|
"valid_targets_mean": 16170.9,
|
|
"valid_targets_min": 15717
|
|
},
|
|
{
|
|
"epoch": 4.861407249466951,
|
|
"grad_norm": 0.019866671730865704,
|
|
"learning_rate": 1.0203237399245336e-07,
|
|
"loss": 0.9526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824961543083191,
|
|
"step": 1142,
|
|
"valid_targets_mean": 16121.9,
|
|
"valid_targets_min": 15297
|
|
},
|
|
{
|
|
"epoch": 4.865671641791045,
|
|
"grad_norm": 0.01961498292819618,
|
|
"learning_rate": 9.612347463694882e-08,
|
|
"loss": 0.9723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17665594816207886,
|
|
"step": 1143,
|
|
"valid_targets_mean": 12415.5,
|
|
"valid_targets_min": 9073
|
|
},
|
|
{
|
|
"epoch": 4.869936034115138,
|
|
"grad_norm": 0.0192330043712978,
|
|
"learning_rate": 9.039040291050738e-08,
|
|
"loss": 0.9136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24519477784633636,
|
|
"step": 1144,
|
|
"valid_targets_mean": 16223.4,
|
|
"valid_targets_min": 15270
|
|
},
|
|
{
|
|
"epoch": 4.8742004264392325,
|
|
"grad_norm": 0.020669791235859384,
|
|
"learning_rate": 8.483320945815499e-08,
|
|
"loss": 0.943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25907793641090393,
|
|
"step": 1145,
|
|
"valid_targets_mean": 16193.2,
|
|
"valid_targets_min": 14907
|
|
},
|
|
{
|
|
"epoch": 4.878464818763327,
|
|
"grad_norm": 0.018820248026841565,
|
|
"learning_rate": 7.945194337124262e-08,
|
|
"loss": 0.9537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22706489264965057,
|
|
"step": 1146,
|
|
"valid_targets_mean": 14415.3,
|
|
"valid_targets_min": 12759
|
|
},
|
|
{
|
|
"epoch": 4.88272921108742,
|
|
"grad_norm": 0.020144194892665834,
|
|
"learning_rate": 7.424665218700444e-08,
|
|
"loss": 0.9759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790810167789459,
|
|
"step": 1147,
|
|
"valid_targets_mean": 16094.8,
|
|
"valid_targets_min": 15333
|
|
},
|
|
{
|
|
"epoch": 4.886993603411514,
|
|
"grad_norm": 0.019480476600740253,
|
|
"learning_rate": 6.921738188814254e-08,
|
|
"loss": 0.9029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2119375467300415,
|
|
"step": 1148,
|
|
"valid_targets_mean": 13064.2,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 4.891257995735607,
|
|
"grad_norm": 0.017803302499765316,
|
|
"learning_rate": 6.436417690241614e-08,
|
|
"loss": 0.9188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23075050115585327,
|
|
"step": 1149,
|
|
"valid_targets_mean": 15487.2,
|
|
"valid_targets_min": 14299
|
|
},
|
|
{
|
|
"epoch": 4.895522388059701,
|
|
"grad_norm": 0.020277463032196574,
|
|
"learning_rate": 5.968708010225532e-08,
|
|
"loss": 0.9132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265217125415802,
|
|
"step": 1150,
|
|
"valid_targets_mean": 16214.2,
|
|
"valid_targets_min": 15478
|
|
},
|
|
{
|
|
"epoch": 4.899786780383796,
|
|
"grad_norm": 0.019571420728060226,
|
|
"learning_rate": 5.518613280437901e-08,
|
|
"loss": 0.9602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17219007015228271,
|
|
"step": 1151,
|
|
"valid_targets_mean": 9421.5,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 4.904051172707889,
|
|
"grad_norm": 0.01917942961864778,
|
|
"learning_rate": 5.0861374769426433e-08,
|
|
"loss": 0.9576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22660619020462036,
|
|
"step": 1152,
|
|
"valid_targets_mean": 15773.5,
|
|
"valid_targets_min": 14701
|
|
},
|
|
{
|
|
"epoch": 4.908315565031983,
|
|
"grad_norm": 0.021024370148425696,
|
|
"learning_rate": 4.671284420161071e-08,
|
|
"loss": 0.986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3083045482635498,
|
|
"step": 1153,
|
|
"valid_targets_mean": 15861.6,
|
|
"valid_targets_min": 11810
|
|
},
|
|
{
|
|
"epoch": 4.912579957356077,
|
|
"grad_norm": 0.019751841936360773,
|
|
"learning_rate": 4.274057774838136e-08,
|
|
"loss": 0.9545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16684508323669434,
|
|
"step": 1154,
|
|
"valid_targets_mean": 10812.0,
|
|
"valid_targets_min": 7573
|
|
},
|
|
{
|
|
"epoch": 4.91684434968017,
|
|
"grad_norm": 0.018156600223685367,
|
|
"learning_rate": 3.894461050010012e-08,
|
|
"loss": 0.9344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604978382587433,
|
|
"step": 1155,
|
|
"valid_targets_mean": 16063.1,
|
|
"valid_targets_min": 14902
|
|
},
|
|
{
|
|
"epoch": 4.9211087420042645,
|
|
"grad_norm": 0.02016293647610925,
|
|
"learning_rate": 3.5324975989725615e-08,
|
|
"loss": 0.9071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2769468426704407,
|
|
"step": 1156,
|
|
"valid_targets_mean": 16185.2,
|
|
"valid_targets_min": 15581
|
|
},
|
|
{
|
|
"epoch": 4.925373134328359,
|
|
"grad_norm": 0.018138230554822953,
|
|
"learning_rate": 3.188170619252473e-08,
|
|
"loss": 0.9112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18603980541229248,
|
|
"step": 1157,
|
|
"valid_targets_mean": 12736.7,
|
|
"valid_targets_min": 10560
|
|
},
|
|
{
|
|
"epoch": 4.929637526652452,
|
|
"grad_norm": 0.01977457654472926,
|
|
"learning_rate": 2.8614831525786147e-08,
|
|
"loss": 0.9452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25331005454063416,
|
|
"step": 1158,
|
|
"valid_targets_mean": 16043.1,
|
|
"valid_targets_min": 13878
|
|
},
|
|
{
|
|
"epoch": 4.933901918976546,
|
|
"grad_norm": 0.01961550195506048,
|
|
"learning_rate": 2.552438084855613e-08,
|
|
"loss": 0.9537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273205041885376,
|
|
"step": 1159,
|
|
"valid_targets_mean": 16118.6,
|
|
"valid_targets_min": 14586
|
|
},
|
|
{
|
|
"epoch": 4.938166311300639,
|
|
"grad_norm": 0.018770039815369902,
|
|
"learning_rate": 2.2610381461372068e-08,
|
|
"loss": 0.9607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25998565554618835,
|
|
"step": 1160,
|
|
"valid_targets_mean": 15774.5,
|
|
"valid_targets_min": 14325
|
|
},
|
|
{
|
|
"epoch": 4.9424307036247335,
|
|
"grad_norm": 0.02002194709986828,
|
|
"learning_rate": 1.987285910603598e-08,
|
|
"loss": 0.9377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26880884170532227,
|
|
"step": 1161,
|
|
"valid_targets_mean": 16147.4,
|
|
"valid_targets_min": 15229
|
|
},
|
|
{
|
|
"epoch": 4.946695095948828,
|
|
"grad_norm": 0.020022293530988847,
|
|
"learning_rate": 1.7311837965379164e-08,
|
|
"loss": 0.9461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19136518239974976,
|
|
"step": 1162,
|
|
"valid_targets_mean": 10860.5,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 4.950959488272921,
|
|
"grad_norm": 0.01894848588937281,
|
|
"learning_rate": 1.4927340663046798e-08,
|
|
"loss": 0.9797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2447078824043274,
|
|
"step": 1163,
|
|
"valid_targets_mean": 16073.1,
|
|
"valid_targets_min": 14742
|
|
},
|
|
{
|
|
"epoch": 4.955223880597015,
|
|
"grad_norm": 0.019754288868544214,
|
|
"learning_rate": 1.2719388263300325e-08,
|
|
"loss": 0.9457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538267970085144,
|
|
"step": 1164,
|
|
"valid_targets_mean": 16167.9,
|
|
"valid_targets_min": 14898
|
|
},
|
|
{
|
|
"epoch": 4.959488272921108,
|
|
"grad_norm": 0.019676929525858772,
|
|
"learning_rate": 1.0688000270839827e-08,
|
|
"loss": 0.9291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15881095826625824,
|
|
"step": 1165,
|
|
"valid_targets_mean": 9630.9,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 4.963752665245202,
|
|
"grad_norm": 0.018405274008141796,
|
|
"learning_rate": 8.833194630615271e-09,
|
|
"loss": 0.9338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23524492979049683,
|
|
"step": 1166,
|
|
"valid_targets_mean": 16118.5,
|
|
"valid_targets_min": 13912
|
|
},
|
|
{
|
|
"epoch": 4.968017057569297,
|
|
"grad_norm": 0.019707235291191907,
|
|
"learning_rate": 7.154987727682194e-09,
|
|
"loss": 0.8953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26419392228126526,
|
|
"step": 1167,
|
|
"valid_targets_mean": 16209.6,
|
|
"valid_targets_min": 15484
|
|
},
|
|
{
|
|
"epoch": 4.97228144989339,
|
|
"grad_norm": 0.018995159516019613,
|
|
"learning_rate": 5.6533943870462625e-09,
|
|
"loss": 0.9307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19308148324489594,
|
|
"step": 1168,
|
|
"valid_targets_mean": 12608.6,
|
|
"valid_targets_min": 9739
|
|
},
|
|
{
|
|
"epoch": 4.976545842217484,
|
|
"grad_norm": 0.019414497576689102,
|
|
"learning_rate": 4.328427873541152e-09,
|
|
"loss": 0.9577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24841631948947906,
|
|
"step": 1169,
|
|
"valid_targets_mean": 16153.7,
|
|
"valid_targets_min": 15228
|
|
},
|
|
{
|
|
"epoch": 4.980810234541578,
|
|
"grad_norm": 0.019978352491516957,
|
|
"learning_rate": 3.1800998917086432e-09,
|
|
"loss": 0.9668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768065631389618,
|
|
"step": 1170,
|
|
"valid_targets_mean": 16111.9,
|
|
"valid_targets_min": 15276
|
|
},
|
|
{
|
|
"epoch": 4.985074626865671,
|
|
"grad_norm": 0.01932392532126781,
|
|
"learning_rate": 2.2084205856920393e-09,
|
|
"loss": 0.9376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2058914601802826,
|
|
"step": 1171,
|
|
"valid_targets_mean": 14295.4,
|
|
"valid_targets_min": 12820
|
|
},
|
|
{
|
|
"epoch": 4.9893390191897655,
|
|
"grad_norm": 0.01962364763104275,
|
|
"learning_rate": 1.4133985391473482e-09,
|
|
"loss": 0.957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604025602340698,
|
|
"step": 1172,
|
|
"valid_targets_mean": 16178.0,
|
|
"valid_targets_min": 15207
|
|
},
|
|
{
|
|
"epoch": 4.99360341151386,
|
|
"grad_norm": 0.01963584547110758,
|
|
"learning_rate": 7.950407751722288e-10,
|
|
"loss": 0.93,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23507188260555267,
|
|
"step": 1173,
|
|
"valid_targets_mean": 13002.3,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 4.997867803837953,
|
|
"grad_norm": 0.018796631392403233,
|
|
"learning_rate": 3.5335275624159835e-10,
|
|
"loss": 0.9864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2428944706916809,
|
|
"step": 1174,
|
|
"valid_targets_mean": 15749.2,
|
|
"valid_targets_min": 14276
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.03014196198442487,
|
|
"learning_rate": 8.833838415212014e-11,
|
|
"loss": 0.9264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43892961740493774,
|
|
"step": 1175,
|
|
"valid_targets_mean": 11464.1,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43892961740493774,
|
|
"step": 1175,
|
|
"total_flos": 9938320304898048.0,
|
|
"train_loss": 0.9563364079150748,
|
|
"train_runtime": 6894.0534,
|
|
"train_samples_per_second": 21.758,
|
|
"train_steps_per_second": 0.17,
|
|
"valid_targets_mean": 11464.1,
|
|
"valid_targets_min": 2081
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1175,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9938320304898048.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|