Model: open-sci/sft__ot30k_SmolLM2-1.7B-16k-SFT-Tulu3-decontaminated Source: Original Platform
12973 lines
375 KiB
JSON
12973 lines
375 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1175,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0042643923240938165,
|
|
"grad_norm": 50.790045190656855,
|
|
"learning_rate": 0.0,
|
|
"loss": 2.170248031616211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5367974638938904,
|
|
"step": 1,
|
|
"valid_targets_mean": 14321.9,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 0.008528784648187633,
|
|
"grad_norm": 46.779308529450724,
|
|
"learning_rate": 3.3898305084745766e-07,
|
|
"loss": 2.188108205795288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5509403944015503,
|
|
"step": 2,
|
|
"valid_targets_mean": 14621.8,
|
|
"valid_targets_min": 3312
|
|
},
|
|
{
|
|
"epoch": 0.01279317697228145,
|
|
"grad_norm": 45.17243380366014,
|
|
"learning_rate": 6.779661016949153e-07,
|
|
"loss": 2.130862236022949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5246433019638062,
|
|
"step": 3,
|
|
"valid_targets_mean": 15019.3,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 0.017057569296375266,
|
|
"grad_norm": 47.185009501724274,
|
|
"learning_rate": 1.016949152542373e-06,
|
|
"loss": 2.1715633869171143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5440387725830078,
|
|
"step": 4,
|
|
"valid_targets_mean": 13981.1,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 0.021321961620469083,
|
|
"grad_norm": 44.57211946253561,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 2.1357712745666504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5245351791381836,
|
|
"step": 5,
|
|
"valid_targets_mean": 14371.6,
|
|
"valid_targets_min": 4931
|
|
},
|
|
{
|
|
"epoch": 0.0255863539445629,
|
|
"grad_norm": 44.09480015186751,
|
|
"learning_rate": 1.6949152542372882e-06,
|
|
"loss": 2.0977840423583984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5290247201919556,
|
|
"step": 6,
|
|
"valid_targets_mean": 14774.8,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 0.029850746268656716,
|
|
"grad_norm": 44.95465500866499,
|
|
"learning_rate": 2.033898305084746e-06,
|
|
"loss": 2.1269590854644775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.540243923664093,
|
|
"step": 7,
|
|
"valid_targets_mean": 15021.7,
|
|
"valid_targets_min": 5734
|
|
},
|
|
{
|
|
"epoch": 0.03411513859275053,
|
|
"grad_norm": 45.532705317520396,
|
|
"learning_rate": 2.372881355932204e-06,
|
|
"loss": 2.130779981613159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5535479784011841,
|
|
"step": 8,
|
|
"valid_targets_mean": 15463.4,
|
|
"valid_targets_min": 5095
|
|
},
|
|
{
|
|
"epoch": 0.03837953091684435,
|
|
"grad_norm": 45.065532325499156,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 2.1472434997558594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.562113881111145,
|
|
"step": 9,
|
|
"valid_targets_mean": 15351.3,
|
|
"valid_targets_min": 12169
|
|
},
|
|
{
|
|
"epoch": 0.042643923240938165,
|
|
"grad_norm": 44.571271806685175,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 2.1082420349121094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5148895382881165,
|
|
"step": 10,
|
|
"valid_targets_mean": 15040.1,
|
|
"valid_targets_min": 3013
|
|
},
|
|
{
|
|
"epoch": 0.046908315565031986,
|
|
"grad_norm": 42.425308158522355,
|
|
"learning_rate": 3.3898305084745763e-06,
|
|
"loss": 2.1142380237579346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5395100116729736,
|
|
"step": 11,
|
|
"valid_targets_mean": 14724.2,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 0.0511727078891258,
|
|
"grad_norm": 39.31864273361584,
|
|
"learning_rate": 3.7288135593220342e-06,
|
|
"loss": 2.0478177070617676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5402687788009644,
|
|
"step": 12,
|
|
"valid_targets_mean": 15195.5,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 0.05543710021321962,
|
|
"grad_norm": 40.830049863509025,
|
|
"learning_rate": 4.067796610169492e-06,
|
|
"loss": 2.0825119018554688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5617735385894775,
|
|
"step": 13,
|
|
"valid_targets_mean": 15551.3,
|
|
"valid_targets_min": 11106
|
|
},
|
|
{
|
|
"epoch": 0.05970149253731343,
|
|
"grad_norm": 38.04846079305305,
|
|
"learning_rate": 4.40677966101695e-06,
|
|
"loss": 2.046898603439331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5019025802612305,
|
|
"step": 14,
|
|
"valid_targets_mean": 14848.9,
|
|
"valid_targets_min": 9958
|
|
},
|
|
{
|
|
"epoch": 0.06396588486140725,
|
|
"grad_norm": 34.95663903713111,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 2.0071847438812256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5296506285667419,
|
|
"step": 15,
|
|
"valid_targets_mean": 15191.9,
|
|
"valid_targets_min": 3766
|
|
},
|
|
{
|
|
"epoch": 0.06823027718550106,
|
|
"grad_norm": 27.054249981999543,
|
|
"learning_rate": 5.084745762711865e-06,
|
|
"loss": 1.8666037321090698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45470499992370605,
|
|
"step": 16,
|
|
"valid_targets_mean": 15216.2,
|
|
"valid_targets_min": 6239
|
|
},
|
|
{
|
|
"epoch": 0.07249466950959488,
|
|
"grad_norm": 26.261466564870013,
|
|
"learning_rate": 5.423728813559323e-06,
|
|
"loss": 1.916226863861084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47518885135650635,
|
|
"step": 17,
|
|
"valid_targets_mean": 15284.0,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 0.0767590618336887,
|
|
"grad_norm": 27.232268863118826,
|
|
"learning_rate": 5.7627118644067805e-06,
|
|
"loss": 1.9307100772857666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5244933366775513,
|
|
"step": 18,
|
|
"valid_targets_mean": 15771.8,
|
|
"valid_targets_min": 12412
|
|
},
|
|
{
|
|
"epoch": 0.08102345415778252,
|
|
"grad_norm": 27.47901741613614,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 1.8952523469924927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4579699635505676,
|
|
"step": 19,
|
|
"valid_targets_mean": 14159.2,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 0.08528784648187633,
|
|
"grad_norm": 26.829717100318437,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 1.9043647050857544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47290629148483276,
|
|
"step": 20,
|
|
"valid_targets_mean": 14576.8,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 0.08955223880597014,
|
|
"grad_norm": 14.549811010244184,
|
|
"learning_rate": 6.779661016949153e-06,
|
|
"loss": 1.7417538166046143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42348814010620117,
|
|
"step": 21,
|
|
"valid_targets_mean": 14887.2,
|
|
"valid_targets_min": 2398
|
|
},
|
|
{
|
|
"epoch": 0.09381663113006397,
|
|
"grad_norm": 12.900341344592826,
|
|
"learning_rate": 7.1186440677966106e-06,
|
|
"loss": 1.6977145671844482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41473180055618286,
|
|
"step": 22,
|
|
"valid_targets_mean": 14468.2,
|
|
"valid_targets_min": 5296
|
|
},
|
|
{
|
|
"epoch": 0.09808102345415778,
|
|
"grad_norm": 12.245233952586673,
|
|
"learning_rate": 7.4576271186440685e-06,
|
|
"loss": 1.72165846824646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4268421232700348,
|
|
"step": 23,
|
|
"valid_targets_mean": 15792.5,
|
|
"valid_targets_min": 8823
|
|
},
|
|
{
|
|
"epoch": 0.1023454157782516,
|
|
"grad_norm": 11.887975704497741,
|
|
"learning_rate": 7.796610169491526e-06,
|
|
"loss": 1.6677861213684082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4327390491962433,
|
|
"step": 24,
|
|
"valid_targets_mean": 15825.5,
|
|
"valid_targets_min": 8654
|
|
},
|
|
{
|
|
"epoch": 0.10660980810234541,
|
|
"grad_norm": 10.062587166710248,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 1.6380515098571777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37642329931259155,
|
|
"step": 25,
|
|
"valid_targets_mean": 13636.7,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 0.11087420042643924,
|
|
"grad_norm": 10.24381012683112,
|
|
"learning_rate": 8.47457627118644e-06,
|
|
"loss": 1.6145225763320923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3890722990036011,
|
|
"step": 26,
|
|
"valid_targets_mean": 13839.9,
|
|
"valid_targets_min": 2406
|
|
},
|
|
{
|
|
"epoch": 0.11513859275053305,
|
|
"grad_norm": 8.757077786632445,
|
|
"learning_rate": 8.8135593220339e-06,
|
|
"loss": 1.551984190940857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38796466588974,
|
|
"step": 27,
|
|
"valid_targets_mean": 15686.6,
|
|
"valid_targets_min": 9771
|
|
},
|
|
{
|
|
"epoch": 0.11940298507462686,
|
|
"grad_norm": 6.502960108459334,
|
|
"learning_rate": 9.152542372881356e-06,
|
|
"loss": 1.5993372201919556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3797028660774231,
|
|
"step": 28,
|
|
"valid_targets_mean": 14795.3,
|
|
"valid_targets_min": 5847
|
|
},
|
|
{
|
|
"epoch": 0.12366737739872068,
|
|
"grad_norm": 4.686333165951496,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 1.499677300453186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3852105736732483,
|
|
"step": 29,
|
|
"valid_targets_mean": 14799.2,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 0.1279317697228145,
|
|
"grad_norm": 4.4376965727817215,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 1.5449557304382324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3900710344314575,
|
|
"step": 30,
|
|
"valid_targets_mean": 15237.7,
|
|
"valid_targets_min": 7796
|
|
},
|
|
{
|
|
"epoch": 0.13219616204690832,
|
|
"grad_norm": 3.7222824944675934,
|
|
"learning_rate": 1.016949152542373e-05,
|
|
"loss": 1.4495660066604614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36171507835388184,
|
|
"step": 31,
|
|
"valid_targets_mean": 14774.0,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 0.13646055437100213,
|
|
"grad_norm": 3.336876802962369,
|
|
"learning_rate": 1.0508474576271188e-05,
|
|
"loss": 1.4464428424835205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35169264674186707,
|
|
"step": 32,
|
|
"valid_targets_mean": 15008.9,
|
|
"valid_targets_min": 2413
|
|
},
|
|
{
|
|
"epoch": 0.14072494669509594,
|
|
"grad_norm": 3.3623869726867768,
|
|
"learning_rate": 1.0847457627118645e-05,
|
|
"loss": 1.509070873260498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.382954865694046,
|
|
"step": 33,
|
|
"valid_targets_mean": 15760.6,
|
|
"valid_targets_min": 9774
|
|
},
|
|
{
|
|
"epoch": 0.14498933901918976,
|
|
"grad_norm": 3.077441715939849,
|
|
"learning_rate": 1.1186440677966102e-05,
|
|
"loss": 1.4670354127883911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38647815585136414,
|
|
"step": 34,
|
|
"valid_targets_mean": 15682.1,
|
|
"valid_targets_min": 11789
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 2.7875707604785407,
|
|
"learning_rate": 1.1525423728813561e-05,
|
|
"loss": 1.439798355102539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.369956374168396,
|
|
"step": 35,
|
|
"valid_targets_mean": 15191.7,
|
|
"valid_targets_min": 5297
|
|
},
|
|
{
|
|
"epoch": 0.1535181236673774,
|
|
"grad_norm": 2.7217130683106054,
|
|
"learning_rate": 1.1864406779661018e-05,
|
|
"loss": 1.429795503616333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37528812885284424,
|
|
"step": 36,
|
|
"valid_targets_mean": 15571.7,
|
|
"valid_targets_min": 8792
|
|
},
|
|
{
|
|
"epoch": 0.15778251599147122,
|
|
"grad_norm": 2.452349939554238,
|
|
"learning_rate": 1.2203389830508477e-05,
|
|
"loss": 1.4219616651535034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3755497932434082,
|
|
"step": 37,
|
|
"valid_targets_mean": 15548.4,
|
|
"valid_targets_min": 5026
|
|
},
|
|
{
|
|
"epoch": 0.16204690831556504,
|
|
"grad_norm": 2.0406075011883225,
|
|
"learning_rate": 1.2542372881355932e-05,
|
|
"loss": 1.4063204526901245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36107003688812256,
|
|
"step": 38,
|
|
"valid_targets_mean": 15412.9,
|
|
"valid_targets_min": 7438
|
|
},
|
|
{
|
|
"epoch": 0.16631130063965885,
|
|
"grad_norm": 1.8192030106266306,
|
|
"learning_rate": 1.288135593220339e-05,
|
|
"loss": 1.4204202890396118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3498168885707855,
|
|
"step": 39,
|
|
"valid_targets_mean": 15181.0,
|
|
"valid_targets_min": 4903
|
|
},
|
|
{
|
|
"epoch": 0.17057569296375266,
|
|
"grad_norm": 1.502103623020904,
|
|
"learning_rate": 1.3220338983050848e-05,
|
|
"loss": 1.3523015975952148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35391467809677124,
|
|
"step": 40,
|
|
"valid_targets_mean": 14799.0,
|
|
"valid_targets_min": 5119
|
|
},
|
|
{
|
|
"epoch": 0.17484008528784648,
|
|
"grad_norm": 1.2704982545285404,
|
|
"learning_rate": 1.3559322033898305e-05,
|
|
"loss": 1.3674108982086182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3485657870769501,
|
|
"step": 41,
|
|
"valid_targets_mean": 15266.0,
|
|
"valid_targets_min": 7127
|
|
},
|
|
{
|
|
"epoch": 0.1791044776119403,
|
|
"grad_norm": 1.279513706830006,
|
|
"learning_rate": 1.3898305084745764e-05,
|
|
"loss": 1.3254966735839844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31960922479629517,
|
|
"step": 42,
|
|
"valid_targets_mean": 15077.7,
|
|
"valid_targets_min": 7788
|
|
},
|
|
{
|
|
"epoch": 0.18336886993603413,
|
|
"grad_norm": 1.1317311262001388,
|
|
"learning_rate": 1.4237288135593221e-05,
|
|
"loss": 1.3530699014663696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3286569118499756,
|
|
"step": 43,
|
|
"valid_targets_mean": 14599.4,
|
|
"valid_targets_min": 2746
|
|
},
|
|
{
|
|
"epoch": 0.18763326226012794,
|
|
"grad_norm": 1.0641683167005649,
|
|
"learning_rate": 1.4576271186440678e-05,
|
|
"loss": 1.3112722635269165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3280591368675232,
|
|
"step": 44,
|
|
"valid_targets_mean": 15166.9,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 0.19189765458422176,
|
|
"grad_norm": 0.9774596445143066,
|
|
"learning_rate": 1.4915254237288137e-05,
|
|
"loss": 1.3113558292388916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31965869665145874,
|
|
"step": 45,
|
|
"valid_targets_mean": 14425.3,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 0.19616204690831557,
|
|
"grad_norm": 0.8886761466812477,
|
|
"learning_rate": 1.5254237288135594e-05,
|
|
"loss": 1.3103699684143066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3260655701160431,
|
|
"step": 46,
|
|
"valid_targets_mean": 14651.3,
|
|
"valid_targets_min": 3805
|
|
},
|
|
{
|
|
"epoch": 0.20042643923240938,
|
|
"grad_norm": 0.867150505024027,
|
|
"learning_rate": 1.5593220338983053e-05,
|
|
"loss": 1.3560914993286133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.351958304643631,
|
|
"step": 47,
|
|
"valid_targets_mean": 15030.9,
|
|
"valid_targets_min": 2768
|
|
},
|
|
{
|
|
"epoch": 0.2046908315565032,
|
|
"grad_norm": 0.6978933550897056,
|
|
"learning_rate": 1.593220338983051e-05,
|
|
"loss": 1.2680150270462036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3324684500694275,
|
|
"step": 48,
|
|
"valid_targets_mean": 15424.2,
|
|
"valid_targets_min": 6409
|
|
},
|
|
{
|
|
"epoch": 0.208955223880597,
|
|
"grad_norm": 0.6586270729941489,
|
|
"learning_rate": 1.6271186440677967e-05,
|
|
"loss": 1.2598673105239868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970837652683258,
|
|
"step": 49,
|
|
"valid_targets_mean": 14532.2,
|
|
"valid_targets_min": 3775
|
|
},
|
|
{
|
|
"epoch": 0.21321961620469082,
|
|
"grad_norm": 0.6298545489956912,
|
|
"learning_rate": 1.6610169491525424e-05,
|
|
"loss": 1.2781181335449219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32271119952201843,
|
|
"step": 50,
|
|
"valid_targets_mean": 14875.0,
|
|
"valid_targets_min": 2632
|
|
},
|
|
{
|
|
"epoch": 0.21748400852878466,
|
|
"grad_norm": 0.611408985629821,
|
|
"learning_rate": 1.694915254237288e-05,
|
|
"loss": 1.2840487957000732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33150315284729004,
|
|
"step": 51,
|
|
"valid_targets_mean": 15284.7,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 0.22174840085287847,
|
|
"grad_norm": 0.5231312351893965,
|
|
"learning_rate": 1.728813559322034e-05,
|
|
"loss": 1.2553491592407227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32812148332595825,
|
|
"step": 52,
|
|
"valid_targets_mean": 14851.4,
|
|
"valid_targets_min": 3202
|
|
},
|
|
{
|
|
"epoch": 0.2260127931769723,
|
|
"grad_norm": 0.49883544363495685,
|
|
"learning_rate": 1.76271186440678e-05,
|
|
"loss": 1.2403368949890137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33563610911369324,
|
|
"step": 53,
|
|
"valid_targets_mean": 14730.1,
|
|
"valid_targets_min": 6607
|
|
},
|
|
{
|
|
"epoch": 0.2302771855010661,
|
|
"grad_norm": 0.5070577245361446,
|
|
"learning_rate": 1.7966101694915256e-05,
|
|
"loss": 1.2707104682922363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3156570792198181,
|
|
"step": 54,
|
|
"valid_targets_mean": 15551.0,
|
|
"valid_targets_min": 10611
|
|
},
|
|
{
|
|
"epoch": 0.2345415778251599,
|
|
"grad_norm": 0.45288562442399766,
|
|
"learning_rate": 1.8305084745762713e-05,
|
|
"loss": 1.279565453529358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3232952654361725,
|
|
"step": 55,
|
|
"valid_targets_mean": 15073.3,
|
|
"valid_targets_min": 6688
|
|
},
|
|
{
|
|
"epoch": 0.23880597014925373,
|
|
"grad_norm": 0.5290725780424694,
|
|
"learning_rate": 1.864406779661017e-05,
|
|
"loss": 1.2902295589447021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31880152225494385,
|
|
"step": 56,
|
|
"valid_targets_mean": 13424.9,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 0.24307036247334754,
|
|
"grad_norm": 0.4538847755630704,
|
|
"learning_rate": 1.898305084745763e-05,
|
|
"loss": 1.2620139122009277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.323620080947876,
|
|
"step": 57,
|
|
"valid_targets_mean": 14232.4,
|
|
"valid_targets_min": 4414
|
|
},
|
|
{
|
|
"epoch": 0.24733475479744135,
|
|
"grad_norm": 0.4305521236043809,
|
|
"learning_rate": 1.9322033898305087e-05,
|
|
"loss": 1.2902776002883911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3640017807483673,
|
|
"step": 58,
|
|
"valid_targets_mean": 14867.7,
|
|
"valid_targets_min": 5976
|
|
},
|
|
{
|
|
"epoch": 0.2515991471215352,
|
|
"grad_norm": 0.37477169557749734,
|
|
"learning_rate": 1.9661016949152545e-05,
|
|
"loss": 1.2305490970611572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32125768065452576,
|
|
"step": 59,
|
|
"valid_targets_mean": 15468.5,
|
|
"valid_targets_min": 6915
|
|
},
|
|
{
|
|
"epoch": 0.255863539445629,
|
|
"grad_norm": 0.3817273149939568,
|
|
"learning_rate": 2e-05,
|
|
"loss": 1.2429394721984863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2957238256931305,
|
|
"step": 60,
|
|
"valid_targets_mean": 14228.8,
|
|
"valid_targets_min": 2944
|
|
},
|
|
{
|
|
"epoch": 0.2601279317697228,
|
|
"grad_norm": 0.3570598150770374,
|
|
"learning_rate": 2.033898305084746e-05,
|
|
"loss": 1.261240005493164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32640916109085083,
|
|
"step": 61,
|
|
"valid_targets_mean": 15100.5,
|
|
"valid_targets_min": 1716
|
|
},
|
|
{
|
|
"epoch": 0.26439232409381663,
|
|
"grad_norm": 0.3645149378751711,
|
|
"learning_rate": 2.0677966101694916e-05,
|
|
"loss": 1.2335644960403442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30785736441612244,
|
|
"step": 62,
|
|
"valid_targets_mean": 14905.6,
|
|
"valid_targets_min": 2641
|
|
},
|
|
{
|
|
"epoch": 0.26865671641791045,
|
|
"grad_norm": 0.33892261474262636,
|
|
"learning_rate": 2.1016949152542376e-05,
|
|
"loss": 1.201655626296997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29098716378211975,
|
|
"step": 63,
|
|
"valid_targets_mean": 14317.0,
|
|
"valid_targets_min": 3219
|
|
},
|
|
{
|
|
"epoch": 0.27292110874200426,
|
|
"grad_norm": 0.37778201349086643,
|
|
"learning_rate": 2.1355932203389833e-05,
|
|
"loss": 1.2824424505233765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3140537142753601,
|
|
"step": 64,
|
|
"valid_targets_mean": 14770.4,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 0.2771855010660981,
|
|
"grad_norm": 0.3788695246680898,
|
|
"learning_rate": 2.169491525423729e-05,
|
|
"loss": 1.2558549642562866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33100056648254395,
|
|
"step": 65,
|
|
"valid_targets_mean": 15132.9,
|
|
"valid_targets_min": 3888
|
|
},
|
|
{
|
|
"epoch": 0.2814498933901919,
|
|
"grad_norm": 0.31284044068396766,
|
|
"learning_rate": 2.2033898305084748e-05,
|
|
"loss": 1.2463667392730713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32382726669311523,
|
|
"step": 66,
|
|
"valid_targets_mean": 14523.2,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.33675881541153835,
|
|
"learning_rate": 2.2372881355932205e-05,
|
|
"loss": 1.211066484451294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31038177013397217,
|
|
"step": 67,
|
|
"valid_targets_mean": 15063.5,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 0.2899786780383795,
|
|
"grad_norm": 0.30764419001783483,
|
|
"learning_rate": 2.2711864406779665e-05,
|
|
"loss": 1.2415111064910889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32569655776023865,
|
|
"step": 68,
|
|
"valid_targets_mean": 14883.2,
|
|
"valid_targets_min": 5211
|
|
},
|
|
{
|
|
"epoch": 0.2942430703624733,
|
|
"grad_norm": 0.31007462729839713,
|
|
"learning_rate": 2.3050847457627122e-05,
|
|
"loss": 1.2593109607696533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28648436069488525,
|
|
"step": 69,
|
|
"valid_targets_mean": 15228.7,
|
|
"valid_targets_min": 4369
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.29151985488501436,
|
|
"learning_rate": 2.338983050847458e-05,
|
|
"loss": 1.2496416568756104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3293963372707367,
|
|
"step": 70,
|
|
"valid_targets_mean": 14773.6,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 0.302771855010661,
|
|
"grad_norm": 0.26553622861690374,
|
|
"learning_rate": 2.3728813559322036e-05,
|
|
"loss": 1.1612809896469116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28408282995224,
|
|
"step": 71,
|
|
"valid_targets_mean": 15099.8,
|
|
"valid_targets_min": 6361
|
|
},
|
|
{
|
|
"epoch": 0.3070362473347548,
|
|
"grad_norm": 0.282168884696635,
|
|
"learning_rate": 2.406779661016949e-05,
|
|
"loss": 1.199310064315796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30075767636299133,
|
|
"step": 72,
|
|
"valid_targets_mean": 14710.6,
|
|
"valid_targets_min": 2834
|
|
},
|
|
{
|
|
"epoch": 0.31130063965884863,
|
|
"grad_norm": 0.26313781745202286,
|
|
"learning_rate": 2.4406779661016954e-05,
|
|
"loss": 1.2104355096817017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31199613213539124,
|
|
"step": 73,
|
|
"valid_targets_mean": 15397.6,
|
|
"valid_targets_min": 7400
|
|
},
|
|
{
|
|
"epoch": 0.31556503198294245,
|
|
"grad_norm": 0.2916206397471456,
|
|
"learning_rate": 2.474576271186441e-05,
|
|
"loss": 1.229551076889038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2988601326942444,
|
|
"step": 74,
|
|
"valid_targets_mean": 15627.9,
|
|
"valid_targets_min": 8606
|
|
},
|
|
{
|
|
"epoch": 0.31982942430703626,
|
|
"grad_norm": 0.29466291213180473,
|
|
"learning_rate": 2.5084745762711865e-05,
|
|
"loss": 1.227362871170044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30007031559944153,
|
|
"step": 75,
|
|
"valid_targets_mean": 14642.2,
|
|
"valid_targets_min": 3142
|
|
},
|
|
{
|
|
"epoch": 0.32409381663113007,
|
|
"grad_norm": 0.2549008942709039,
|
|
"learning_rate": 2.5423728813559322e-05,
|
|
"loss": 1.2207566499710083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30205586552619934,
|
|
"step": 76,
|
|
"valid_targets_mean": 14562.2,
|
|
"valid_targets_min": 3393
|
|
},
|
|
{
|
|
"epoch": 0.3283582089552239,
|
|
"grad_norm": 0.2576377148416891,
|
|
"learning_rate": 2.576271186440678e-05,
|
|
"loss": 1.1718388795852661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2792397141456604,
|
|
"step": 77,
|
|
"valid_targets_mean": 14332.6,
|
|
"valid_targets_min": 2705
|
|
},
|
|
{
|
|
"epoch": 0.3326226012793177,
|
|
"grad_norm": 0.26834680952288187,
|
|
"learning_rate": 2.610169491525424e-05,
|
|
"loss": 1.2270526885986328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3089393079280853,
|
|
"step": 78,
|
|
"valid_targets_mean": 14822.8,
|
|
"valid_targets_min": 4243
|
|
},
|
|
{
|
|
"epoch": 0.3368869936034115,
|
|
"grad_norm": 0.2592568670684148,
|
|
"learning_rate": 2.6440677966101696e-05,
|
|
"loss": 1.1853585243225098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29312610626220703,
|
|
"step": 79,
|
|
"valid_targets_mean": 14435.2,
|
|
"valid_targets_min": 4632
|
|
},
|
|
{
|
|
"epoch": 0.3411513859275053,
|
|
"grad_norm": 0.28595591728726166,
|
|
"learning_rate": 2.6779661016949153e-05,
|
|
"loss": 1.1915867328643799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720337212085724,
|
|
"step": 80,
|
|
"valid_targets_mean": 13855.5,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 0.34541577825159914,
|
|
"grad_norm": 0.2573363604315349,
|
|
"learning_rate": 2.711864406779661e-05,
|
|
"loss": 1.211092233657837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31523624062538147,
|
|
"step": 81,
|
|
"valid_targets_mean": 14988.9,
|
|
"valid_targets_min": 5031
|
|
},
|
|
{
|
|
"epoch": 0.34968017057569295,
|
|
"grad_norm": 0.2342943139176118,
|
|
"learning_rate": 2.7457627118644068e-05,
|
|
"loss": 1.1675188541412354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29798319935798645,
|
|
"step": 82,
|
|
"valid_targets_mean": 15173.8,
|
|
"valid_targets_min": 8052
|
|
},
|
|
{
|
|
"epoch": 0.35394456289978676,
|
|
"grad_norm": 0.19877480713294712,
|
|
"learning_rate": 2.7796610169491528e-05,
|
|
"loss": 1.1393442153930664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28084176778793335,
|
|
"step": 83,
|
|
"valid_targets_mean": 15112.2,
|
|
"valid_targets_min": 4150
|
|
},
|
|
{
|
|
"epoch": 0.3582089552238806,
|
|
"grad_norm": 0.27179759318244323,
|
|
"learning_rate": 2.8135593220338985e-05,
|
|
"loss": 1.2124311923980713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32078754901885986,
|
|
"step": 84,
|
|
"valid_targets_mean": 14934.1,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 0.3624733475479744,
|
|
"grad_norm": 0.23832409069098376,
|
|
"learning_rate": 2.8474576271186442e-05,
|
|
"loss": 1.1817233562469482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796356678009033,
|
|
"step": 85,
|
|
"valid_targets_mean": 14164.3,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.36673773987206826,
|
|
"grad_norm": 0.2688215630024535,
|
|
"learning_rate": 2.88135593220339e-05,
|
|
"loss": 1.1816399097442627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843422591686249,
|
|
"step": 86,
|
|
"valid_targets_mean": 14402.0,
|
|
"valid_targets_min": 2864
|
|
},
|
|
{
|
|
"epoch": 0.37100213219616207,
|
|
"grad_norm": 0.22649654551596493,
|
|
"learning_rate": 2.9152542372881356e-05,
|
|
"loss": 1.1729528903961182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28629714250564575,
|
|
"step": 87,
|
|
"valid_targets_mean": 14807.8,
|
|
"valid_targets_min": 5090
|
|
},
|
|
{
|
|
"epoch": 0.3752665245202559,
|
|
"grad_norm": 0.257460340358337,
|
|
"learning_rate": 2.9491525423728817e-05,
|
|
"loss": 1.2199232578277588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32469049096107483,
|
|
"step": 88,
|
|
"valid_targets_mean": 14527.9,
|
|
"valid_targets_min": 4227
|
|
},
|
|
{
|
|
"epoch": 0.3795309168443497,
|
|
"grad_norm": 0.21834766547678558,
|
|
"learning_rate": 2.9830508474576274e-05,
|
|
"loss": 1.1780121326446533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29766976833343506,
|
|
"step": 89,
|
|
"valid_targets_mean": 14080.5,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 0.3837953091684435,
|
|
"grad_norm": 0.22473034066650938,
|
|
"learning_rate": 3.016949152542373e-05,
|
|
"loss": 1.212519884109497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092614412307739,
|
|
"step": 90,
|
|
"valid_targets_mean": 14296.3,
|
|
"valid_targets_min": 4603
|
|
},
|
|
{
|
|
"epoch": 0.3880597014925373,
|
|
"grad_norm": 0.22423131093306164,
|
|
"learning_rate": 3.0508474576271188e-05,
|
|
"loss": 1.2223443984985352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30183011293411255,
|
|
"step": 91,
|
|
"valid_targets_mean": 14442.9,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 0.39232409381663114,
|
|
"grad_norm": 0.18683420496979372,
|
|
"learning_rate": 3.084745762711865e-05,
|
|
"loss": 1.1356593370437622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865544855594635,
|
|
"step": 92,
|
|
"valid_targets_mean": 15076.7,
|
|
"valid_targets_min": 9011
|
|
},
|
|
{
|
|
"epoch": 0.39658848614072495,
|
|
"grad_norm": 0.23936333466282408,
|
|
"learning_rate": 3.1186440677966106e-05,
|
|
"loss": 1.2264689207077026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072168827056885,
|
|
"step": 93,
|
|
"valid_targets_mean": 14920.3,
|
|
"valid_targets_min": 5698
|
|
},
|
|
{
|
|
"epoch": 0.40085287846481876,
|
|
"grad_norm": 0.21590792040283036,
|
|
"learning_rate": 3.152542372881356e-05,
|
|
"loss": 1.1622889041900635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30374616384506226,
|
|
"step": 94,
|
|
"valid_targets_mean": 14341.4,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 0.4051172707889126,
|
|
"grad_norm": 0.2069156721423661,
|
|
"learning_rate": 3.186440677966102e-05,
|
|
"loss": 1.1822681427001953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31677788496017456,
|
|
"step": 95,
|
|
"valid_targets_mean": 14810.8,
|
|
"valid_targets_min": 7825
|
|
},
|
|
{
|
|
"epoch": 0.4093816631130064,
|
|
"grad_norm": 0.24683228222573494,
|
|
"learning_rate": 3.2203389830508473e-05,
|
|
"loss": 1.1557495594024658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958802878856659,
|
|
"step": 96,
|
|
"valid_targets_mean": 14433.6,
|
|
"valid_targets_min": 7417
|
|
},
|
|
{
|
|
"epoch": 0.4136460554371002,
|
|
"grad_norm": 0.18561933156683028,
|
|
"learning_rate": 3.2542372881355934e-05,
|
|
"loss": 1.1642088890075684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30265098810195923,
|
|
"step": 97,
|
|
"valid_targets_mean": 15319.0,
|
|
"valid_targets_min": 3176
|
|
},
|
|
{
|
|
"epoch": 0.417910447761194,
|
|
"grad_norm": 0.17643190018249677,
|
|
"learning_rate": 3.2881355932203394e-05,
|
|
"loss": 1.1388523578643799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29216450452804565,
|
|
"step": 98,
|
|
"valid_targets_mean": 15300.7,
|
|
"valid_targets_min": 8249
|
|
},
|
|
{
|
|
"epoch": 0.42217484008528783,
|
|
"grad_norm": 0.2148851445465397,
|
|
"learning_rate": 3.322033898305085e-05,
|
|
"loss": 1.175142526626587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3050539493560791,
|
|
"step": 99,
|
|
"valid_targets_mean": 15150.8,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 0.42643923240938164,
|
|
"grad_norm": 0.22119889412054472,
|
|
"learning_rate": 3.355932203389831e-05,
|
|
"loss": 1.1640441417694092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903299331665039,
|
|
"step": 100,
|
|
"valid_targets_mean": 15056.3,
|
|
"valid_targets_min": 7409
|
|
},
|
|
{
|
|
"epoch": 0.43070362473347545,
|
|
"grad_norm": 0.23698273121150892,
|
|
"learning_rate": 3.389830508474576e-05,
|
|
"loss": 1.1739205121994019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950419783592224,
|
|
"step": 101,
|
|
"valid_targets_mean": 15414.8,
|
|
"valid_targets_min": 7871
|
|
},
|
|
{
|
|
"epoch": 0.4349680170575693,
|
|
"grad_norm": 0.24734898745279307,
|
|
"learning_rate": 3.423728813559322e-05,
|
|
"loss": 1.2048437595367432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30449986457824707,
|
|
"step": 102,
|
|
"valid_targets_mean": 15500.8,
|
|
"valid_targets_min": 6371
|
|
},
|
|
{
|
|
"epoch": 0.43923240938166314,
|
|
"grad_norm": 0.22838037683126972,
|
|
"learning_rate": 3.457627118644068e-05,
|
|
"loss": 1.1827456951141357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30405759811401367,
|
|
"step": 103,
|
|
"valid_targets_mean": 15590.4,
|
|
"valid_targets_min": 9132
|
|
},
|
|
{
|
|
"epoch": 0.44349680170575695,
|
|
"grad_norm": 0.1899632820175523,
|
|
"learning_rate": 3.491525423728814e-05,
|
|
"loss": 1.1353181600570679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27248257398605347,
|
|
"step": 104,
|
|
"valid_targets_mean": 13723.0,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 0.1893495240420767,
|
|
"learning_rate": 3.52542372881356e-05,
|
|
"loss": 1.1199802160263062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271077036857605,
|
|
"step": 105,
|
|
"valid_targets_mean": 15511.4,
|
|
"valid_targets_min": 7773
|
|
},
|
|
{
|
|
"epoch": 0.4520255863539446,
|
|
"grad_norm": 0.1870760141178231,
|
|
"learning_rate": 3.559322033898305e-05,
|
|
"loss": 1.1274532079696655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28740498423576355,
|
|
"step": 106,
|
|
"valid_targets_mean": 15417.5,
|
|
"valid_targets_min": 8403
|
|
},
|
|
{
|
|
"epoch": 0.4562899786780384,
|
|
"grad_norm": 0.18128773432383705,
|
|
"learning_rate": 3.593220338983051e-05,
|
|
"loss": 1.1023752689361572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651640474796295,
|
|
"step": 107,
|
|
"valid_targets_mean": 15189.9,
|
|
"valid_targets_min": 2801
|
|
},
|
|
{
|
|
"epoch": 0.4605543710021322,
|
|
"grad_norm": 0.20722213303473414,
|
|
"learning_rate": 3.627118644067797e-05,
|
|
"loss": 1.1903117895126343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31424200534820557,
|
|
"step": 108,
|
|
"valid_targets_mean": 15165.4,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 0.464818763326226,
|
|
"grad_norm": 0.22007617972531787,
|
|
"learning_rate": 3.6610169491525426e-05,
|
|
"loss": 1.1847251653671265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29289525747299194,
|
|
"step": 109,
|
|
"valid_targets_mean": 14900.2,
|
|
"valid_targets_min": 3567
|
|
},
|
|
{
|
|
"epoch": 0.4690831556503198,
|
|
"grad_norm": 0.18609065046651505,
|
|
"learning_rate": 3.6949152542372886e-05,
|
|
"loss": 1.1395936012268066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833573520183563,
|
|
"step": 110,
|
|
"valid_targets_mean": 14091.4,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 0.47334754797441364,
|
|
"grad_norm": 0.1896764397690769,
|
|
"learning_rate": 3.728813559322034e-05,
|
|
"loss": 1.1483747959136963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29909899830818176,
|
|
"step": 111,
|
|
"valid_targets_mean": 15633.7,
|
|
"valid_targets_min": 9677
|
|
},
|
|
{
|
|
"epoch": 0.47761194029850745,
|
|
"grad_norm": 0.21657203678307657,
|
|
"learning_rate": 3.76271186440678e-05,
|
|
"loss": 1.2104594707489014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.304294228553772,
|
|
"step": 112,
|
|
"valid_targets_mean": 15184.1,
|
|
"valid_targets_min": 3399
|
|
},
|
|
{
|
|
"epoch": 0.48187633262260127,
|
|
"grad_norm": 0.2318529429539571,
|
|
"learning_rate": 3.796610169491526e-05,
|
|
"loss": 1.1787766218185425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3219996690750122,
|
|
"step": 113,
|
|
"valid_targets_mean": 14875.3,
|
|
"valid_targets_min": 4874
|
|
},
|
|
{
|
|
"epoch": 0.4861407249466951,
|
|
"grad_norm": 0.23313336032796225,
|
|
"learning_rate": 3.8305084745762714e-05,
|
|
"loss": 1.1813091039657593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842368185520172,
|
|
"step": 114,
|
|
"valid_targets_mean": 14792.7,
|
|
"valid_targets_min": 3378
|
|
},
|
|
{
|
|
"epoch": 0.4904051172707889,
|
|
"grad_norm": 0.2865459503023693,
|
|
"learning_rate": 3.8644067796610175e-05,
|
|
"loss": 1.1326351165771484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29066264629364014,
|
|
"step": 115,
|
|
"valid_targets_mean": 15213.5,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 0.4946695095948827,
|
|
"grad_norm": 0.18733606326268779,
|
|
"learning_rate": 3.898305084745763e-05,
|
|
"loss": 1.1477142572402954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806292176246643,
|
|
"step": 116,
|
|
"valid_targets_mean": 14922.4,
|
|
"valid_targets_min": 5314
|
|
},
|
|
{
|
|
"epoch": 0.4989339019189765,
|
|
"grad_norm": 0.20824519757855833,
|
|
"learning_rate": 3.932203389830509e-05,
|
|
"loss": 1.1102975606918335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27820122241973877,
|
|
"step": 117,
|
|
"valid_targets_mean": 15095.6,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 0.5031982942430704,
|
|
"grad_norm": 0.21111065222975572,
|
|
"learning_rate": 3.966101694915255e-05,
|
|
"loss": 1.1591339111328125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27559608221054077,
|
|
"step": 118,
|
|
"valid_targets_mean": 14135.6,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 0.5074626865671642,
|
|
"grad_norm": 0.2322836217059597,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.1544227600097656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784320116043091,
|
|
"step": 119,
|
|
"valid_targets_mean": 14575.9,
|
|
"valid_targets_min": 3951
|
|
},
|
|
{
|
|
"epoch": 0.511727078891258,
|
|
"grad_norm": 0.2710200716654449,
|
|
"learning_rate": 3.999991166161585e-05,
|
|
"loss": 1.2046982049942017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911280393600464,
|
|
"step": 120,
|
|
"valid_targets_mean": 14812.0,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 0.5159914712153518,
|
|
"grad_norm": 0.23275165068835507,
|
|
"learning_rate": 3.999964664724376e-05,
|
|
"loss": 1.149275541305542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707687020301819,
|
|
"step": 121,
|
|
"valid_targets_mean": 14733.4,
|
|
"valid_targets_min": 2274
|
|
},
|
|
{
|
|
"epoch": 0.5202558635394456,
|
|
"grad_norm": 0.2188757010741002,
|
|
"learning_rate": 3.999920495922483e-05,
|
|
"loss": 1.1391420364379883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2791445255279541,
|
|
"step": 122,
|
|
"valid_targets_mean": 14549.0,
|
|
"valid_targets_min": 5406
|
|
},
|
|
{
|
|
"epoch": 0.5245202558635395,
|
|
"grad_norm": 0.18189706908612469,
|
|
"learning_rate": 3.999858660146085e-05,
|
|
"loss": 1.1473329067230225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28349411487579346,
|
|
"step": 123,
|
|
"valid_targets_mean": 16022.8,
|
|
"valid_targets_min": 12461
|
|
},
|
|
{
|
|
"epoch": 0.5287846481876333,
|
|
"grad_norm": 0.18166911355855728,
|
|
"learning_rate": 3.999779157941431e-05,
|
|
"loss": 1.1291731595993042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29129359126091003,
|
|
"step": 124,
|
|
"valid_targets_mean": 15672.1,
|
|
"valid_targets_min": 8473
|
|
},
|
|
{
|
|
"epoch": 0.5330490405117271,
|
|
"grad_norm": 0.20528213770322606,
|
|
"learning_rate": 3.99968199001083e-05,
|
|
"loss": 1.152724027633667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27808108925819397,
|
|
"step": 125,
|
|
"valid_targets_mean": 14904.5,
|
|
"valid_targets_min": 3339
|
|
},
|
|
{
|
|
"epoch": 0.5373134328358209,
|
|
"grad_norm": 0.21969182243753804,
|
|
"learning_rate": 3.999567157212646e-05,
|
|
"loss": 1.111305832862854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28850311040878296,
|
|
"step": 126,
|
|
"valid_targets_mean": 15135.7,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 0.5415778251599147,
|
|
"grad_norm": 0.18648321802421775,
|
|
"learning_rate": 3.9994346605612955e-05,
|
|
"loss": 1.1140379905700684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28390681743621826,
|
|
"step": 127,
|
|
"valid_targets_mean": 15173.3,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 0.5458422174840085,
|
|
"grad_norm": 0.23530061155007936,
|
|
"learning_rate": 3.999284501227232e-05,
|
|
"loss": 1.1197749376296997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651119828224182,
|
|
"step": 128,
|
|
"valid_targets_mean": 14356.0,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 0.5501066098081023,
|
|
"grad_norm": 0.20756437909189696,
|
|
"learning_rate": 3.9991166805369393e-05,
|
|
"loss": 1.1634900569915771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30470719933509827,
|
|
"step": 129,
|
|
"valid_targets_mean": 14560.9,
|
|
"valid_targets_min": 2591
|
|
},
|
|
{
|
|
"epoch": 0.5543710021321961,
|
|
"grad_norm": 0.17806553404432293,
|
|
"learning_rate": 3.9989311999729166e-05,
|
|
"loss": 1.1636724472045898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3105810880661011,
|
|
"step": 130,
|
|
"valid_targets_mean": 14886.5,
|
|
"valid_targets_min": 6638
|
|
},
|
|
{
|
|
"epoch": 0.55863539445629,
|
|
"grad_norm": 0.18548318006086834,
|
|
"learning_rate": 3.99872806117367e-05,
|
|
"loss": 1.1348915100097656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2712128162384033,
|
|
"step": 131,
|
|
"valid_targets_mean": 14285.8,
|
|
"valid_targets_min": 2962
|
|
},
|
|
{
|
|
"epoch": 0.5628997867803838,
|
|
"grad_norm": 0.18014829073575278,
|
|
"learning_rate": 3.998507265933696e-05,
|
|
"loss": 1.138692855834961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761489450931549,
|
|
"step": 132,
|
|
"valid_targets_mean": 14861.4,
|
|
"valid_targets_min": 4176
|
|
},
|
|
{
|
|
"epoch": 0.5671641791044776,
|
|
"grad_norm": 0.19763664170497103,
|
|
"learning_rate": 3.9982688162034624e-05,
|
|
"loss": 1.1620148420333862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28988659381866455,
|
|
"step": 133,
|
|
"valid_targets_mean": 15048.1,
|
|
"valid_targets_min": 2960
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.19597942625944698,
|
|
"learning_rate": 3.998012714089397e-05,
|
|
"loss": 1.1668446063995361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29461246728897095,
|
|
"step": 134,
|
|
"valid_targets_mean": 14693.3,
|
|
"valid_targets_min": 5121
|
|
},
|
|
{
|
|
"epoch": 0.5756929637526652,
|
|
"grad_norm": 0.18817064634454994,
|
|
"learning_rate": 3.997738961853863e-05,
|
|
"loss": 1.1524537801742554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29617851972579956,
|
|
"step": 135,
|
|
"valid_targets_mean": 15131.8,
|
|
"valid_targets_min": 2618
|
|
},
|
|
{
|
|
"epoch": 0.579957356076759,
|
|
"grad_norm": 0.17696374148992294,
|
|
"learning_rate": 3.9974475619151445e-05,
|
|
"loss": 1.121335744857788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27633947134017944,
|
|
"step": 136,
|
|
"valid_targets_mean": 15332.3,
|
|
"valid_targets_min": 5632
|
|
},
|
|
{
|
|
"epoch": 0.5842217484008528,
|
|
"grad_norm": 0.18262162123023695,
|
|
"learning_rate": 3.997138516847422e-05,
|
|
"loss": 1.094153642654419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28466087579727173,
|
|
"step": 137,
|
|
"valid_targets_mean": 15506.9,
|
|
"valid_targets_min": 7124
|
|
},
|
|
{
|
|
"epoch": 0.5884861407249466,
|
|
"grad_norm": 0.17335247209101903,
|
|
"learning_rate": 3.9968118293807476e-05,
|
|
"loss": 1.1545405387878418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700340747833252,
|
|
"step": 138,
|
|
"valid_targets_mean": 14411.7,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 0.5927505330490405,
|
|
"grad_norm": 0.18739288689825637,
|
|
"learning_rate": 3.996467502401028e-05,
|
|
"loss": 1.1366984844207764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842878997325897,
|
|
"step": 139,
|
|
"valid_targets_mean": 15288.6,
|
|
"valid_targets_min": 3103
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.1694782461744407,
|
|
"learning_rate": 3.9961055389499904e-05,
|
|
"loss": 1.1021239757537842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27984392642974854,
|
|
"step": 140,
|
|
"valid_targets_mean": 15177.2,
|
|
"valid_targets_min": 6389
|
|
},
|
|
{
|
|
"epoch": 0.6012793176972282,
|
|
"grad_norm": 0.17506510062496997,
|
|
"learning_rate": 3.995725942225162e-05,
|
|
"loss": 1.1278494596481323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26832592487335205,
|
|
"step": 141,
|
|
"valid_targets_mean": 13890.7,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 0.605543710021322,
|
|
"grad_norm": 0.1978149550323814,
|
|
"learning_rate": 3.995328715579839e-05,
|
|
"loss": 1.13820219039917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887899875640869,
|
|
"step": 142,
|
|
"valid_targets_mean": 15583.9,
|
|
"valid_targets_min": 7852
|
|
},
|
|
{
|
|
"epoch": 0.6098081023454158,
|
|
"grad_norm": 0.15715197732905914,
|
|
"learning_rate": 3.994913862523058e-05,
|
|
"loss": 1.103088140487671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29257047176361084,
|
|
"step": 143,
|
|
"valid_targets_mean": 15232.1,
|
|
"valid_targets_min": 3068
|
|
},
|
|
{
|
|
"epoch": 0.6140724946695096,
|
|
"grad_norm": 0.17529266877105765,
|
|
"learning_rate": 3.9944813867195624e-05,
|
|
"loss": 1.1433424949645996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24143539369106293,
|
|
"step": 144,
|
|
"valid_targets_mean": 13227.5,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.6183368869936035,
|
|
"grad_norm": 0.2004398857873124,
|
|
"learning_rate": 3.9940312919897744e-05,
|
|
"loss": 1.1780171394348145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3126358687877655,
|
|
"step": 145,
|
|
"valid_targets_mean": 15729.8,
|
|
"valid_targets_min": 9974
|
|
},
|
|
{
|
|
"epoch": 0.6226012793176973,
|
|
"grad_norm": 0.14474520036548985,
|
|
"learning_rate": 3.993563582309759e-05,
|
|
"loss": 1.1073493957519531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25966954231262207,
|
|
"step": 146,
|
|
"valid_targets_mean": 15278.3,
|
|
"valid_targets_min": 4366
|
|
},
|
|
{
|
|
"epoch": 0.6268656716417911,
|
|
"grad_norm": 0.18882046907257752,
|
|
"learning_rate": 3.993078261811186e-05,
|
|
"loss": 1.1332874298095703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28338098526000977,
|
|
"step": 147,
|
|
"valid_targets_mean": 15488.8,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 0.6311300639658849,
|
|
"grad_norm": 0.1673714307844799,
|
|
"learning_rate": 3.9925753347813e-05,
|
|
"loss": 1.179833173751831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29067516326904297,
|
|
"step": 148,
|
|
"valid_targets_mean": 14679.1,
|
|
"valid_targets_min": 2834
|
|
},
|
|
{
|
|
"epoch": 0.6353944562899787,
|
|
"grad_norm": 0.180023287639483,
|
|
"learning_rate": 3.992054805662876e-05,
|
|
"loss": 1.1655330657958984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2942131757736206,
|
|
"step": 149,
|
|
"valid_targets_mean": 14476.6,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 0.6396588486140725,
|
|
"grad_norm": 0.15842311678929336,
|
|
"learning_rate": 3.991516679054185e-05,
|
|
"loss": 1.1212868690490723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804386615753174,
|
|
"step": 150,
|
|
"valid_targets_mean": 14696.7,
|
|
"valid_targets_min": 5334
|
|
},
|
|
{
|
|
"epoch": 0.6439232409381663,
|
|
"grad_norm": 0.13966841128795948,
|
|
"learning_rate": 3.9909609597089496e-05,
|
|
"loss": 1.1268808841705322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27525758743286133,
|
|
"step": 151,
|
|
"valid_targets_mean": 14507.3,
|
|
"valid_targets_min": 4150
|
|
},
|
|
{
|
|
"epoch": 0.6481876332622601,
|
|
"grad_norm": 0.17531682600850934,
|
|
"learning_rate": 3.9903876525363055e-05,
|
|
"loss": 1.15196692943573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285283625125885,
|
|
"step": 152,
|
|
"valid_targets_mean": 15145.8,
|
|
"valid_targets_min": 7847
|
|
},
|
|
{
|
|
"epoch": 0.652452025586354,
|
|
"grad_norm": 0.15831262867201742,
|
|
"learning_rate": 3.989796762600755e-05,
|
|
"loss": 1.120331048965454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26696890592575073,
|
|
"step": 153,
|
|
"valid_targets_mean": 14839.2,
|
|
"valid_targets_min": 5196
|
|
},
|
|
{
|
|
"epoch": 0.6567164179104478,
|
|
"grad_norm": 0.15356917733409717,
|
|
"learning_rate": 3.9891882951221246e-05,
|
|
"loss": 1.2072778940200806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713392376899719,
|
|
"step": 154,
|
|
"valid_targets_mean": 14136.1,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 0.6609808102345416,
|
|
"grad_norm": 0.15946919062196696,
|
|
"learning_rate": 3.988562255475518e-05,
|
|
"loss": 1.1008577346801758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804443836212158,
|
|
"step": 155,
|
|
"valid_targets_mean": 15568.1,
|
|
"valid_targets_min": 7018
|
|
},
|
|
{
|
|
"epoch": 0.6652452025586354,
|
|
"grad_norm": 0.1473945663169557,
|
|
"learning_rate": 3.987918649191268e-05,
|
|
"loss": 1.142858624458313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28863316774368286,
|
|
"step": 156,
|
|
"valid_targets_mean": 14499.2,
|
|
"valid_targets_min": 6380
|
|
},
|
|
{
|
|
"epoch": 0.6695095948827292,
|
|
"grad_norm": 0.1464149921971081,
|
|
"learning_rate": 3.987257481954888e-05,
|
|
"loss": 1.1440634727478027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3091897964477539,
|
|
"step": 157,
|
|
"valid_targets_mean": 15097.0,
|
|
"valid_targets_min": 5838
|
|
},
|
|
{
|
|
"epoch": 0.673773987206823,
|
|
"grad_norm": 0.1559438697550048,
|
|
"learning_rate": 3.9865787596070236e-05,
|
|
"loss": 1.1320838928222656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28108370304107666,
|
|
"step": 158,
|
|
"valid_targets_mean": 14891.9,
|
|
"valid_targets_min": 5531
|
|
},
|
|
{
|
|
"epoch": 0.6780383795309168,
|
|
"grad_norm": 0.1463860512495006,
|
|
"learning_rate": 3.9858824881433975e-05,
|
|
"loss": 1.146373987197876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26698851585388184,
|
|
"step": 159,
|
|
"valid_targets_mean": 14650.7,
|
|
"valid_targets_min": 5075
|
|
},
|
|
{
|
|
"epoch": 0.6823027718550106,
|
|
"grad_norm": 0.14800624166047252,
|
|
"learning_rate": 3.9851686737147585e-05,
|
|
"loss": 1.1788896322250366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629927694797516,
|
|
"step": 160,
|
|
"valid_targets_mean": 14034.7,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 0.6865671641791045,
|
|
"grad_norm": 0.1484680361376596,
|
|
"learning_rate": 3.9844373226268305e-05,
|
|
"loss": 1.0999665260314941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718093991279602,
|
|
"step": 161,
|
|
"valid_targets_mean": 14443.7,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 0.6908315565031983,
|
|
"grad_norm": 0.1625380040504571,
|
|
"learning_rate": 3.983688441340249e-05,
|
|
"loss": 1.1586577892303467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006063997745514,
|
|
"step": 162,
|
|
"valid_targets_mean": 14396.2,
|
|
"valid_targets_min": 2776
|
|
},
|
|
{
|
|
"epoch": 0.6950959488272921,
|
|
"grad_norm": 0.15551941405753084,
|
|
"learning_rate": 3.98292203647051e-05,
|
|
"loss": 1.1422550678253174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28483057022094727,
|
|
"step": 163,
|
|
"valid_targets_mean": 14615.3,
|
|
"valid_targets_min": 4080
|
|
},
|
|
{
|
|
"epoch": 0.6993603411513859,
|
|
"grad_norm": 0.1497683685070015,
|
|
"learning_rate": 3.982138114787912e-05,
|
|
"loss": 1.1422796249389648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28290224075317383,
|
|
"step": 164,
|
|
"valid_targets_mean": 15051.8,
|
|
"valid_targets_min": 6250
|
|
},
|
|
{
|
|
"epoch": 0.7036247334754797,
|
|
"grad_norm": 0.12406388675079885,
|
|
"learning_rate": 3.98133668321749e-05,
|
|
"loss": 1.10989248752594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753506898880005,
|
|
"step": 165,
|
|
"valid_targets_mean": 14864.9,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 0.7078891257995735,
|
|
"grad_norm": 0.15908448688320512,
|
|
"learning_rate": 3.980517748838963e-05,
|
|
"loss": 1.2064687013626099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28861990571022034,
|
|
"step": 166,
|
|
"valid_targets_mean": 14201.4,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 0.7121535181236673,
|
|
"grad_norm": 0.13241862947179323,
|
|
"learning_rate": 3.979681318886664e-05,
|
|
"loss": 1.1194779872894287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28165143728256226,
|
|
"step": 167,
|
|
"valid_targets_mean": 14527.2,
|
|
"valid_targets_min": 2850
|
|
},
|
|
{
|
|
"epoch": 0.7164179104477612,
|
|
"grad_norm": 0.1526864709951152,
|
|
"learning_rate": 3.978827400749481e-05,
|
|
"loss": 1.1441411972045898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705109715461731,
|
|
"step": 168,
|
|
"valid_targets_mean": 14438.6,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 0.720682302771855,
|
|
"grad_norm": 0.13713852184898634,
|
|
"learning_rate": 3.977956001970788e-05,
|
|
"loss": 1.1992135047912598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31088030338287354,
|
|
"step": 169,
|
|
"valid_targets_mean": 15543.1,
|
|
"valid_targets_min": 9188
|
|
},
|
|
{
|
|
"epoch": 0.7249466950959488,
|
|
"grad_norm": 0.14966552540143047,
|
|
"learning_rate": 3.977067130248381e-05,
|
|
"loss": 1.1361427307128906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28439095616340637,
|
|
"step": 170,
|
|
"valid_targets_mean": 15418.8,
|
|
"valid_targets_min": 5937
|
|
},
|
|
{
|
|
"epoch": 0.7292110874200426,
|
|
"grad_norm": 0.13928076177764379,
|
|
"learning_rate": 3.9761607934344095e-05,
|
|
"loss": 1.078423023223877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26448649168014526,
|
|
"step": 171,
|
|
"valid_targets_mean": 14226.8,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 0.7334754797441365,
|
|
"grad_norm": 0.13495845491612382,
|
|
"learning_rate": 3.975236999535306e-05,
|
|
"loss": 1.09339439868927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26359957456588745,
|
|
"step": 172,
|
|
"valid_targets_mean": 14790.3,
|
|
"valid_targets_min": 3333
|
|
},
|
|
{
|
|
"epoch": 0.7377398720682303,
|
|
"grad_norm": 0.12058657267096401,
|
|
"learning_rate": 3.974295756711717e-05,
|
|
"loss": 1.138909101486206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752223014831543,
|
|
"step": 173,
|
|
"valid_targets_mean": 14813.6,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 0.7420042643923241,
|
|
"grad_norm": 0.14248053847759223,
|
|
"learning_rate": 3.9733370732784296e-05,
|
|
"loss": 1.15055251121521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29128724336624146,
|
|
"step": 174,
|
|
"valid_targets_mean": 14909.1,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.15695848742650992,
|
|
"learning_rate": 3.972360957704298e-05,
|
|
"loss": 1.193650245666504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26371991634368896,
|
|
"step": 175,
|
|
"valid_targets_mean": 14879.4,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 0.7505330490405118,
|
|
"grad_norm": 0.13939373132104746,
|
|
"learning_rate": 3.97136741861217e-05,
|
|
"loss": 1.107863187789917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29141396284103394,
|
|
"step": 176,
|
|
"valid_targets_mean": 15880.4,
|
|
"valid_targets_min": 11548
|
|
},
|
|
{
|
|
"epoch": 0.7547974413646056,
|
|
"grad_norm": 0.143994364121272,
|
|
"learning_rate": 3.970356464778808e-05,
|
|
"loss": 1.1400837898254395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2879534661769867,
|
|
"step": 177,
|
|
"valid_targets_mean": 15005.6,
|
|
"valid_targets_min": 3066
|
|
},
|
|
{
|
|
"epoch": 0.7590618336886994,
|
|
"grad_norm": 0.13660346643495577,
|
|
"learning_rate": 3.969328105134817e-05,
|
|
"loss": 1.123429298400879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834112346172333,
|
|
"step": 178,
|
|
"valid_targets_mean": 15478.3,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 0.7633262260127932,
|
|
"grad_norm": 0.13491222978566783,
|
|
"learning_rate": 3.9682823487645584e-05,
|
|
"loss": 1.1422916650772095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854680120944977,
|
|
"step": 179,
|
|
"valid_targets_mean": 14752.8,
|
|
"valid_targets_min": 5344
|
|
},
|
|
{
|
|
"epoch": 0.767590618336887,
|
|
"grad_norm": 0.1332118908758463,
|
|
"learning_rate": 3.9672192049060745e-05,
|
|
"loss": 1.1008455753326416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26276886463165283,
|
|
"step": 180,
|
|
"valid_targets_mean": 14767.6,
|
|
"valid_targets_min": 4143
|
|
},
|
|
{
|
|
"epoch": 0.7718550106609808,
|
|
"grad_norm": 0.1373151893452771,
|
|
"learning_rate": 3.966138682951008e-05,
|
|
"loss": 1.1371111869812012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.300091028213501,
|
|
"step": 181,
|
|
"valid_targets_mean": 15338.1,
|
|
"valid_targets_min": 3939
|
|
},
|
|
{
|
|
"epoch": 0.7761194029850746,
|
|
"grad_norm": 0.13795655344725627,
|
|
"learning_rate": 3.9650407924445147e-05,
|
|
"loss": 1.1715991497039795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965927720069885,
|
|
"step": 182,
|
|
"valid_targets_mean": 14912.2,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 0.7803837953091685,
|
|
"grad_norm": 0.13539932831986434,
|
|
"learning_rate": 3.963925543085181e-05,
|
|
"loss": 1.130144476890564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717178463935852,
|
|
"step": 183,
|
|
"valid_targets_mean": 14541.8,
|
|
"valid_targets_min": 4293
|
|
},
|
|
{
|
|
"epoch": 0.7846481876332623,
|
|
"grad_norm": 0.1283206245736198,
|
|
"learning_rate": 3.96279294472494e-05,
|
|
"loss": 1.054412841796875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640993595123291,
|
|
"step": 184,
|
|
"valid_targets_mean": 14504.5,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 0.7889125799573561,
|
|
"grad_norm": 0.12619880431755937,
|
|
"learning_rate": 3.961643007368984e-05,
|
|
"loss": 1.1118097305297852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28860723972320557,
|
|
"step": 185,
|
|
"valid_targets_mean": 15696.8,
|
|
"valid_targets_min": 12143
|
|
},
|
|
{
|
|
"epoch": 0.7931769722814499,
|
|
"grad_norm": 0.14664943656055512,
|
|
"learning_rate": 3.960475741175671e-05,
|
|
"loss": 1.1636567115783691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976000905036926,
|
|
"step": 186,
|
|
"valid_targets_mean": 14328.2,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 0.7974413646055437,
|
|
"grad_norm": 0.13587138316424627,
|
|
"learning_rate": 3.959291156456444e-05,
|
|
"loss": 1.1082062721252441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804204821586609,
|
|
"step": 187,
|
|
"valid_targets_mean": 14589.9,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 0.8017057569296375,
|
|
"grad_norm": 0.11576610260324276,
|
|
"learning_rate": 3.9580892636757334e-05,
|
|
"loss": 1.0934619903564453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2750215530395508,
|
|
"step": 188,
|
|
"valid_targets_mean": 14877.4,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 0.8059701492537313,
|
|
"grad_norm": 0.15045642773107953,
|
|
"learning_rate": 3.9568700734508645e-05,
|
|
"loss": 1.1125788688659668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27089452743530273,
|
|
"step": 189,
|
|
"valid_targets_mean": 14498.4,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 0.8102345415778252,
|
|
"grad_norm": 0.122886924452415,
|
|
"learning_rate": 3.955633596551967e-05,
|
|
"loss": 1.151255488395691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037102520465851,
|
|
"step": 190,
|
|
"valid_targets_mean": 14792.8,
|
|
"valid_targets_min": 2469
|
|
},
|
|
{
|
|
"epoch": 0.814498933901919,
|
|
"grad_norm": 0.14899832983861802,
|
|
"learning_rate": 3.9543798439018776e-05,
|
|
"loss": 1.126554250717163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.278128445148468,
|
|
"step": 191,
|
|
"valid_targets_mean": 14862.5,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 0.8187633262260128,
|
|
"grad_norm": 0.13132235585574215,
|
|
"learning_rate": 3.953108826576046e-05,
|
|
"loss": 1.0974340438842773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3035191595554352,
|
|
"step": 192,
|
|
"valid_targets_mean": 15164.5,
|
|
"valid_targets_min": 4317
|
|
},
|
|
{
|
|
"epoch": 0.8230277185501066,
|
|
"grad_norm": 0.130521931104875,
|
|
"learning_rate": 3.9518205558024334e-05,
|
|
"loss": 1.1168773174285889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616330683231354,
|
|
"step": 193,
|
|
"valid_targets_mean": 14815.7,
|
|
"valid_targets_min": 2603
|
|
},
|
|
{
|
|
"epoch": 0.8272921108742004,
|
|
"grad_norm": 0.1341247926241614,
|
|
"learning_rate": 3.9505150429614154e-05,
|
|
"loss": 1.1154228448867798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859729528427124,
|
|
"step": 194,
|
|
"valid_targets_mean": 14534.9,
|
|
"valid_targets_min": 4658
|
|
},
|
|
{
|
|
"epoch": 0.8315565031982942,
|
|
"grad_norm": 0.12691633298520677,
|
|
"learning_rate": 3.949192299585681e-05,
|
|
"loss": 1.1170120239257812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25178277492523193,
|
|
"step": 195,
|
|
"valid_targets_mean": 14242.1,
|
|
"valid_targets_min": 3649
|
|
},
|
|
{
|
|
"epoch": 0.835820895522388,
|
|
"grad_norm": 0.12846012014419028,
|
|
"learning_rate": 3.9478523373601325e-05,
|
|
"loss": 1.112591028213501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866249084472656,
|
|
"step": 196,
|
|
"valid_targets_mean": 15200.2,
|
|
"valid_targets_min": 6631
|
|
},
|
|
{
|
|
"epoch": 0.8400852878464818,
|
|
"grad_norm": 0.13725011956395136,
|
|
"learning_rate": 3.946495168121778e-05,
|
|
"loss": 1.106331467628479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29948368668556213,
|
|
"step": 197,
|
|
"valid_targets_mean": 14759.3,
|
|
"valid_targets_min": 4909
|
|
},
|
|
{
|
|
"epoch": 0.8443496801705757,
|
|
"grad_norm": 0.13034881754079844,
|
|
"learning_rate": 3.9451208038596325e-05,
|
|
"loss": 1.0999813079833984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26882147789001465,
|
|
"step": 198,
|
|
"valid_targets_mean": 14875.5,
|
|
"valid_targets_min": 4081
|
|
},
|
|
{
|
|
"epoch": 0.8486140724946695,
|
|
"grad_norm": 0.1225494722450067,
|
|
"learning_rate": 3.943729256714608e-05,
|
|
"loss": 1.1127738952636719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914237380027771,
|
|
"step": 199,
|
|
"valid_targets_mean": 15551.2,
|
|
"valid_targets_min": 10433
|
|
},
|
|
{
|
|
"epoch": 0.8528784648187633,
|
|
"grad_norm": 0.1300877292503286,
|
|
"learning_rate": 3.942320538979408e-05,
|
|
"loss": 1.103322148323059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2825484573841095,
|
|
"step": 200,
|
|
"valid_targets_mean": 15506.2,
|
|
"valid_targets_min": 7338
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.14098802712240638,
|
|
"learning_rate": 3.9408946630984144e-05,
|
|
"loss": 1.0541939735412598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697862982749939,
|
|
"step": 201,
|
|
"valid_targets_mean": 15275.3,
|
|
"valid_targets_min": 6483
|
|
},
|
|
{
|
|
"epoch": 0.8614072494669509,
|
|
"grad_norm": 0.14224101556395885,
|
|
"learning_rate": 3.939451641667587e-05,
|
|
"loss": 1.0901778936386108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783012390136719,
|
|
"step": 202,
|
|
"valid_targets_mean": 15515.6,
|
|
"valid_targets_min": 5670
|
|
},
|
|
{
|
|
"epoch": 0.8656716417910447,
|
|
"grad_norm": 0.16125617750462215,
|
|
"learning_rate": 3.937991487434342e-05,
|
|
"loss": 1.105463981628418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2722461521625519,
|
|
"step": 203,
|
|
"valid_targets_mean": 14926.7,
|
|
"valid_targets_min": 6670
|
|
},
|
|
{
|
|
"epoch": 0.8699360341151386,
|
|
"grad_norm": 0.13209796282365102,
|
|
"learning_rate": 3.9365142132974484e-05,
|
|
"loss": 1.1652926206588745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3184700608253479,
|
|
"step": 204,
|
|
"valid_targets_mean": 15101.2,
|
|
"valid_targets_min": 4098
|
|
},
|
|
{
|
|
"epoch": 0.8742004264392325,
|
|
"grad_norm": 0.13427219796772571,
|
|
"learning_rate": 3.935019832306905e-05,
|
|
"loss": 1.1196999549865723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2864650785923004,
|
|
"step": 205,
|
|
"valid_targets_mean": 14890.8,
|
|
"valid_targets_min": 5785
|
|
},
|
|
{
|
|
"epoch": 0.8784648187633263,
|
|
"grad_norm": 0.12296597238206593,
|
|
"learning_rate": 3.933508357663832e-05,
|
|
"loss": 1.1197096109390259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28627169132232666,
|
|
"step": 206,
|
|
"valid_targets_mean": 14440.3,
|
|
"valid_targets_min": 4342
|
|
},
|
|
{
|
|
"epoch": 0.8827292110874201,
|
|
"grad_norm": 0.15038922539621744,
|
|
"learning_rate": 3.9319798027203544e-05,
|
|
"loss": 1.0915428400039673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28127023577690125,
|
|
"step": 207,
|
|
"valid_targets_mean": 14453.1,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 0.8869936034115139,
|
|
"grad_norm": 0.148957816472187,
|
|
"learning_rate": 3.930434180979478e-05,
|
|
"loss": 1.110160231590271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2755948007106781,
|
|
"step": 208,
|
|
"valid_targets_mean": 14177.1,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 0.8912579957356077,
|
|
"grad_norm": 0.1506910772213843,
|
|
"learning_rate": 3.928871506094975e-05,
|
|
"loss": 1.1149358749389648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29100799560546875,
|
|
"step": 209,
|
|
"valid_targets_mean": 15418.4,
|
|
"valid_targets_min": 12817
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.1484641438960075,
|
|
"learning_rate": 3.927291791871264e-05,
|
|
"loss": 1.126612901687622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26460331678390503,
|
|
"step": 210,
|
|
"valid_targets_mean": 14732.7,
|
|
"valid_targets_min": 3427
|
|
},
|
|
{
|
|
"epoch": 0.8997867803837953,
|
|
"grad_norm": 0.12901650114907645,
|
|
"learning_rate": 3.925695052263284e-05,
|
|
"loss": 1.1095227003097534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26923397183418274,
|
|
"step": 211,
|
|
"valid_targets_mean": 14885.7,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 0.9040511727078892,
|
|
"grad_norm": 0.13562134156105302,
|
|
"learning_rate": 3.924081301376375e-05,
|
|
"loss": 1.0953495502471924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2668260931968689,
|
|
"step": 212,
|
|
"valid_targets_mean": 14759.1,
|
|
"valid_targets_min": 1783
|
|
},
|
|
{
|
|
"epoch": 0.908315565031983,
|
|
"grad_norm": 0.14446206741417195,
|
|
"learning_rate": 3.9224505534661525e-05,
|
|
"loss": 1.1020824909210205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25572067499160767,
|
|
"step": 213,
|
|
"valid_targets_mean": 14785.4,
|
|
"valid_targets_min": 4867
|
|
},
|
|
{
|
|
"epoch": 0.9125799573560768,
|
|
"grad_norm": 0.12584964455269773,
|
|
"learning_rate": 3.92080282293838e-05,
|
|
"loss": 1.1246337890625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29202979803085327,
|
|
"step": 214,
|
|
"valid_targets_mean": 15084.0,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 0.9168443496801706,
|
|
"grad_norm": 0.14600926284692883,
|
|
"learning_rate": 3.9191381243488417e-05,
|
|
"loss": 1.0968735218048096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28475409746170044,
|
|
"step": 215,
|
|
"valid_targets_mean": 14981.6,
|
|
"valid_targets_min": 3745
|
|
},
|
|
{
|
|
"epoch": 0.9211087420042644,
|
|
"grad_norm": 0.13297033649676046,
|
|
"learning_rate": 3.9174564724032167e-05,
|
|
"loss": 1.118091106414795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25068044662475586,
|
|
"step": 216,
|
|
"valid_targets_mean": 14151.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 0.9253731343283582,
|
|
"grad_norm": 0.13574354964287708,
|
|
"learning_rate": 3.9157578819569455e-05,
|
|
"loss": 1.0985863208770752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713858187198639,
|
|
"step": 217,
|
|
"valid_targets_mean": 14968.6,
|
|
"valid_targets_min": 5942
|
|
},
|
|
{
|
|
"epoch": 0.929637526652452,
|
|
"grad_norm": 0.1304172223665982,
|
|
"learning_rate": 3.9140423680151036e-05,
|
|
"loss": 1.094936490058899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2801072895526886,
|
|
"step": 218,
|
|
"valid_targets_mean": 14687.3,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 0.9339019189765458,
|
|
"grad_norm": 0.14586104833661614,
|
|
"learning_rate": 3.9123099457322625e-05,
|
|
"loss": 1.1430864334106445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29844510555267334,
|
|
"step": 219,
|
|
"valid_targets_mean": 15042.7,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 0.9381663113006397,
|
|
"grad_norm": 0.11896042929231765,
|
|
"learning_rate": 3.9105606304123605e-05,
|
|
"loss": 1.1179921627044678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26598095893859863,
|
|
"step": 220,
|
|
"valid_targets_mean": 15458.4,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 0.9424307036247335,
|
|
"grad_norm": 0.13542883335058106,
|
|
"learning_rate": 3.908794437508567e-05,
|
|
"loss": 1.113782525062561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27418214082717896,
|
|
"step": 221,
|
|
"valid_targets_mean": 14993.8,
|
|
"valid_targets_min": 8475
|
|
},
|
|
{
|
|
"epoch": 0.9466950959488273,
|
|
"grad_norm": 0.12098801862055243,
|
|
"learning_rate": 3.907011382623145e-05,
|
|
"loss": 1.1109488010406494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27835947275161743,
|
|
"step": 222,
|
|
"valid_targets_mean": 14980.2,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 0.9509594882729211,
|
|
"grad_norm": 0.1446411585075237,
|
|
"learning_rate": 3.905211481507313e-05,
|
|
"loss": 1.1192302703857422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29389917850494385,
|
|
"step": 223,
|
|
"valid_targets_mean": 14959.2,
|
|
"valid_targets_min": 5807
|
|
},
|
|
{
|
|
"epoch": 0.9552238805970149,
|
|
"grad_norm": 0.13006340729995194,
|
|
"learning_rate": 3.903394750061106e-05,
|
|
"loss": 1.1109366416931152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283753365278244,
|
|
"step": 224,
|
|
"valid_targets_mean": 14750.3,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 0.9594882729211087,
|
|
"grad_norm": 0.12444018866956234,
|
|
"learning_rate": 3.9015612043332375e-05,
|
|
"loss": 1.0865435600280762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2794000506401062,
|
|
"step": 225,
|
|
"valid_targets_mean": 15057.6,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 0.9637526652452025,
|
|
"grad_norm": 0.13100673247005967,
|
|
"learning_rate": 3.8997108605209535e-05,
|
|
"loss": 1.091223955154419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665339708328247,
|
|
"step": 226,
|
|
"valid_targets_mean": 14488.4,
|
|
"valid_targets_min": 5745
|
|
},
|
|
{
|
|
"epoch": 0.9680170575692963,
|
|
"grad_norm": 0.14432664762257136,
|
|
"learning_rate": 3.897843734969891e-05,
|
|
"loss": 1.0613495111465454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25530362129211426,
|
|
"step": 227,
|
|
"valid_targets_mean": 15354.0,
|
|
"valid_targets_min": 9319
|
|
},
|
|
{
|
|
"epoch": 0.9722814498933902,
|
|
"grad_norm": 0.1295131341857359,
|
|
"learning_rate": 3.895959844173937e-05,
|
|
"loss": 1.1064207553863525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2939533293247223,
|
|
"step": 228,
|
|
"valid_targets_mean": 14773.6,
|
|
"valid_targets_min": 5422
|
|
},
|
|
{
|
|
"epoch": 0.976545842217484,
|
|
"grad_norm": 0.1266835881218933,
|
|
"learning_rate": 3.8940592047750774e-05,
|
|
"loss": 1.1071289777755737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25003114342689514,
|
|
"step": 229,
|
|
"valid_targets_mean": 14048.9,
|
|
"valid_targets_min": 4287
|
|
},
|
|
{
|
|
"epoch": 0.9808102345415778,
|
|
"grad_norm": 0.1334513405570654,
|
|
"learning_rate": 3.892141833563255e-05,
|
|
"loss": 1.1168997287750244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899852991104126,
|
|
"step": 230,
|
|
"valid_targets_mean": 15266.1,
|
|
"valid_targets_min": 7873
|
|
},
|
|
{
|
|
"epoch": 0.9850746268656716,
|
|
"grad_norm": 0.12948007424703104,
|
|
"learning_rate": 3.8902077474762155e-05,
|
|
"loss": 1.0858094692230225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779843211174011,
|
|
"step": 231,
|
|
"valid_targets_mean": 14952.2,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 0.9893390191897654,
|
|
"grad_norm": 0.12821152626259633,
|
|
"learning_rate": 3.888256963599364e-05,
|
|
"loss": 1.100785493850708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768602967262268,
|
|
"step": 232,
|
|
"valid_targets_mean": 14126.9,
|
|
"valid_targets_min": 2079
|
|
},
|
|
{
|
|
"epoch": 0.9936034115138592,
|
|
"grad_norm": 0.1388168756240748,
|
|
"learning_rate": 3.886289499165609e-05,
|
|
"loss": 1.0943260192871094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30819663405418396,
|
|
"step": 233,
|
|
"valid_targets_mean": 15548.9,
|
|
"valid_targets_min": 6662
|
|
},
|
|
{
|
|
"epoch": 0.997867803837953,
|
|
"grad_norm": 0.13013553730526728,
|
|
"learning_rate": 3.884305371555215e-05,
|
|
"loss": 1.1210851669311523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844943404197693,
|
|
"step": 234,
|
|
"valid_targets_mean": 15039.9,
|
|
"valid_targets_min": 7550
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.15419807521284662,
|
|
"learning_rate": 3.882304598295643e-05,
|
|
"loss": 1.1342179775238037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5554423332214355,
|
|
"step": 235,
|
|
"valid_targets_mean": 14957.4,
|
|
"valid_targets_min": 4497
|
|
},
|
|
{
|
|
"epoch": 1.004264392324094,
|
|
"grad_norm": 0.14832594302128513,
|
|
"learning_rate": 3.880287197061402e-05,
|
|
"loss": 1.1174304485321045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663096785545349,
|
|
"step": 236,
|
|
"valid_targets_mean": 14268.1,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 1.0085287846481876,
|
|
"grad_norm": 0.11880863072507944,
|
|
"learning_rate": 3.878253185673888e-05,
|
|
"loss": 1.0903974771499634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27352577447891235,
|
|
"step": 237,
|
|
"valid_targets_mean": 15210.7,
|
|
"valid_targets_min": 5232
|
|
},
|
|
{
|
|
"epoch": 1.0127931769722816,
|
|
"grad_norm": 0.14804758953053165,
|
|
"learning_rate": 3.876202582101229e-05,
|
|
"loss": 1.1036317348480225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706872224807739,
|
|
"step": 238,
|
|
"valid_targets_mean": 14478.7,
|
|
"valid_targets_min": 3156
|
|
},
|
|
{
|
|
"epoch": 1.0170575692963753,
|
|
"grad_norm": 0.13086327011528612,
|
|
"learning_rate": 3.874135404458125e-05,
|
|
"loss": 1.1062583923339844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735649049282074,
|
|
"step": 239,
|
|
"valid_targets_mean": 14224.4,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 1.0213219616204692,
|
|
"grad_norm": 0.13332088768631248,
|
|
"learning_rate": 3.8720516710056905e-05,
|
|
"loss": 1.1267770528793335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770707905292511,
|
|
"step": 240,
|
|
"valid_targets_mean": 15087.3,
|
|
"valid_targets_min": 2365
|
|
},
|
|
{
|
|
"epoch": 1.0255863539445629,
|
|
"grad_norm": 0.12428623621161497,
|
|
"learning_rate": 3.8699514001512885e-05,
|
|
"loss": 1.098931908607483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27049383521080017,
|
|
"step": 241,
|
|
"valid_targets_mean": 15256.3,
|
|
"valid_targets_min": 2378
|
|
},
|
|
{
|
|
"epoch": 1.0298507462686568,
|
|
"grad_norm": 0.1439726319791604,
|
|
"learning_rate": 3.867834610448374e-05,
|
|
"loss": 1.0865485668182373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768022418022156,
|
|
"step": 242,
|
|
"valid_targets_mean": 14890.1,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 1.0341151385927505,
|
|
"grad_norm": 0.11278081360532456,
|
|
"learning_rate": 3.865701320596324e-05,
|
|
"loss": 1.0894007682800293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24118563532829285,
|
|
"step": 243,
|
|
"valid_targets_mean": 13464.3,
|
|
"valid_targets_min": 2748
|
|
},
|
|
{
|
|
"epoch": 1.0383795309168444,
|
|
"grad_norm": 0.1449049037554444,
|
|
"learning_rate": 3.863551549440277e-05,
|
|
"loss": 1.0954735279083252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24541440606117249,
|
|
"step": 244,
|
|
"valid_targets_mean": 14150.5,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 1.0426439232409381,
|
|
"grad_norm": 0.13169050587302447,
|
|
"learning_rate": 3.861385315970964e-05,
|
|
"loss": 1.067154884338379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26327186822891235,
|
|
"step": 245,
|
|
"valid_targets_mean": 14046.7,
|
|
"valid_targets_min": 2460
|
|
},
|
|
{
|
|
"epoch": 1.046908315565032,
|
|
"grad_norm": 0.13958573511012878,
|
|
"learning_rate": 3.859202639324542e-05,
|
|
"loss": 1.0889699459075928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618789076805115,
|
|
"step": 246,
|
|
"valid_targets_mean": 13850.7,
|
|
"valid_targets_min": 2743
|
|
},
|
|
{
|
|
"epoch": 1.0511727078891258,
|
|
"grad_norm": 0.13766781074458856,
|
|
"learning_rate": 3.8570035387824214e-05,
|
|
"loss": 1.0873351097106934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2589748799800873,
|
|
"step": 247,
|
|
"valid_targets_mean": 14669.0,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 1.0554371002132197,
|
|
"grad_norm": 0.14171428522498714,
|
|
"learning_rate": 3.8547880337711036e-05,
|
|
"loss": 1.089716911315918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25200155377388,
|
|
"step": 248,
|
|
"valid_targets_mean": 13806.2,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 1.0597014925373134,
|
|
"grad_norm": 0.14485898879525483,
|
|
"learning_rate": 3.8525561438620016e-05,
|
|
"loss": 1.1417646408081055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31864017248153687,
|
|
"step": 249,
|
|
"valid_targets_mean": 15282.7,
|
|
"valid_targets_min": 1942
|
|
},
|
|
{
|
|
"epoch": 1.0639658848614073,
|
|
"grad_norm": 0.135343252461091,
|
|
"learning_rate": 3.850307888771269e-05,
|
|
"loss": 1.0761914253234863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26799869537353516,
|
|
"step": 250,
|
|
"valid_targets_mean": 14998.7,
|
|
"valid_targets_min": 9633
|
|
},
|
|
{
|
|
"epoch": 1.068230277185501,
|
|
"grad_norm": 0.15849624526072872,
|
|
"learning_rate": 3.848043288359629e-05,
|
|
"loss": 1.08738374710083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25412940979003906,
|
|
"step": 251,
|
|
"valid_targets_mean": 15102.2,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 1.072494669509595,
|
|
"grad_norm": 0.1305684357841081,
|
|
"learning_rate": 3.8457623626321944e-05,
|
|
"loss": 1.0818676948547363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856181859970093,
|
|
"step": 252,
|
|
"valid_targets_mean": 15646.4,
|
|
"valid_targets_min": 10670
|
|
},
|
|
{
|
|
"epoch": 1.0767590618336886,
|
|
"grad_norm": 0.18617939840508396,
|
|
"learning_rate": 3.843465131738296e-05,
|
|
"loss": 1.1018372774124146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29034340381622314,
|
|
"step": 253,
|
|
"valid_targets_mean": 15954.2,
|
|
"valid_targets_min": 12329
|
|
},
|
|
{
|
|
"epoch": 1.0810234541577826,
|
|
"grad_norm": 0.1508586557401326,
|
|
"learning_rate": 3.8411516159713e-05,
|
|
"loss": 1.0827605724334717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27897909283638,
|
|
"step": 254,
|
|
"valid_targets_mean": 15004.8,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 1.0852878464818763,
|
|
"grad_norm": 0.14613227348168586,
|
|
"learning_rate": 3.838821835768431e-05,
|
|
"loss": 1.0547592639923096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259225070476532,
|
|
"step": 255,
|
|
"valid_targets_mean": 14690.4,
|
|
"valid_targets_min": 2857
|
|
},
|
|
{
|
|
"epoch": 1.0895522388059702,
|
|
"grad_norm": 0.16145844052520353,
|
|
"learning_rate": 3.83647581171059e-05,
|
|
"loss": 1.0921217203140259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24286618828773499,
|
|
"step": 256,
|
|
"valid_targets_mean": 14299.9,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 1.0938166311300639,
|
|
"grad_norm": 0.12185397357763571,
|
|
"learning_rate": 3.8341135645221744e-05,
|
|
"loss": 1.075549602508545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2710611820220947,
|
|
"step": 257,
|
|
"valid_targets_mean": 15126.9,
|
|
"valid_targets_min": 4921
|
|
},
|
|
{
|
|
"epoch": 1.0980810234541578,
|
|
"grad_norm": 0.17100699897849678,
|
|
"learning_rate": 3.831735115070895e-05,
|
|
"loss": 1.0602529048919678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660970091819763,
|
|
"step": 258,
|
|
"valid_targets_mean": 14821.5,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 1.1023454157782515,
|
|
"grad_norm": 0.10522306123354634,
|
|
"learning_rate": 3.8293404843675904e-05,
|
|
"loss": 1.075049877166748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2423074096441269,
|
|
"step": 259,
|
|
"valid_targets_mean": 15089.9,
|
|
"valid_targets_min": 7013
|
|
},
|
|
{
|
|
"epoch": 1.1066098081023454,
|
|
"grad_norm": 0.17747972680014706,
|
|
"learning_rate": 3.8269296935660395e-05,
|
|
"loss": 1.078465461730957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25448617339134216,
|
|
"step": 260,
|
|
"valid_targets_mean": 14341.2,
|
|
"valid_targets_min": 4757
|
|
},
|
|
{
|
|
"epoch": 1.1108742004264391,
|
|
"grad_norm": 0.1086132454278875,
|
|
"learning_rate": 3.82450276396278e-05,
|
|
"loss": 1.0865874290466309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271104633808136,
|
|
"step": 261,
|
|
"valid_targets_mean": 14731.7,
|
|
"valid_targets_min": 5798
|
|
},
|
|
{
|
|
"epoch": 1.115138592750533,
|
|
"grad_norm": 0.1433739056767183,
|
|
"learning_rate": 3.822059716996916e-05,
|
|
"loss": 1.0663318634033203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696720361709595,
|
|
"step": 262,
|
|
"valid_targets_mean": 14853.6,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 1.1194029850746268,
|
|
"grad_norm": 0.1299031072559473,
|
|
"learning_rate": 3.819600574249929e-05,
|
|
"loss": 1.0397834777832031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643989324569702,
|
|
"step": 263,
|
|
"valid_targets_mean": 15993.3,
|
|
"valid_targets_min": 11892
|
|
},
|
|
{
|
|
"epoch": 1.1236673773987207,
|
|
"grad_norm": 0.14090883541170693,
|
|
"learning_rate": 3.817125357445489e-05,
|
|
"loss": 1.108483076095581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813621163368225,
|
|
"step": 264,
|
|
"valid_targets_mean": 14684.8,
|
|
"valid_targets_min": 3168
|
|
},
|
|
{
|
|
"epoch": 1.1279317697228146,
|
|
"grad_norm": 0.12439320569250074,
|
|
"learning_rate": 3.814634088449261e-05,
|
|
"loss": 1.0688656568527222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24935731291770935,
|
|
"step": 265,
|
|
"valid_targets_mean": 15106.6,
|
|
"valid_targets_min": 5531
|
|
},
|
|
{
|
|
"epoch": 1.1321961620469083,
|
|
"grad_norm": 0.12057175511413992,
|
|
"learning_rate": 3.812126789268712e-05,
|
|
"loss": 1.0883033275604248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27489930391311646,
|
|
"step": 266,
|
|
"valid_targets_mean": 15746.8,
|
|
"valid_targets_min": 11441
|
|
},
|
|
{
|
|
"epoch": 1.136460554371002,
|
|
"grad_norm": 0.11827871544961882,
|
|
"learning_rate": 3.80960348205292e-05,
|
|
"loss": 1.132977843284607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26096606254577637,
|
|
"step": 267,
|
|
"valid_targets_mean": 13970.7,
|
|
"valid_targets_min": 2079
|
|
},
|
|
{
|
|
"epoch": 1.140724946695096,
|
|
"grad_norm": 0.1464166498076275,
|
|
"learning_rate": 3.807064189092372e-05,
|
|
"loss": 1.1327526569366455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805986702442169,
|
|
"step": 268,
|
|
"valid_targets_mean": 14159.4,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 1.1449893390191899,
|
|
"grad_norm": 0.1269926664130488,
|
|
"learning_rate": 3.804508932818771e-05,
|
|
"loss": 1.0420353412628174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257866233587265,
|
|
"step": 269,
|
|
"valid_targets_mean": 15314.1,
|
|
"valid_targets_min": 8727
|
|
},
|
|
{
|
|
"epoch": 1.1492537313432836,
|
|
"grad_norm": 0.11830814377555596,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 1.0889430046081543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26712143421173096,
|
|
"step": 270,
|
|
"valid_targets_mean": 14388.7,
|
|
"valid_targets_min": 3917
|
|
},
|
|
{
|
|
"epoch": 1.1535181236673775,
|
|
"grad_norm": 0.13757483749987642,
|
|
"learning_rate": 3.799350620764114e-05,
|
|
"loss": 1.056231141090393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26202642917633057,
|
|
"step": 271,
|
|
"valid_targets_mean": 15144.8,
|
|
"valid_targets_min": 5375
|
|
},
|
|
{
|
|
"epoch": 1.1577825159914712,
|
|
"grad_norm": 0.12052227300706919,
|
|
"learning_rate": 3.7967476105507535e-05,
|
|
"loss": 1.052736759185791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620790898799896,
|
|
"step": 272,
|
|
"valid_targets_mean": 14506.4,
|
|
"valid_targets_min": 4050
|
|
},
|
|
{
|
|
"epoch": 1.1620469083155651,
|
|
"grad_norm": 0.1413035094399248,
|
|
"learning_rate": 3.7941287281593284e-05,
|
|
"loss": 1.063258409500122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29303738474845886,
|
|
"step": 273,
|
|
"valid_targets_mean": 15518.4,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 1.1663113006396588,
|
|
"grad_norm": 0.12648462313373993,
|
|
"learning_rate": 3.7914939967246227e-05,
|
|
"loss": 1.0919417142868042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26150012016296387,
|
|
"step": 274,
|
|
"valid_targets_mean": 14888.8,
|
|
"valid_targets_min": 5565
|
|
},
|
|
{
|
|
"epoch": 1.1705756929637527,
|
|
"grad_norm": 0.13614134627727864,
|
|
"learning_rate": 3.7888434395214285e-05,
|
|
"loss": 1.0795249938964844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2505633234977722,
|
|
"step": 275,
|
|
"valid_targets_mean": 13772.5,
|
|
"valid_targets_min": 3323
|
|
},
|
|
{
|
|
"epoch": 1.1748400852878464,
|
|
"grad_norm": 0.1335296993591303,
|
|
"learning_rate": 3.786177079964339e-05,
|
|
"loss": 1.0871176719665527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26828983426094055,
|
|
"step": 276,
|
|
"valid_targets_mean": 14900.1,
|
|
"valid_targets_min": 6668
|
|
},
|
|
{
|
|
"epoch": 1.1791044776119404,
|
|
"grad_norm": 0.11536802042295306,
|
|
"learning_rate": 3.783494941607544e-05,
|
|
"loss": 1.100544810295105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667681872844696,
|
|
"step": 277,
|
|
"valid_targets_mean": 14130.2,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 1.183368869936034,
|
|
"grad_norm": 0.13476544317466366,
|
|
"learning_rate": 3.780797048144621e-05,
|
|
"loss": 1.0960979461669922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654196321964264,
|
|
"step": 278,
|
|
"valid_targets_mean": 14693.0,
|
|
"valid_targets_min": 7570
|
|
},
|
|
{
|
|
"epoch": 1.187633262260128,
|
|
"grad_norm": 0.1206209090055457,
|
|
"learning_rate": 3.7780834234083236e-05,
|
|
"loss": 1.0933949947357178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28174376487731934,
|
|
"step": 279,
|
|
"valid_targets_mean": 15056.8,
|
|
"valid_targets_min": 3329
|
|
},
|
|
{
|
|
"epoch": 1.1918976545842217,
|
|
"grad_norm": 0.1371892681600439,
|
|
"learning_rate": 3.775354091370376e-05,
|
|
"loss": 1.06509268283844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26960083842277527,
|
|
"step": 280,
|
|
"valid_targets_mean": 15729.2,
|
|
"valid_targets_min": 10373
|
|
},
|
|
{
|
|
"epoch": 1.1961620469083156,
|
|
"grad_norm": 0.15002131655927678,
|
|
"learning_rate": 3.772609076141255e-05,
|
|
"loss": 1.0583593845367432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785288095474243,
|
|
"step": 281,
|
|
"valid_targets_mean": 15002.1,
|
|
"valid_targets_min": 6835
|
|
},
|
|
{
|
|
"epoch": 1.2004264392324093,
|
|
"grad_norm": 0.14303727644109057,
|
|
"learning_rate": 3.769848401969982e-05,
|
|
"loss": 1.084068775177002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27171340584754944,
|
|
"step": 282,
|
|
"valid_targets_mean": 15454.7,
|
|
"valid_targets_min": 10409
|
|
},
|
|
{
|
|
"epoch": 1.2046908315565032,
|
|
"grad_norm": 0.18103626349953328,
|
|
"learning_rate": 3.767072093243907e-05,
|
|
"loss": 1.1330029964447021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785246968269348,
|
|
"step": 283,
|
|
"valid_targets_mean": 15136.9,
|
|
"valid_targets_min": 4653
|
|
},
|
|
{
|
|
"epoch": 1.208955223880597,
|
|
"grad_norm": 0.13298985920442377,
|
|
"learning_rate": 3.7642801744884915e-05,
|
|
"loss": 1.1546975374221802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804200351238251,
|
|
"step": 284,
|
|
"valid_targets_mean": 14707.0,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 1.2132196162046909,
|
|
"grad_norm": 0.11773615532294529,
|
|
"learning_rate": 3.761472670367096e-05,
|
|
"loss": 1.0487704277038574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24607539176940918,
|
|
"step": 285,
|
|
"valid_targets_mean": 15109.0,
|
|
"valid_targets_min": 3679
|
|
},
|
|
{
|
|
"epoch": 1.2174840085287846,
|
|
"grad_norm": 0.15896186199143064,
|
|
"learning_rate": 3.758649605680758e-05,
|
|
"loss": 1.0542798042297363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26041775941848755,
|
|
"step": 286,
|
|
"valid_targets_mean": 14734.3,
|
|
"valid_targets_min": 7371
|
|
},
|
|
{
|
|
"epoch": 1.2217484008528785,
|
|
"grad_norm": 0.11629818493972385,
|
|
"learning_rate": 3.755811005367974e-05,
|
|
"loss": 1.0769155025482178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768469750881195,
|
|
"step": 287,
|
|
"valid_targets_mean": 15378.3,
|
|
"valid_targets_min": 9901
|
|
},
|
|
{
|
|
"epoch": 1.2260127931769722,
|
|
"grad_norm": 0.15317536049975403,
|
|
"learning_rate": 3.752956894504481e-05,
|
|
"loss": 1.0881221294403076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508203685283661,
|
|
"step": 288,
|
|
"valid_targets_mean": 13490.2,
|
|
"valid_targets_min": 4864
|
|
},
|
|
{
|
|
"epoch": 1.2302771855010661,
|
|
"grad_norm": 0.12954766231178697,
|
|
"learning_rate": 3.750087298303033e-05,
|
|
"loss": 1.0730881690979004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726203203201294,
|
|
"step": 289,
|
|
"valid_targets_mean": 14460.5,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 1.2345415778251598,
|
|
"grad_norm": 0.12507778245593257,
|
|
"learning_rate": 3.7472022421131795e-05,
|
|
"loss": 1.0751492977142334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857722043991089,
|
|
"step": 290,
|
|
"valid_targets_mean": 15260.0,
|
|
"valid_targets_min": 7189
|
|
},
|
|
{
|
|
"epoch": 1.2388059701492538,
|
|
"grad_norm": 0.15587805506906033,
|
|
"learning_rate": 3.7443017514210406e-05,
|
|
"loss": 1.099067211151123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564665377140045,
|
|
"step": 291,
|
|
"valid_targets_mean": 14848.7,
|
|
"valid_targets_min": 4038
|
|
},
|
|
{
|
|
"epoch": 1.2430703624733475,
|
|
"grad_norm": 0.11756774331916273,
|
|
"learning_rate": 3.7413858518490825e-05,
|
|
"loss": 1.12516188621521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28718727827072144,
|
|
"step": 292,
|
|
"valid_targets_mean": 14068.1,
|
|
"valid_targets_min": 3639
|
|
},
|
|
{
|
|
"epoch": 1.2473347547974414,
|
|
"grad_norm": 0.12283437611787308,
|
|
"learning_rate": 3.7384545691558895e-05,
|
|
"loss": 1.0807175636291504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26683443784713745,
|
|
"step": 293,
|
|
"valid_targets_mean": 14908.2,
|
|
"valid_targets_min": 6526
|
|
},
|
|
{
|
|
"epoch": 1.251599147121535,
|
|
"grad_norm": 0.13281035030612356,
|
|
"learning_rate": 3.735507929235941e-05,
|
|
"loss": 1.0679543018341064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28069236874580383,
|
|
"step": 294,
|
|
"valid_targets_mean": 14810.7,
|
|
"valid_targets_min": 3027
|
|
},
|
|
{
|
|
"epoch": 1.255863539445629,
|
|
"grad_norm": 0.14243050458611214,
|
|
"learning_rate": 3.732545958119378e-05,
|
|
"loss": 1.068037748336792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620224952697754,
|
|
"step": 295,
|
|
"valid_targets_mean": 15038.1,
|
|
"valid_targets_min": 6629
|
|
},
|
|
{
|
|
"epoch": 1.260127931769723,
|
|
"grad_norm": 0.12019922924167692,
|
|
"learning_rate": 3.729568681971774e-05,
|
|
"loss": 1.0990270376205444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25651150941848755,
|
|
"step": 296,
|
|
"valid_targets_mean": 14217.9,
|
|
"valid_targets_min": 4531
|
|
},
|
|
{
|
|
"epoch": 1.2643923240938166,
|
|
"grad_norm": 0.1491423573676665,
|
|
"learning_rate": 3.726576127093905e-05,
|
|
"loss": 1.1150866746902466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706458568572998,
|
|
"step": 297,
|
|
"valid_targets_mean": 15299.6,
|
|
"valid_targets_min": 4706
|
|
},
|
|
{
|
|
"epoch": 1.2686567164179103,
|
|
"grad_norm": 0.10268285779318712,
|
|
"learning_rate": 3.7235683199215177e-05,
|
|
"loss": 1.0659804344177246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2478877604007721,
|
|
"step": 298,
|
|
"valid_targets_mean": 13945.4,
|
|
"valid_targets_min": 3752
|
|
},
|
|
{
|
|
"epoch": 1.2729211087420043,
|
|
"grad_norm": 0.18644928909399677,
|
|
"learning_rate": 3.7205452870250944e-05,
|
|
"loss": 1.0774941444396973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25649482011795044,
|
|
"step": 299,
|
|
"valid_targets_mean": 15283.8,
|
|
"valid_targets_min": 5485
|
|
},
|
|
{
|
|
"epoch": 1.2771855010660982,
|
|
"grad_norm": 0.10657869894944769,
|
|
"learning_rate": 3.7175070551096204e-05,
|
|
"loss": 1.0419113636016846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594824433326721,
|
|
"step": 300,
|
|
"valid_targets_mean": 15005.8,
|
|
"valid_targets_min": 5446
|
|
},
|
|
{
|
|
"epoch": 1.2814498933901919,
|
|
"grad_norm": 0.1570629764202123,
|
|
"learning_rate": 3.7144536510143436e-05,
|
|
"loss": 1.0955400466918945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824366092681885,
|
|
"step": 301,
|
|
"valid_targets_mean": 14508.9,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.12046527386393832,
|
|
"learning_rate": 3.711385101712544e-05,
|
|
"loss": 1.0840914249420166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630878686904907,
|
|
"step": 302,
|
|
"valid_targets_mean": 14425.5,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.2899786780383795,
|
|
"grad_norm": 0.16778554640063564,
|
|
"learning_rate": 3.708301434311289e-05,
|
|
"loss": 1.0854768753051758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22701036930084229,
|
|
"step": 303,
|
|
"valid_targets_mean": 13604.7,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 1.2942430703624734,
|
|
"grad_norm": 0.10936643631687112,
|
|
"learning_rate": 3.7052026760511996e-05,
|
|
"loss": 1.0484946966171265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28306519985198975,
|
|
"step": 304,
|
|
"valid_targets_mean": 14979.1,
|
|
"valid_targets_min": 4469
|
|
},
|
|
{
|
|
"epoch": 1.2985074626865671,
|
|
"grad_norm": 0.14558359920557873,
|
|
"learning_rate": 3.7020888543062046e-05,
|
|
"loss": 1.0519603490829468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.288472980260849,
|
|
"step": 305,
|
|
"valid_targets_mean": 15788.1,
|
|
"valid_targets_min": 7730
|
|
},
|
|
{
|
|
"epoch": 1.302771855010661,
|
|
"grad_norm": 0.11589645762962353,
|
|
"learning_rate": 3.6989599965833024e-05,
|
|
"loss": 1.1148842573165894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30141183733940125,
|
|
"step": 306,
|
|
"valid_targets_mean": 15547.2,
|
|
"valid_targets_min": 7453
|
|
},
|
|
{
|
|
"epoch": 1.3070362473347548,
|
|
"grad_norm": 0.12471176158773442,
|
|
"learning_rate": 3.695816130522317e-05,
|
|
"loss": 1.111924648284912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694132924079895,
|
|
"step": 307,
|
|
"valid_targets_mean": 14122.2,
|
|
"valid_targets_min": 3615
|
|
},
|
|
{
|
|
"epoch": 1.3113006396588487,
|
|
"grad_norm": 0.11638807047738552,
|
|
"learning_rate": 3.692657283895651e-05,
|
|
"loss": 1.072528600692749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28918564319610596,
|
|
"step": 308,
|
|
"valid_targets_mean": 14801.9,
|
|
"valid_targets_min": 2371
|
|
},
|
|
{
|
|
"epoch": 1.3155650319829424,
|
|
"grad_norm": 0.12111815195562205,
|
|
"learning_rate": 3.689483484608048e-05,
|
|
"loss": 1.1112918853759766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26899421215057373,
|
|
"step": 309,
|
|
"valid_targets_mean": 14900.5,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 1.3198294243070363,
|
|
"grad_norm": 0.1245083633247677,
|
|
"learning_rate": 3.6862947606963364e-05,
|
|
"loss": 1.1408207416534424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26744699478149414,
|
|
"step": 310,
|
|
"valid_targets_mean": 14643.7,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 1.32409381663113,
|
|
"grad_norm": 0.12496946153990728,
|
|
"learning_rate": 3.6830911403291885e-05,
|
|
"loss": 1.0849456787109375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26361972093582153,
|
|
"step": 311,
|
|
"valid_targets_mean": 13660.8,
|
|
"valid_targets_min": 3902
|
|
},
|
|
{
|
|
"epoch": 1.328358208955224,
|
|
"grad_norm": 0.12954559428204301,
|
|
"learning_rate": 3.679872651806869e-05,
|
|
"loss": 1.1030840873718262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831391394138336,
|
|
"step": 312,
|
|
"valid_targets_mean": 14744.0,
|
|
"valid_targets_min": 3576
|
|
},
|
|
{
|
|
"epoch": 1.3326226012793176,
|
|
"grad_norm": 0.1312577209697291,
|
|
"learning_rate": 3.676639323560986e-05,
|
|
"loss": 1.0543076992034912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637162208557129,
|
|
"step": 313,
|
|
"valid_targets_mean": 14281.3,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 1.3368869936034116,
|
|
"grad_norm": 0.12824651697136208,
|
|
"learning_rate": 3.6733911841542365e-05,
|
|
"loss": 1.071939468383789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25401657819747925,
|
|
"step": 314,
|
|
"valid_targets_mean": 14670.4,
|
|
"valid_targets_min": 5175
|
|
},
|
|
{
|
|
"epoch": 1.3411513859275053,
|
|
"grad_norm": 0.1446700029967673,
|
|
"learning_rate": 3.6701282622801626e-05,
|
|
"loss": 1.0685203075408936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770332098007202,
|
|
"step": 315,
|
|
"valid_targets_mean": 14907.9,
|
|
"valid_targets_min": 6623
|
|
},
|
|
{
|
|
"epoch": 1.3454157782515992,
|
|
"grad_norm": 0.12940739382910618,
|
|
"learning_rate": 3.666850586762886e-05,
|
|
"loss": 1.0746192932128906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23956233263015747,
|
|
"step": 316,
|
|
"valid_targets_mean": 14811.0,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 1.349680170575693,
|
|
"grad_norm": 0.11413470379819687,
|
|
"learning_rate": 3.663558186556863e-05,
|
|
"loss": 1.071451187133789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28026705980300903,
|
|
"step": 317,
|
|
"valid_targets_mean": 14632.1,
|
|
"valid_targets_min": 4280
|
|
},
|
|
{
|
|
"epoch": 1.3539445628997868,
|
|
"grad_norm": 0.12106572305012814,
|
|
"learning_rate": 3.660251090746627e-05,
|
|
"loss": 1.0448592901229858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789224088191986,
|
|
"step": 318,
|
|
"valid_targets_mean": 15106.6,
|
|
"valid_targets_min": 3644
|
|
},
|
|
{
|
|
"epoch": 1.3582089552238805,
|
|
"grad_norm": 0.10720335635524811,
|
|
"learning_rate": 3.656929328546526e-05,
|
|
"loss": 1.1154221296310425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29188746213912964,
|
|
"step": 319,
|
|
"valid_targets_mean": 15495.8,
|
|
"valid_targets_min": 7900
|
|
},
|
|
{
|
|
"epoch": 1.3624733475479744,
|
|
"grad_norm": 0.1031363776494928,
|
|
"learning_rate": 3.653592929300471e-05,
|
|
"loss": 1.0886147022247314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28728219866752625,
|
|
"step": 320,
|
|
"valid_targets_mean": 15197.1,
|
|
"valid_targets_min": 4028
|
|
},
|
|
{
|
|
"epoch": 1.3667377398720681,
|
|
"grad_norm": 0.09877354984739944,
|
|
"learning_rate": 3.650241922481675e-05,
|
|
"loss": 1.1005971431732178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2516176700592041,
|
|
"step": 321,
|
|
"valid_targets_mean": 14793.1,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 1.371002132196162,
|
|
"grad_norm": 0.12162139075657939,
|
|
"learning_rate": 3.6468763376923886e-05,
|
|
"loss": 1.0966756343841553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29920968413352966,
|
|
"step": 322,
|
|
"valid_targets_mean": 15084.0,
|
|
"valid_targets_min": 9424
|
|
},
|
|
{
|
|
"epoch": 1.375266524520256,
|
|
"grad_norm": 0.11338523434311469,
|
|
"learning_rate": 3.6434962046636464e-05,
|
|
"loss": 1.0764126777648926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26871368288993835,
|
|
"step": 323,
|
|
"valid_targets_mean": 14493.0,
|
|
"valid_targets_min": 6413
|
|
},
|
|
{
|
|
"epoch": 1.3795309168443497,
|
|
"grad_norm": 0.1021787802517544,
|
|
"learning_rate": 3.6401015532549957e-05,
|
|
"loss": 1.0411429405212402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578125,
|
|
"step": 324,
|
|
"valid_targets_mean": 14568.0,
|
|
"valid_targets_min": 3212
|
|
},
|
|
{
|
|
"epoch": 1.3837953091684434,
|
|
"grad_norm": 0.11266268201860755,
|
|
"learning_rate": 3.6366924134542386e-05,
|
|
"loss": 1.1386549472808838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27760809659957886,
|
|
"step": 325,
|
|
"valid_targets_mean": 14527.2,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 1.3880597014925373,
|
|
"grad_norm": 0.1243854058332879,
|
|
"learning_rate": 3.633268815377166e-05,
|
|
"loss": 1.067612886428833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844510078430176,
|
|
"step": 326,
|
|
"valid_targets_mean": 15790.6,
|
|
"valid_targets_min": 12765
|
|
},
|
|
{
|
|
"epoch": 1.3923240938166312,
|
|
"grad_norm": 0.12457675042727309,
|
|
"learning_rate": 3.6298307892672895e-05,
|
|
"loss": 1.1009567975997925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25199833512306213,
|
|
"step": 327,
|
|
"valid_targets_mean": 13961.3,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 1.396588486140725,
|
|
"grad_norm": 0.12405821883090433,
|
|
"learning_rate": 3.626378365495577e-05,
|
|
"loss": 1.0614802837371826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26193326711654663,
|
|
"step": 328,
|
|
"valid_targets_mean": 14690.6,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 1.4008528784648187,
|
|
"grad_norm": 0.12792072364392368,
|
|
"learning_rate": 3.622911574560181e-05,
|
|
"loss": 1.0712215900421143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001205325126648,
|
|
"step": 329,
|
|
"valid_targets_mean": 15184.8,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 1.4051172707889126,
|
|
"grad_norm": 0.11802506426659638,
|
|
"learning_rate": 3.6194304470861744e-05,
|
|
"loss": 1.0776985883712769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24896389245986938,
|
|
"step": 330,
|
|
"valid_targets_mean": 14307.6,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 1.4093816631130065,
|
|
"grad_norm": 0.14193456034629987,
|
|
"learning_rate": 3.615935013825272e-05,
|
|
"loss": 1.0828070640563965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24934512376785278,
|
|
"step": 331,
|
|
"valid_targets_mean": 14097.0,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 1.4136460554371002,
|
|
"grad_norm": 0.11454156731125559,
|
|
"learning_rate": 3.612425305655569e-05,
|
|
"loss": 1.0726284980773926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23363390564918518,
|
|
"step": 332,
|
|
"valid_targets_mean": 13896.2,
|
|
"valid_targets_min": 4213
|
|
},
|
|
{
|
|
"epoch": 1.417910447761194,
|
|
"grad_norm": 0.1119485811276927,
|
|
"learning_rate": 3.6089013535812593e-05,
|
|
"loss": 1.0638244152069092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27090781927108765,
|
|
"step": 333,
|
|
"valid_targets_mean": 14418.3,
|
|
"valid_targets_min": 2850
|
|
},
|
|
{
|
|
"epoch": 1.4221748400852878,
|
|
"grad_norm": 0.15578617722434687,
|
|
"learning_rate": 3.6053631887323656e-05,
|
|
"loss": 1.0885016918182373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501731216907501,
|
|
"step": 334,
|
|
"valid_targets_mean": 13383.7,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 1.4264392324093818,
|
|
"grad_norm": 0.11178099610928291,
|
|
"learning_rate": 3.601810842364465e-05,
|
|
"loss": 1.0959135293960571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762734591960907,
|
|
"step": 335,
|
|
"valid_targets_mean": 14974.3,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 1.4307036247334755,
|
|
"grad_norm": 0.12735356011823407,
|
|
"learning_rate": 3.598244345858412e-05,
|
|
"loss": 1.0883452892303467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735271751880646,
|
|
"step": 336,
|
|
"valid_targets_mean": 14451.0,
|
|
"valid_targets_min": 3723
|
|
},
|
|
{
|
|
"epoch": 1.4349680170575694,
|
|
"grad_norm": 0.14595604051059685,
|
|
"learning_rate": 3.594663730720059e-05,
|
|
"loss": 1.0951387882232666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695275545120239,
|
|
"step": 337,
|
|
"valid_targets_mean": 14800.2,
|
|
"valid_targets_min": 5120
|
|
},
|
|
{
|
|
"epoch": 1.439232409381663,
|
|
"grad_norm": 0.12801498029917474,
|
|
"learning_rate": 3.591069028579982e-05,
|
|
"loss": 1.075453281402588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26048654317855835,
|
|
"step": 338,
|
|
"valid_targets_mean": 14641.3,
|
|
"valid_targets_min": 2632
|
|
},
|
|
{
|
|
"epoch": 1.443496801705757,
|
|
"grad_norm": 0.13185458746968054,
|
|
"learning_rate": 3.5874602711931994e-05,
|
|
"loss": 1.0665796995162964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24829693138599396,
|
|
"step": 339,
|
|
"valid_targets_mean": 14248.4,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 1.4477611940298507,
|
|
"grad_norm": 0.10891407912156989,
|
|
"learning_rate": 3.5838374904388904e-05,
|
|
"loss": 1.102658987045288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27470141649246216,
|
|
"step": 340,
|
|
"valid_targets_mean": 14699.8,
|
|
"valid_targets_min": 2641
|
|
},
|
|
{
|
|
"epoch": 1.4520255863539446,
|
|
"grad_norm": 0.13165961307673427,
|
|
"learning_rate": 3.580200718320115e-05,
|
|
"loss": 1.0745452642440796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544945478439331,
|
|
"step": 341,
|
|
"valid_targets_mean": 14787.8,
|
|
"valid_targets_min": 4107
|
|
},
|
|
{
|
|
"epoch": 1.4562899786780383,
|
|
"grad_norm": 0.13748001969414841,
|
|
"learning_rate": 3.576549986963531e-05,
|
|
"loss": 1.1194934844970703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657977342605591,
|
|
"step": 342,
|
|
"valid_targets_mean": 14735.9,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 1.4605543710021323,
|
|
"grad_norm": 0.10477711935401506,
|
|
"learning_rate": 3.5728853286191075e-05,
|
|
"loss": 1.072913408279419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24339430034160614,
|
|
"step": 343,
|
|
"valid_targets_mean": 15050.7,
|
|
"valid_targets_min": 6088
|
|
},
|
|
{
|
|
"epoch": 1.464818763326226,
|
|
"grad_norm": 0.13206563549600187,
|
|
"learning_rate": 3.5692067756598465e-05,
|
|
"loss": 1.1285758018493652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717724144458771,
|
|
"step": 344,
|
|
"valid_targets_mean": 14498.3,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 1.4690831556503199,
|
|
"grad_norm": 0.1035041729683242,
|
|
"learning_rate": 3.5655143605814885e-05,
|
|
"loss": 1.127623200416565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3060557544231415,
|
|
"step": 345,
|
|
"valid_targets_mean": 15268.4,
|
|
"valid_targets_min": 5483
|
|
},
|
|
{
|
|
"epoch": 1.4733475479744136,
|
|
"grad_norm": 0.1287861596639523,
|
|
"learning_rate": 3.561808116002232e-05,
|
|
"loss": 1.0981791019439697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27787846326828003,
|
|
"step": 346,
|
|
"valid_targets_mean": 14271.1,
|
|
"valid_targets_min": 4778
|
|
},
|
|
{
|
|
"epoch": 1.4776119402985075,
|
|
"grad_norm": 0.10400314698445817,
|
|
"learning_rate": 3.5580880746624444e-05,
|
|
"loss": 1.150883674621582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29765594005584717,
|
|
"step": 347,
|
|
"valid_targets_mean": 14915.9,
|
|
"valid_targets_min": 3438
|
|
},
|
|
{
|
|
"epoch": 1.4818763326226012,
|
|
"grad_norm": 0.11796248919050434,
|
|
"learning_rate": 3.5543542694243685e-05,
|
|
"loss": 1.0655814409255981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26165351271629333,
|
|
"step": 348,
|
|
"valid_targets_mean": 14939.1,
|
|
"valid_targets_min": 3812
|
|
},
|
|
{
|
|
"epoch": 1.4861407249466951,
|
|
"grad_norm": 0.12131810205592578,
|
|
"learning_rate": 3.5506067332718355e-05,
|
|
"loss": 1.0898313522338867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613683342933655,
|
|
"step": 349,
|
|
"valid_targets_mean": 15062.3,
|
|
"valid_targets_min": 7177
|
|
},
|
|
{
|
|
"epoch": 1.4904051172707888,
|
|
"grad_norm": 0.11277146846030922,
|
|
"learning_rate": 3.546845499309976e-05,
|
|
"loss": 1.074210524559021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859177887439728,
|
|
"step": 350,
|
|
"valid_targets_mean": 14775.0,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 1.4946695095948828,
|
|
"grad_norm": 0.1125743066693261,
|
|
"learning_rate": 3.5430706007649225e-05,
|
|
"loss": 1.057504653930664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2800464630126953,
|
|
"step": 351,
|
|
"valid_targets_mean": 15699.2,
|
|
"valid_targets_min": 12993
|
|
},
|
|
{
|
|
"epoch": 1.4989339019189765,
|
|
"grad_norm": 0.10877437021463807,
|
|
"learning_rate": 3.539282070983518e-05,
|
|
"loss": 1.037824273109436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656164765357971,
|
|
"step": 352,
|
|
"valid_targets_mean": 15310.8,
|
|
"valid_targets_min": 8363
|
|
},
|
|
{
|
|
"epoch": 1.5031982942430704,
|
|
"grad_norm": 0.11534094943493284,
|
|
"learning_rate": 3.535479943433023e-05,
|
|
"loss": 1.1390454769134521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967601418495178,
|
|
"step": 353,
|
|
"valid_targets_mean": 15405.8,
|
|
"valid_targets_min": 7774
|
|
},
|
|
{
|
|
"epoch": 1.5074626865671643,
|
|
"grad_norm": 0.10781391911043343,
|
|
"learning_rate": 3.5316642517008184e-05,
|
|
"loss": 1.0576155185699463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640992999076843,
|
|
"step": 354,
|
|
"valid_targets_mean": 14781.1,
|
|
"valid_targets_min": 6106
|
|
},
|
|
{
|
|
"epoch": 1.511727078891258,
|
|
"grad_norm": 0.11597676190927564,
|
|
"learning_rate": 3.5278350294941074e-05,
|
|
"loss": 1.0749003887176514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610117197036743,
|
|
"step": 355,
|
|
"valid_targets_mean": 15589.6,
|
|
"valid_targets_min": 10997
|
|
},
|
|
{
|
|
"epoch": 1.5159914712153517,
|
|
"grad_norm": 0.10241416092293668,
|
|
"learning_rate": 3.523992310639622e-05,
|
|
"loss": 1.1029225587844849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2503296434879303,
|
|
"step": 356,
|
|
"valid_targets_mean": 14615.8,
|
|
"valid_targets_min": 3881
|
|
},
|
|
{
|
|
"epoch": 1.5202558635394456,
|
|
"grad_norm": 0.10442030403521325,
|
|
"learning_rate": 3.5201361290833165e-05,
|
|
"loss": 1.0899959802627563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842419445514679,
|
|
"step": 357,
|
|
"valid_targets_mean": 15131.1,
|
|
"valid_targets_min": 6438
|
|
},
|
|
{
|
|
"epoch": 1.5245202558635396,
|
|
"grad_norm": 0.09987263919645031,
|
|
"learning_rate": 3.516266518890079e-05,
|
|
"loss": 1.0917596817016602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29235658049583435,
|
|
"step": 358,
|
|
"valid_targets_mean": 15751.2,
|
|
"valid_targets_min": 8085
|
|
},
|
|
{
|
|
"epoch": 1.5287846481876333,
|
|
"grad_norm": 0.10144690514214857,
|
|
"learning_rate": 3.512383514243419e-05,
|
|
"loss": 1.0388712882995605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658079266548157,
|
|
"step": 359,
|
|
"valid_targets_mean": 15070.4,
|
|
"valid_targets_min": 5750
|
|
},
|
|
{
|
|
"epoch": 1.533049040511727,
|
|
"grad_norm": 0.10670074757602906,
|
|
"learning_rate": 3.5084871494451716e-05,
|
|
"loss": 1.0851870775222778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761267423629761,
|
|
"step": 360,
|
|
"valid_targets_mean": 14674.6,
|
|
"valid_targets_min": 4970
|
|
},
|
|
{
|
|
"epoch": 1.537313432835821,
|
|
"grad_norm": 0.13550624376630838,
|
|
"learning_rate": 3.5045774589151955e-05,
|
|
"loss": 1.1175588369369507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806294560432434,
|
|
"step": 361,
|
|
"valid_targets_mean": 14360.2,
|
|
"valid_targets_min": 5217
|
|
},
|
|
{
|
|
"epoch": 1.5415778251599148,
|
|
"grad_norm": 0.1160123616496653,
|
|
"learning_rate": 3.500654477191064e-05,
|
|
"loss": 1.0992441177368164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676744759082794,
|
|
"step": 362,
|
|
"valid_targets_mean": 15394.1,
|
|
"valid_targets_min": 9383
|
|
},
|
|
{
|
|
"epoch": 1.5458422174840085,
|
|
"grad_norm": 0.1379411380537933,
|
|
"learning_rate": 3.496718238927764e-05,
|
|
"loss": 1.0704309940338135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28866302967071533,
|
|
"step": 363,
|
|
"valid_targets_mean": 14926.6,
|
|
"valid_targets_min": 5543
|
|
},
|
|
{
|
|
"epoch": 1.5501066098081022,
|
|
"grad_norm": 0.11459417748764718,
|
|
"learning_rate": 3.492768778897388e-05,
|
|
"loss": 1.0766518115997314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26298147439956665,
|
|
"step": 364,
|
|
"valid_targets_mean": 14691.7,
|
|
"valid_targets_min": 5379
|
|
},
|
|
{
|
|
"epoch": 1.5543710021321961,
|
|
"grad_norm": 0.13962495731920585,
|
|
"learning_rate": 3.4888061319888276e-05,
|
|
"loss": 1.0948641300201416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818235754966736,
|
|
"step": 365,
|
|
"valid_targets_mean": 14954.8,
|
|
"valid_targets_min": 6380
|
|
},
|
|
{
|
|
"epoch": 1.55863539445629,
|
|
"grad_norm": 0.14024882925786547,
|
|
"learning_rate": 3.484830333207466e-05,
|
|
"loss": 1.05232834815979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24685053527355194,
|
|
"step": 366,
|
|
"valid_targets_mean": 14426.7,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 1.5628997867803838,
|
|
"grad_norm": 0.11749558563639338,
|
|
"learning_rate": 3.4808414176748666e-05,
|
|
"loss": 1.0615160465240479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26881107687950134,
|
|
"step": 367,
|
|
"valid_targets_mean": 14904.8,
|
|
"valid_targets_min": 3346
|
|
},
|
|
{
|
|
"epoch": 1.5671641791044775,
|
|
"grad_norm": 0.12682181363550105,
|
|
"learning_rate": 3.476839420628466e-05,
|
|
"loss": 1.1117736101150513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780311405658722,
|
|
"step": 368,
|
|
"valid_targets_mean": 15296.2,
|
|
"valid_targets_min": 4781
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.13037371369646736,
|
|
"learning_rate": 3.472824377421257e-05,
|
|
"loss": 1.0968823432922363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271740198135376,
|
|
"step": 369,
|
|
"valid_targets_mean": 14465.7,
|
|
"valid_targets_min": 2541
|
|
},
|
|
{
|
|
"epoch": 1.5756929637526653,
|
|
"grad_norm": 0.14933048671018942,
|
|
"learning_rate": 3.4687963235214845e-05,
|
|
"loss": 1.089181900024414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28124457597732544,
|
|
"step": 370,
|
|
"valid_targets_mean": 14964.9,
|
|
"valid_targets_min": 6430
|
|
},
|
|
{
|
|
"epoch": 1.579957356076759,
|
|
"grad_norm": 0.10907412780232184,
|
|
"learning_rate": 3.464755294512325e-05,
|
|
"loss": 1.0588440895080566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28056174516677856,
|
|
"step": 371,
|
|
"valid_targets_mean": 14473.8,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 1.5842217484008527,
|
|
"grad_norm": 0.14953753298333028,
|
|
"learning_rate": 3.4607013260915765e-05,
|
|
"loss": 1.1049847602844238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830548584461212,
|
|
"step": 372,
|
|
"valid_targets_mean": 15298.8,
|
|
"valid_targets_min": 3393
|
|
},
|
|
{
|
|
"epoch": 1.5884861407249466,
|
|
"grad_norm": 0.10873163112780194,
|
|
"learning_rate": 3.4566344540713404e-05,
|
|
"loss": 1.0358188152313232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706957459449768,
|
|
"step": 373,
|
|
"valid_targets_mean": 15668.7,
|
|
"valid_targets_min": 3920
|
|
},
|
|
{
|
|
"epoch": 1.5927505330490406,
|
|
"grad_norm": 0.11698205233230456,
|
|
"learning_rate": 3.452554714377706e-05,
|
|
"loss": 1.0651031732559204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28714728355407715,
|
|
"step": 374,
|
|
"valid_targets_mean": 15681.1,
|
|
"valid_targets_min": 8328
|
|
},
|
|
{
|
|
"epoch": 1.5970149253731343,
|
|
"grad_norm": 0.11071017387878183,
|
|
"learning_rate": 3.448462143050436e-05,
|
|
"loss": 1.0621004104614258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25595471262931824,
|
|
"step": 375,
|
|
"valid_targets_mean": 14501.1,
|
|
"valid_targets_min": 4215
|
|
},
|
|
{
|
|
"epoch": 1.6012793176972282,
|
|
"grad_norm": 0.14376534752385098,
|
|
"learning_rate": 3.4443567762426444e-05,
|
|
"loss": 1.0872830152511597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558348476886749,
|
|
"step": 376,
|
|
"valid_targets_mean": 14658.4,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 1.6055437100213221,
|
|
"grad_norm": 0.12196747305937251,
|
|
"learning_rate": 3.440238650220477e-05,
|
|
"loss": 1.0826265811920166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578622102737427,
|
|
"step": 377,
|
|
"valid_targets_mean": 14727.9,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 1.6098081023454158,
|
|
"grad_norm": 0.12896634830847412,
|
|
"learning_rate": 3.4361078013627945e-05,
|
|
"loss": 1.064319133758545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26188498735427856,
|
|
"step": 378,
|
|
"valid_targets_mean": 15258.2,
|
|
"valid_targets_min": 6548
|
|
},
|
|
{
|
|
"epoch": 1.6140724946695095,
|
|
"grad_norm": 0.12319401063348305,
|
|
"learning_rate": 3.4319642661608474e-05,
|
|
"loss": 1.0894337892532349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26469263434410095,
|
|
"step": 379,
|
|
"valid_targets_mean": 14385.2,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 1.6183368869936035,
|
|
"grad_norm": 0.11053513057906855,
|
|
"learning_rate": 3.427808081217957e-05,
|
|
"loss": 1.122054100036621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28404873609542847,
|
|
"step": 380,
|
|
"valid_targets_mean": 15179.6,
|
|
"valid_targets_min": 2591
|
|
},
|
|
{
|
|
"epoch": 1.6226012793176974,
|
|
"grad_norm": 0.1454053882522242,
|
|
"learning_rate": 3.423639283249189e-05,
|
|
"loss": 1.044264554977417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711837887763977,
|
|
"step": 381,
|
|
"valid_targets_mean": 14525.2,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 1.626865671641791,
|
|
"grad_norm": 0.11551695691219174,
|
|
"learning_rate": 3.419457909081032e-05,
|
|
"loss": 1.0583226680755615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718050181865692,
|
|
"step": 382,
|
|
"valid_targets_mean": 14423.9,
|
|
"valid_targets_min": 2865
|
|
},
|
|
{
|
|
"epoch": 1.6311300639658848,
|
|
"grad_norm": 0.14466450550667942,
|
|
"learning_rate": 3.415263995651069e-05,
|
|
"loss": 1.0639991760253906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571932375431061,
|
|
"step": 383,
|
|
"valid_targets_mean": 14661.0,
|
|
"valid_targets_min": 3144
|
|
},
|
|
{
|
|
"epoch": 1.6353944562899787,
|
|
"grad_norm": 0.12475348154804808,
|
|
"learning_rate": 3.411057580007653e-05,
|
|
"loss": 1.0943620204925537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27506619691848755,
|
|
"step": 384,
|
|
"valid_targets_mean": 14811.4,
|
|
"valid_targets_min": 3535
|
|
},
|
|
{
|
|
"epoch": 1.6396588486140726,
|
|
"grad_norm": 0.14302915071496577,
|
|
"learning_rate": 3.4068386993095806e-05,
|
|
"loss": 1.0778902769088745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766057848930359,
|
|
"step": 385,
|
|
"valid_targets_mean": 14840.3,
|
|
"valid_targets_min": 4287
|
|
},
|
|
{
|
|
"epoch": 1.6439232409381663,
|
|
"grad_norm": 0.1180188877301216,
|
|
"learning_rate": 3.402607390825762e-05,
|
|
"loss": 1.0909117460250854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802852392196655,
|
|
"step": 386,
|
|
"valid_targets_mean": 15010.8,
|
|
"valid_targets_min": 5771
|
|
},
|
|
{
|
|
"epoch": 1.64818763326226,
|
|
"grad_norm": 0.13089171211986675,
|
|
"learning_rate": 3.398363691934894e-05,
|
|
"loss": 1.0828299522399902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25012773275375366,
|
|
"step": 387,
|
|
"valid_targets_mean": 13943.8,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 1.652452025586354,
|
|
"grad_norm": 0.12237534327201931,
|
|
"learning_rate": 3.3941076401251244e-05,
|
|
"loss": 1.041419506072998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2453901618719101,
|
|
"step": 388,
|
|
"valid_targets_mean": 15080.3,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 1.6567164179104479,
|
|
"grad_norm": 0.13086318292233995,
|
|
"learning_rate": 3.3898392729937295e-05,
|
|
"loss": 1.0613362789154053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675091624259949,
|
|
"step": 389,
|
|
"valid_targets_mean": 15001.9,
|
|
"valid_targets_min": 5555
|
|
},
|
|
{
|
|
"epoch": 1.6609808102345416,
|
|
"grad_norm": 0.11304032288068386,
|
|
"learning_rate": 3.385558628246774e-05,
|
|
"loss": 1.076442003250122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726753354072571,
|
|
"step": 390,
|
|
"valid_targets_mean": 15597.4,
|
|
"valid_targets_min": 2622
|
|
},
|
|
{
|
|
"epoch": 1.6652452025586353,
|
|
"grad_norm": 0.1190897954944693,
|
|
"learning_rate": 3.381265743698781e-05,
|
|
"loss": 1.097648024559021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28474539518356323,
|
|
"step": 391,
|
|
"valid_targets_mean": 14942.6,
|
|
"valid_targets_min": 6294
|
|
},
|
|
{
|
|
"epoch": 1.6695095948827292,
|
|
"grad_norm": 0.12775310359227787,
|
|
"learning_rate": 3.3769606572724e-05,
|
|
"loss": 1.094165563583374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992146909236908,
|
|
"step": 392,
|
|
"valid_targets_mean": 15014.3,
|
|
"valid_targets_min": 5741
|
|
},
|
|
{
|
|
"epoch": 1.6737739872068231,
|
|
"grad_norm": 0.14495496452762205,
|
|
"learning_rate": 3.3726434069980686e-05,
|
|
"loss": 1.067185878753662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758198380470276,
|
|
"step": 393,
|
|
"valid_targets_mean": 14591.5,
|
|
"valid_targets_min": 7497
|
|
},
|
|
{
|
|
"epoch": 1.6780383795309168,
|
|
"grad_norm": 0.12361810148821586,
|
|
"learning_rate": 3.368314031013678e-05,
|
|
"loss": 1.0652775764465332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25097572803497314,
|
|
"step": 394,
|
|
"valid_targets_mean": 14844.7,
|
|
"valid_targets_min": 2469
|
|
},
|
|
{
|
|
"epoch": 1.6823027718550105,
|
|
"grad_norm": 0.12963110186975382,
|
|
"learning_rate": 3.363972567564236e-05,
|
|
"loss": 1.0334590673446655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26233670115470886,
|
|
"step": 395,
|
|
"valid_targets_mean": 15396.5,
|
|
"valid_targets_min": 4076
|
|
},
|
|
{
|
|
"epoch": 1.6865671641791045,
|
|
"grad_norm": 0.12723586535144157,
|
|
"learning_rate": 3.35961905500153e-05,
|
|
"loss": 1.0554689168930054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530735731124878,
|
|
"step": 396,
|
|
"valid_targets_mean": 14052.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.6908315565031984,
|
|
"grad_norm": 0.13385106046467465,
|
|
"learning_rate": 3.3552535317837855e-05,
|
|
"loss": 1.0330636501312256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26997625827789307,
|
|
"step": 397,
|
|
"valid_targets_mean": 15229.6,
|
|
"valid_targets_min": 7310
|
|
},
|
|
{
|
|
"epoch": 1.695095948827292,
|
|
"grad_norm": 0.14161022441301999,
|
|
"learning_rate": 3.35087603647533e-05,
|
|
"loss": 1.0561280250549316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27111226320266724,
|
|
"step": 398,
|
|
"valid_targets_mean": 14640.5,
|
|
"valid_targets_min": 5156
|
|
},
|
|
{
|
|
"epoch": 1.6993603411513858,
|
|
"grad_norm": 0.12540099350168007,
|
|
"learning_rate": 3.346486607746249e-05,
|
|
"loss": 1.0786890983581543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515331506729126,
|
|
"step": 399,
|
|
"valid_targets_mean": 14456.3,
|
|
"valid_targets_min": 2706
|
|
},
|
|
{
|
|
"epoch": 1.7036247334754797,
|
|
"grad_norm": 0.13225597797368216,
|
|
"learning_rate": 3.342085284372047e-05,
|
|
"loss": 1.0656988620758057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741518020629883,
|
|
"step": 400,
|
|
"valid_targets_mean": 14828.6,
|
|
"valid_targets_min": 7909
|
|
},
|
|
{
|
|
"epoch": 1.7078891257995736,
|
|
"grad_norm": 0.12420832917978644,
|
|
"learning_rate": 3.337672105233303e-05,
|
|
"loss": 1.0593976974487305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28430697321891785,
|
|
"step": 401,
|
|
"valid_targets_mean": 15189.5,
|
|
"valid_targets_min": 8192
|
|
},
|
|
{
|
|
"epoch": 1.7121535181236673,
|
|
"grad_norm": 0.09414277111755848,
|
|
"learning_rate": 3.3332471093153296e-05,
|
|
"loss": 1.0656559467315674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26005491614341736,
|
|
"step": 402,
|
|
"valid_targets_mean": 13691.6,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 1.716417910447761,
|
|
"grad_norm": 0.12643311373908572,
|
|
"learning_rate": 3.3288103357078244e-05,
|
|
"loss": 1.0774431228637695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27346837520599365,
|
|
"step": 403,
|
|
"valid_targets_mean": 14881.9,
|
|
"valid_targets_min": 2540
|
|
},
|
|
{
|
|
"epoch": 1.720682302771855,
|
|
"grad_norm": 0.10056017571548824,
|
|
"learning_rate": 3.324361823604529e-05,
|
|
"loss": 1.0104659795761108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24728821218013763,
|
|
"step": 404,
|
|
"valid_targets_mean": 14765.2,
|
|
"valid_targets_min": 5334
|
|
},
|
|
{
|
|
"epoch": 1.724946695095949,
|
|
"grad_norm": 0.12926179256069892,
|
|
"learning_rate": 3.319901612302881e-05,
|
|
"loss": 1.0784205198287964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719265818595886,
|
|
"step": 405,
|
|
"valid_targets_mean": 14719.6,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 1.7292110874200426,
|
|
"grad_norm": 0.13405661708287817,
|
|
"learning_rate": 3.315429741203666e-05,
|
|
"loss": 1.1021925210952759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841549217700958,
|
|
"step": 406,
|
|
"valid_targets_mean": 14608.9,
|
|
"valid_targets_min": 5331
|
|
},
|
|
{
|
|
"epoch": 1.7334754797441365,
|
|
"grad_norm": 0.15464756892728024,
|
|
"learning_rate": 3.3109462498106705e-05,
|
|
"loss": 1.0628423690795898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2621581554412842,
|
|
"step": 407,
|
|
"valid_targets_mean": 14532.0,
|
|
"valid_targets_min": 2679
|
|
},
|
|
{
|
|
"epoch": 1.7377398720682304,
|
|
"grad_norm": 0.13342598132568445,
|
|
"learning_rate": 3.306451177730333e-05,
|
|
"loss": 1.083683729171753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25192922353744507,
|
|
"step": 408,
|
|
"valid_targets_mean": 14070.4,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 1.7420042643923241,
|
|
"grad_norm": 0.1269433236572954,
|
|
"learning_rate": 3.301944564671394e-05,
|
|
"loss": 1.0548983812332153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26055648922920227,
|
|
"step": 409,
|
|
"valid_targets_mean": 14576.8,
|
|
"valid_targets_min": 7139
|
|
},
|
|
{
|
|
"epoch": 1.7462686567164178,
|
|
"grad_norm": 0.14703547699476646,
|
|
"learning_rate": 3.297426450444546e-05,
|
|
"loss": 1.1159348487854004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27076256275177,
|
|
"step": 410,
|
|
"valid_targets_mean": 14292.3,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 1.7505330490405118,
|
|
"grad_norm": 0.10197192181409305,
|
|
"learning_rate": 3.292896874962078e-05,
|
|
"loss": 1.0458035469055176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25788283348083496,
|
|
"step": 411,
|
|
"valid_targets_mean": 15507.5,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 1.7547974413646057,
|
|
"grad_norm": 0.1489970784949312,
|
|
"learning_rate": 3.2883558782375294e-05,
|
|
"loss": 1.1026990413665771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29226580262184143,
|
|
"step": 412,
|
|
"valid_targets_mean": 15263.1,
|
|
"valid_targets_min": 4783
|
|
},
|
|
{
|
|
"epoch": 1.7590618336886994,
|
|
"grad_norm": 0.11466976398835169,
|
|
"learning_rate": 3.283803500385332e-05,
|
|
"loss": 1.0824158191680908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684157192707062,
|
|
"step": 413,
|
|
"valid_targets_mean": 14599.2,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 1.763326226012793,
|
|
"grad_norm": 0.13130715112735317,
|
|
"learning_rate": 3.2792397816204546e-05,
|
|
"loss": 1.0827077627182007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868741452693939,
|
|
"step": 414,
|
|
"valid_targets_mean": 14412.4,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 1.767590618336887,
|
|
"grad_norm": 0.140547167284465,
|
|
"learning_rate": 3.2746647622580524e-05,
|
|
"loss": 1.0429410934448242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24891036748886108,
|
|
"step": 415,
|
|
"valid_targets_mean": 15049.4,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 1.771855010660981,
|
|
"grad_norm": 0.12616450226001252,
|
|
"learning_rate": 3.270078482713106e-05,
|
|
"loss": 1.0499267578125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27030113339424133,
|
|
"step": 416,
|
|
"valid_targets_mean": 14881.8,
|
|
"valid_targets_min": 7545
|
|
},
|
|
{
|
|
"epoch": 1.7761194029850746,
|
|
"grad_norm": 0.11139463648724142,
|
|
"learning_rate": 3.265480983500069e-05,
|
|
"loss": 1.0741684436798096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622406780719757,
|
|
"step": 417,
|
|
"valid_targets_mean": 14172.5,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 1.7803837953091683,
|
|
"grad_norm": 0.10357766938227037,
|
|
"learning_rate": 3.260872305232507e-05,
|
|
"loss": 1.0451233386993408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25643688440322876,
|
|
"step": 418,
|
|
"valid_targets_mean": 14493.0,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 1.7846481876332623,
|
|
"grad_norm": 0.11323009649143977,
|
|
"learning_rate": 3.256252488622738e-05,
|
|
"loss": 1.058302879333496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26543742418289185,
|
|
"step": 419,
|
|
"valid_targets_mean": 15440.0,
|
|
"valid_targets_min": 10672
|
|
},
|
|
{
|
|
"epoch": 1.7889125799573562,
|
|
"grad_norm": 0.09500605448492094,
|
|
"learning_rate": 3.251621574481475e-05,
|
|
"loss": 1.1008851528167725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2919309139251709,
|
|
"step": 420,
|
|
"valid_targets_mean": 14934.4,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 1.79317697228145,
|
|
"grad_norm": 0.10026409809880978,
|
|
"learning_rate": 3.246979603717467e-05,
|
|
"loss": 1.0398553609848022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24688240885734558,
|
|
"step": 421,
|
|
"valid_targets_mean": 15338.0,
|
|
"valid_targets_min": 6373
|
|
},
|
|
{
|
|
"epoch": 1.7974413646055436,
|
|
"grad_norm": 0.10637793824507388,
|
|
"learning_rate": 3.242326617337133e-05,
|
|
"loss": 1.0642235279083252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25144168734550476,
|
|
"step": 422,
|
|
"valid_targets_mean": 14103.1,
|
|
"valid_targets_min": 2156
|
|
},
|
|
{
|
|
"epoch": 1.8017057569296375,
|
|
"grad_norm": 0.1112609047665103,
|
|
"learning_rate": 3.2376626564442016e-05,
|
|
"loss": 1.0859400033950806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681505084037781,
|
|
"step": 423,
|
|
"valid_targets_mean": 15312.2,
|
|
"valid_targets_min": 7312
|
|
},
|
|
{
|
|
"epoch": 1.8059701492537314,
|
|
"grad_norm": 0.09902903723295896,
|
|
"learning_rate": 3.2329877622393515e-05,
|
|
"loss": 1.0946044921875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2805511951446533,
|
|
"step": 424,
|
|
"valid_targets_mean": 14936.2,
|
|
"valid_targets_min": 7976
|
|
},
|
|
{
|
|
"epoch": 1.8102345415778252,
|
|
"grad_norm": 0.11097833926698866,
|
|
"learning_rate": 3.228301976019841e-05,
|
|
"loss": 1.0197291374206543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24550142884254456,
|
|
"step": 425,
|
|
"valid_targets_mean": 14873.5,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 1.8144989339019189,
|
|
"grad_norm": 0.11486538208500076,
|
|
"learning_rate": 3.22360533917915e-05,
|
|
"loss": 1.0315191745758057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2519758939743042,
|
|
"step": 426,
|
|
"valid_targets_mean": 14839.6,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 1.8187633262260128,
|
|
"grad_norm": 0.10629637636047039,
|
|
"learning_rate": 3.218897893206608e-05,
|
|
"loss": 1.1044940948486328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2893705368041992,
|
|
"step": 427,
|
|
"valid_targets_mean": 15546.2,
|
|
"valid_targets_min": 11310
|
|
},
|
|
{
|
|
"epoch": 1.8230277185501067,
|
|
"grad_norm": 0.10264882731900092,
|
|
"learning_rate": 3.2141796796870335e-05,
|
|
"loss": 1.0161839723587036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706839442253113,
|
|
"step": 428,
|
|
"valid_targets_mean": 15591.0,
|
|
"valid_targets_min": 5277
|
|
},
|
|
{
|
|
"epoch": 1.8272921108742004,
|
|
"grad_norm": 0.10817961844103864,
|
|
"learning_rate": 3.2094507403003614e-05,
|
|
"loss": 1.0440422296524048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556973993778229,
|
|
"step": 429,
|
|
"valid_targets_mean": 14453.8,
|
|
"valid_targets_min": 5891
|
|
},
|
|
{
|
|
"epoch": 1.831556503198294,
|
|
"grad_norm": 0.10738883155066847,
|
|
"learning_rate": 3.2047111168212785e-05,
|
|
"loss": 1.0380173921585083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24675682187080383,
|
|
"step": 430,
|
|
"valid_targets_mean": 14939.2,
|
|
"valid_targets_min": 4208
|
|
},
|
|
{
|
|
"epoch": 1.835820895522388,
|
|
"grad_norm": 0.12514250497768345,
|
|
"learning_rate": 3.1999608511188524e-05,
|
|
"loss": 1.0663530826568604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24724867939949036,
|
|
"step": 431,
|
|
"valid_targets_mean": 14757.4,
|
|
"valid_targets_min": 5356
|
|
},
|
|
{
|
|
"epoch": 1.840085287846482,
|
|
"grad_norm": 0.11608314296561673,
|
|
"learning_rate": 3.1951999851561625e-05,
|
|
"loss": 1.0794222354888916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597395181655884,
|
|
"step": 432,
|
|
"valid_targets_mean": 14086.0,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 1.8443496801705757,
|
|
"grad_norm": 0.1075628756787644,
|
|
"learning_rate": 3.190428560989931e-05,
|
|
"loss": 1.0682449340820312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26385074853897095,
|
|
"step": 433,
|
|
"valid_targets_mean": 14313.8,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 1.8486140724946694,
|
|
"grad_norm": 0.13116117414410683,
|
|
"learning_rate": 3.185646620770146e-05,
|
|
"loss": 1.0830940008163452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273065447807312,
|
|
"step": 434,
|
|
"valid_targets_mean": 14730.8,
|
|
"valid_targets_min": 5238
|
|
},
|
|
{
|
|
"epoch": 1.8528784648187633,
|
|
"grad_norm": 0.11804865936613032,
|
|
"learning_rate": 3.180854206739696e-05,
|
|
"loss": 1.082724928855896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2746577858924866,
|
|
"step": 435,
|
|
"valid_targets_mean": 14328.6,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.10949828846379563,
|
|
"learning_rate": 3.176051361233991e-05,
|
|
"loss": 1.0387179851531982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705637514591217,
|
|
"step": 436,
|
|
"valid_targets_mean": 15597.6,
|
|
"valid_targets_min": 10814
|
|
},
|
|
{
|
|
"epoch": 1.861407249466951,
|
|
"grad_norm": 0.11561094279879677,
|
|
"learning_rate": 3.171238126680594e-05,
|
|
"loss": 1.0813112258911133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25433382391929626,
|
|
"step": 437,
|
|
"valid_targets_mean": 14030.3,
|
|
"valid_targets_min": 3190
|
|
},
|
|
{
|
|
"epoch": 1.8656716417910446,
|
|
"grad_norm": 0.12798901681396424,
|
|
"learning_rate": 3.166414545598839e-05,
|
|
"loss": 1.0915324687957764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26344460248947144,
|
|
"step": 438,
|
|
"valid_targets_mean": 15152.7,
|
|
"valid_targets_min": 7099
|
|
},
|
|
{
|
|
"epoch": 1.8699360341151388,
|
|
"grad_norm": 0.1271214513570374,
|
|
"learning_rate": 3.161580660599464e-05,
|
|
"loss": 1.094179630279541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2800491452217102,
|
|
"step": 439,
|
|
"valid_targets_mean": 15732.6,
|
|
"valid_targets_min": 10404
|
|
},
|
|
{
|
|
"epoch": 1.8742004264392325,
|
|
"grad_norm": 0.10646008681655,
|
|
"learning_rate": 3.1567365143842264e-05,
|
|
"loss": 1.0482121706008911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2668571472167969,
|
|
"step": 440,
|
|
"valid_targets_mean": 15047.2,
|
|
"valid_targets_min": 6021
|
|
},
|
|
{
|
|
"epoch": 1.8784648187633262,
|
|
"grad_norm": 0.10433435989144153,
|
|
"learning_rate": 3.1518821497455326e-05,
|
|
"loss": 1.0509119033813477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25341445207595825,
|
|
"step": 441,
|
|
"valid_targets_mean": 13925.6,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 1.88272921108742,
|
|
"grad_norm": 0.10174469704920425,
|
|
"learning_rate": 3.147017609566054e-05,
|
|
"loss": 1.0561842918395996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720150351524353,
|
|
"step": 442,
|
|
"valid_targets_mean": 15477.6,
|
|
"valid_targets_min": 8415
|
|
},
|
|
{
|
|
"epoch": 1.886993603411514,
|
|
"grad_norm": 0.11118964172110678,
|
|
"learning_rate": 3.142142936818353e-05,
|
|
"loss": 1.0753270387649536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276233971118927,
|
|
"step": 443,
|
|
"valid_targets_mean": 14627.8,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 1.8912579957356077,
|
|
"grad_norm": 0.11814802413618256,
|
|
"learning_rate": 3.137258174564501e-05,
|
|
"loss": 1.042363166809082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25078055262565613,
|
|
"step": 444,
|
|
"valid_targets_mean": 14520.2,
|
|
"valid_targets_min": 2962
|
|
},
|
|
{
|
|
"epoch": 1.8955223880597014,
|
|
"grad_norm": 0.11466348515085173,
|
|
"learning_rate": 3.1323633659556986e-05,
|
|
"loss": 1.09202241897583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27767413854599,
|
|
"step": 445,
|
|
"valid_targets_mean": 15421.9,
|
|
"valid_targets_min": 2208
|
|
},
|
|
{
|
|
"epoch": 1.8997867803837953,
|
|
"grad_norm": 0.10291753255190214,
|
|
"learning_rate": 3.127458554231894e-05,
|
|
"loss": 1.0517168045043945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23889979720115662,
|
|
"step": 446,
|
|
"valid_targets_mean": 14309.4,
|
|
"valid_targets_min": 5753
|
|
},
|
|
{
|
|
"epoch": 1.9040511727078893,
|
|
"grad_norm": 0.12461980461067836,
|
|
"learning_rate": 3.122543782721402e-05,
|
|
"loss": 1.050453543663025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615568935871124,
|
|
"step": 447,
|
|
"valid_targets_mean": 13918.3,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 1.908315565031983,
|
|
"grad_norm": 0.09428324002827486,
|
|
"learning_rate": 3.1176190948405194e-05,
|
|
"loss": 1.104711651802063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562685012817383,
|
|
"step": 448,
|
|
"valid_targets_mean": 14324.2,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 1.9125799573560767,
|
|
"grad_norm": 0.16893958621958116,
|
|
"learning_rate": 3.112684534093142e-05,
|
|
"loss": 1.0504666566848755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534116506576538,
|
|
"step": 449,
|
|
"valid_targets_mean": 15158.3,
|
|
"valid_targets_min": 7789
|
|
},
|
|
{
|
|
"epoch": 1.9168443496801706,
|
|
"grad_norm": 0.09952080562448751,
|
|
"learning_rate": 3.107740144070385e-05,
|
|
"loss": 1.0780071020126343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26799800992012024,
|
|
"step": 450,
|
|
"valid_targets_mean": 14461.8,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 1.9211087420042645,
|
|
"grad_norm": 0.12942635643583963,
|
|
"learning_rate": 3.102785968450188e-05,
|
|
"loss": 1.0668766498565674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270796000957489,
|
|
"step": 451,
|
|
"valid_targets_mean": 15381.7,
|
|
"valid_targets_min": 4575
|
|
},
|
|
{
|
|
"epoch": 1.9253731343283582,
|
|
"grad_norm": 0.10894682530418832,
|
|
"learning_rate": 3.09782205099694e-05,
|
|
"loss": 1.0592687129974365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25496721267700195,
|
|
"step": 452,
|
|
"valid_targets_mean": 14738.8,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 1.929637526652452,
|
|
"grad_norm": 0.1314620891674012,
|
|
"learning_rate": 3.092848435561084e-05,
|
|
"loss": 1.0456840991973877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802231013774872,
|
|
"step": 453,
|
|
"valid_targets_mean": 14959.6,
|
|
"valid_targets_min": 5240
|
|
},
|
|
{
|
|
"epoch": 1.9339019189765458,
|
|
"grad_norm": 0.1408044774027632,
|
|
"learning_rate": 3.0878651660787376e-05,
|
|
"loss": 1.0969831943511963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27857857942581177,
|
|
"step": 454,
|
|
"valid_targets_mean": 15442.2,
|
|
"valid_targets_min": 8079
|
|
},
|
|
{
|
|
"epoch": 1.9381663113006398,
|
|
"grad_norm": 0.12883710039602786,
|
|
"learning_rate": 3.082872286571295e-05,
|
|
"loss": 1.0829414129257202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28005450963974,
|
|
"step": 455,
|
|
"valid_targets_mean": 15453.0,
|
|
"valid_targets_min": 2824
|
|
},
|
|
{
|
|
"epoch": 1.9424307036247335,
|
|
"grad_norm": 0.13891804038641842,
|
|
"learning_rate": 3.077869841145049e-05,
|
|
"loss": 1.0985287427902222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2765417695045471,
|
|
"step": 456,
|
|
"valid_targets_mean": 15028.1,
|
|
"valid_targets_min": 6473
|
|
},
|
|
{
|
|
"epoch": 1.9466950959488272,
|
|
"grad_norm": 0.12140438513294068,
|
|
"learning_rate": 3.0728578739907934e-05,
|
|
"loss": 1.0545512437820435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601082921028137,
|
|
"step": 457,
|
|
"valid_targets_mean": 14672.4,
|
|
"valid_targets_min": 4414
|
|
},
|
|
{
|
|
"epoch": 1.950959488272921,
|
|
"grad_norm": 0.12549491266772111,
|
|
"learning_rate": 3.067836429383437e-05,
|
|
"loss": 1.0950112342834473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269310861825943,
|
|
"step": 458,
|
|
"valid_targets_mean": 14378.7,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 1.955223880597015,
|
|
"grad_norm": 0.14450297661194325,
|
|
"learning_rate": 3.062805551681609e-05,
|
|
"loss": 1.0445308685302734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275421679019928,
|
|
"step": 459,
|
|
"valid_targets_mean": 15370.7,
|
|
"valid_targets_min": 8461
|
|
},
|
|
{
|
|
"epoch": 1.9594882729211087,
|
|
"grad_norm": 0.1336770981792758,
|
|
"learning_rate": 3.057765285327271e-05,
|
|
"loss": 1.063051462173462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2548394203186035,
|
|
"step": 460,
|
|
"valid_targets_mean": 15211.4,
|
|
"valid_targets_min": 6680
|
|
},
|
|
{
|
|
"epoch": 1.9637526652452024,
|
|
"grad_norm": 0.13802337184331176,
|
|
"learning_rate": 3.0527156748453214e-05,
|
|
"loss": 1.0926513671875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27013474702835083,
|
|
"step": 461,
|
|
"valid_targets_mean": 15154.7,
|
|
"valid_targets_min": 4077
|
|
},
|
|
{
|
|
"epoch": 1.9680170575692963,
|
|
"grad_norm": 0.1292011752261259,
|
|
"learning_rate": 3.047656764843203e-05,
|
|
"loss": 1.020573377609253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25721079111099243,
|
|
"step": 462,
|
|
"valid_targets_mean": 15334.2,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 1.9722814498933903,
|
|
"grad_norm": 0.12136773774009396,
|
|
"learning_rate": 3.0425886000105094e-05,
|
|
"loss": 1.068652868270874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549925446510315,
|
|
"step": 463,
|
|
"valid_targets_mean": 14423.4,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 1.976545842217484,
|
|
"grad_norm": 0.13903637021737197,
|
|
"learning_rate": 3.0375112251185892e-05,
|
|
"loss": 1.0581138134002686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28126290440559387,
|
|
"step": 464,
|
|
"valid_targets_mean": 14793.0,
|
|
"valid_targets_min": 5216
|
|
},
|
|
{
|
|
"epoch": 1.9808102345415777,
|
|
"grad_norm": 0.11383420277981499,
|
|
"learning_rate": 3.0324246850201527e-05,
|
|
"loss": 1.0606683492660522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26790544390678406,
|
|
"step": 465,
|
|
"valid_targets_mean": 14773.3,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 1.9850746268656716,
|
|
"grad_norm": 0.11989728522400829,
|
|
"learning_rate": 3.0273290246488732e-05,
|
|
"loss": 1.0910248756408691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27009302377700806,
|
|
"step": 466,
|
|
"valid_targets_mean": 14553.0,
|
|
"valid_targets_min": 3103
|
|
},
|
|
{
|
|
"epoch": 1.9893390191897655,
|
|
"grad_norm": 0.0960490749851528,
|
|
"learning_rate": 3.0222242890189904e-05,
|
|
"loss": 1.051931381225586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26700809597969055,
|
|
"step": 467,
|
|
"valid_targets_mean": 15334.7,
|
|
"valid_targets_min": 8463
|
|
},
|
|
{
|
|
"epoch": 1.9936034115138592,
|
|
"grad_norm": 0.138459359183186,
|
|
"learning_rate": 3.017110523224914e-05,
|
|
"loss": 1.0271093845367432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26433560252189636,
|
|
"step": 468,
|
|
"valid_targets_mean": 15226.9,
|
|
"valid_targets_min": 4884
|
|
},
|
|
{
|
|
"epoch": 1.997867803837953,
|
|
"grad_norm": 0.11985609012079511,
|
|
"learning_rate": 3.011987772440825e-05,
|
|
"loss": 1.0503500699996948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652145028114319,
|
|
"step": 469,
|
|
"valid_targets_mean": 14280.6,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.13892504913177423,
|
|
"learning_rate": 3.006856081920277e-05,
|
|
"loss": 1.0645921230316162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5336452126502991,
|
|
"step": 470,
|
|
"valid_targets_mean": 15389.5,
|
|
"valid_targets_min": 10681
|
|
},
|
|
{
|
|
"epoch": 2.0042643923240937,
|
|
"grad_norm": 0.14654076190383902,
|
|
"learning_rate": 3.001715496995793e-05,
|
|
"loss": 1.0488369464874268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2612645626068115,
|
|
"step": 471,
|
|
"valid_targets_mean": 14465.4,
|
|
"valid_targets_min": 7416
|
|
},
|
|
{
|
|
"epoch": 2.008528784648188,
|
|
"grad_norm": 0.09765859901344034,
|
|
"learning_rate": 2.9965660630784715e-05,
|
|
"loss": 1.0690792798995972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27206361293792725,
|
|
"step": 472,
|
|
"valid_targets_mean": 15750.6,
|
|
"valid_targets_min": 11215
|
|
},
|
|
{
|
|
"epoch": 2.0127931769722816,
|
|
"grad_norm": 0.12410263632364689,
|
|
"learning_rate": 2.9914078256575782e-05,
|
|
"loss": 1.0798900127410889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262641966342926,
|
|
"step": 473,
|
|
"valid_targets_mean": 15144.8,
|
|
"valid_targets_min": 6244
|
|
},
|
|
{
|
|
"epoch": 2.0170575692963753,
|
|
"grad_norm": 0.10765759057868828,
|
|
"learning_rate": 2.9862408303001492e-05,
|
|
"loss": 1.1243364810943604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022628724575043,
|
|
"step": 474,
|
|
"valid_targets_mean": 14726.7,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 2.021321961620469,
|
|
"grad_norm": 0.11877832476358277,
|
|
"learning_rate": 2.9810651226505875e-05,
|
|
"loss": 1.0872790813446045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28186672925949097,
|
|
"step": 475,
|
|
"valid_targets_mean": 14472.4,
|
|
"valid_targets_min": 2395
|
|
},
|
|
{
|
|
"epoch": 2.025586353944563,
|
|
"grad_norm": 0.12106720736020389,
|
|
"learning_rate": 2.9758807484302566e-05,
|
|
"loss": 1.0768089294433594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25935864448547363,
|
|
"step": 476,
|
|
"valid_targets_mean": 14108.2,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 2.029850746268657,
|
|
"grad_norm": 0.10492971526449288,
|
|
"learning_rate": 2.9706877534370822e-05,
|
|
"loss": 1.0892443656921387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264812707901001,
|
|
"step": 477,
|
|
"valid_targets_mean": 14513.5,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 2.0341151385927505,
|
|
"grad_norm": 0.10812163248652905,
|
|
"learning_rate": 2.965486183545142e-05,
|
|
"loss": 1.060435175895691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632817029953003,
|
|
"step": 478,
|
|
"valid_targets_mean": 14508.8,
|
|
"valid_targets_min": 3080
|
|
},
|
|
{
|
|
"epoch": 2.038379530916844,
|
|
"grad_norm": 0.12476950594114133,
|
|
"learning_rate": 2.9602760847042645e-05,
|
|
"loss": 1.0474357604980469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663882076740265,
|
|
"step": 479,
|
|
"valid_targets_mean": 15315.6,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 2.0426439232409384,
|
|
"grad_norm": 0.10331436161121431,
|
|
"learning_rate": 2.955057502939621e-05,
|
|
"loss": 1.0485198497772217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24855202436447144,
|
|
"step": 480,
|
|
"valid_targets_mean": 13571.3,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 2.046908315565032,
|
|
"grad_norm": 0.12705868998848419,
|
|
"learning_rate": 2.9498304843513193e-05,
|
|
"loss": 1.0671385526657104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263543963432312,
|
|
"step": 481,
|
|
"valid_targets_mean": 14991.1,
|
|
"valid_targets_min": 2646
|
|
},
|
|
{
|
|
"epoch": 2.0511727078891258,
|
|
"grad_norm": 0.12583377208587335,
|
|
"learning_rate": 2.9445950751139957e-05,
|
|
"loss": 1.0316877365112305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590932846069336,
|
|
"step": 482,
|
|
"valid_targets_mean": 15205.2,
|
|
"valid_targets_min": 8022
|
|
},
|
|
{
|
|
"epoch": 2.0554371002132195,
|
|
"grad_norm": 0.1033382884036626,
|
|
"learning_rate": 2.939351321476412e-05,
|
|
"loss": 1.0455005168914795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25957465171813965,
|
|
"step": 483,
|
|
"valid_targets_mean": 15534.1,
|
|
"valid_targets_min": 10502
|
|
},
|
|
{
|
|
"epoch": 2.0597014925373136,
|
|
"grad_norm": 0.11256282136959297,
|
|
"learning_rate": 2.9340992697610393e-05,
|
|
"loss": 1.0130512714385986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24798721075057983,
|
|
"step": 484,
|
|
"valid_targets_mean": 14460.0,
|
|
"valid_targets_min": 2138
|
|
},
|
|
{
|
|
"epoch": 2.0639658848614073,
|
|
"grad_norm": 0.10108481622924992,
|
|
"learning_rate": 2.9288389663636537e-05,
|
|
"loss": 1.0002269744873047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23295074701309204,
|
|
"step": 485,
|
|
"valid_targets_mean": 14413.2,
|
|
"valid_targets_min": 4603
|
|
},
|
|
{
|
|
"epoch": 2.068230277185501,
|
|
"grad_norm": 0.10947225638999011,
|
|
"learning_rate": 2.923570457752925e-05,
|
|
"loss": 1.0374996662139893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23345667123794556,
|
|
"step": 486,
|
|
"valid_targets_mean": 13273.5,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 2.0724946695095947,
|
|
"grad_norm": 0.10364319135942143,
|
|
"learning_rate": 2.9182937904700078e-05,
|
|
"loss": 1.0086736679077148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536025643348694,
|
|
"step": 487,
|
|
"valid_targets_mean": 14879.2,
|
|
"valid_targets_min": 6175
|
|
},
|
|
{
|
|
"epoch": 2.076759061833689,
|
|
"grad_norm": 0.12504876779626076,
|
|
"learning_rate": 2.9130090111281278e-05,
|
|
"loss": 1.0883104801177979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776550054550171,
|
|
"step": 488,
|
|
"valid_targets_mean": 14863.7,
|
|
"valid_targets_min": 6154
|
|
},
|
|
{
|
|
"epoch": 2.0810234541577826,
|
|
"grad_norm": 0.09758465515459959,
|
|
"learning_rate": 2.9077161664121722e-05,
|
|
"loss": 1.0496957302093506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773936986923218,
|
|
"step": 489,
|
|
"valid_targets_mean": 15143.3,
|
|
"valid_targets_min": 8949
|
|
},
|
|
{
|
|
"epoch": 2.0852878464818763,
|
|
"grad_norm": 0.11514922029821574,
|
|
"learning_rate": 2.902415303078275e-05,
|
|
"loss": 1.0517609119415283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25558456778526306,
|
|
"step": 490,
|
|
"valid_targets_mean": 14162.9,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 2.08955223880597,
|
|
"grad_norm": 0.09452937643372553,
|
|
"learning_rate": 2.8971064679534072e-05,
|
|
"loss": 1.0327926874160767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27184581756591797,
|
|
"step": 491,
|
|
"valid_targets_mean": 14765.9,
|
|
"valid_targets_min": 3752
|
|
},
|
|
{
|
|
"epoch": 2.093816631130064,
|
|
"grad_norm": 0.10052766633828619,
|
|
"learning_rate": 2.8917897079349604e-05,
|
|
"loss": 1.0536391735076904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2497003972530365,
|
|
"step": 492,
|
|
"valid_targets_mean": 14619.1,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 2.098081023454158,
|
|
"grad_norm": 0.10265245574534791,
|
|
"learning_rate": 2.8864650699903336e-05,
|
|
"loss": 1.036287784576416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603163421154022,
|
|
"step": 493,
|
|
"valid_targets_mean": 14403.3,
|
|
"valid_targets_min": 6370
|
|
},
|
|
{
|
|
"epoch": 2.1023454157782515,
|
|
"grad_norm": 0.10298965529413852,
|
|
"learning_rate": 2.881132601156518e-05,
|
|
"loss": 1.0232374668121338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2467612475156784,
|
|
"step": 494,
|
|
"valid_targets_mean": 14734.8,
|
|
"valid_targets_min": 4443
|
|
},
|
|
{
|
|
"epoch": 2.106609808102345,
|
|
"grad_norm": 0.09547113669015726,
|
|
"learning_rate": 2.8757923485396805e-05,
|
|
"loss": 1.0421087741851807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.252534955739975,
|
|
"step": 495,
|
|
"valid_targets_mean": 14424.6,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 2.1108742004264394,
|
|
"grad_norm": 0.09405820960580409,
|
|
"learning_rate": 2.8704443593147517e-05,
|
|
"loss": 1.028683066368103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26103419065475464,
|
|
"step": 496,
|
|
"valid_targets_mean": 14415.9,
|
|
"valid_targets_min": 1783
|
|
},
|
|
{
|
|
"epoch": 2.115138592750533,
|
|
"grad_norm": 0.08798921543478255,
|
|
"learning_rate": 2.8650886807250024e-05,
|
|
"loss": 1.0675265789031982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25961798429489136,
|
|
"step": 497,
|
|
"valid_targets_mean": 15019.7,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 2.1194029850746268,
|
|
"grad_norm": 0.12299984240915911,
|
|
"learning_rate": 2.8597253600816332e-05,
|
|
"loss": 1.0105128288269043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24885162711143494,
|
|
"step": 498,
|
|
"valid_targets_mean": 14422.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 2.1236673773987205,
|
|
"grad_norm": 0.10708533151927112,
|
|
"learning_rate": 2.8543544447633517e-05,
|
|
"loss": 1.06392502784729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735995650291443,
|
|
"step": 499,
|
|
"valid_targets_mean": 15586.0,
|
|
"valid_targets_min": 9093
|
|
},
|
|
{
|
|
"epoch": 2.1279317697228146,
|
|
"grad_norm": 0.1111465159995418,
|
|
"learning_rate": 2.8489759822159558e-05,
|
|
"loss": 1.0755326747894287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26666438579559326,
|
|
"step": 500,
|
|
"valid_targets_mean": 14653.4,
|
|
"valid_targets_min": 3780
|
|
},
|
|
{
|
|
"epoch": 2.1321961620469083,
|
|
"grad_norm": 0.10381862287254379,
|
|
"learning_rate": 2.843590019951914e-05,
|
|
"loss": 1.0193350315093994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575969099998474,
|
|
"step": 501,
|
|
"valid_targets_mean": 15342.2,
|
|
"valid_targets_min": 6106
|
|
},
|
|
{
|
|
"epoch": 2.136460554371002,
|
|
"grad_norm": 0.15702765736050914,
|
|
"learning_rate": 2.838196605549948e-05,
|
|
"loss": 1.0695016384124756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27278703451156616,
|
|
"step": 502,
|
|
"valid_targets_mean": 14913.7,
|
|
"valid_targets_min": 4352
|
|
},
|
|
{
|
|
"epoch": 2.140724946695096,
|
|
"grad_norm": 0.09524830986021622,
|
|
"learning_rate": 2.8327957866546082e-05,
|
|
"loss": 1.0777015686035156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562635540962219,
|
|
"step": 503,
|
|
"valid_targets_mean": 15117.6,
|
|
"valid_targets_min": 4562
|
|
},
|
|
{
|
|
"epoch": 2.14498933901919,
|
|
"grad_norm": 0.1257807444880808,
|
|
"learning_rate": 2.8273876109758568e-05,
|
|
"loss": 1.0276715755462646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588934302330017,
|
|
"step": 504,
|
|
"valid_targets_mean": 14904.7,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 2.1492537313432836,
|
|
"grad_norm": 0.11853517800630066,
|
|
"learning_rate": 2.8219721262886427e-05,
|
|
"loss": 1.0456304550170898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26853859424591064,
|
|
"step": 505,
|
|
"valid_targets_mean": 15175.2,
|
|
"valid_targets_min": 7027
|
|
},
|
|
{
|
|
"epoch": 2.1535181236673773,
|
|
"grad_norm": 0.12490590365139391,
|
|
"learning_rate": 2.816549380432483e-05,
|
|
"loss": 1.0226656198501587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2514593005180359,
|
|
"step": 506,
|
|
"valid_targets_mean": 15446.9,
|
|
"valid_targets_min": 5452
|
|
},
|
|
{
|
|
"epoch": 2.1577825159914714,
|
|
"grad_norm": 0.10570630724959912,
|
|
"learning_rate": 2.8111194213110386e-05,
|
|
"loss": 1.0459332466125488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263741672039032,
|
|
"step": 507,
|
|
"valid_targets_mean": 15202.8,
|
|
"valid_targets_min": 7273
|
|
},
|
|
{
|
|
"epoch": 2.162046908315565,
|
|
"grad_norm": 0.1244887321697419,
|
|
"learning_rate": 2.805682296891691e-05,
|
|
"loss": 1.08240807056427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581389248371124,
|
|
"step": 508,
|
|
"valid_targets_mean": 14719.4,
|
|
"valid_targets_min": 2806
|
|
},
|
|
{
|
|
"epoch": 2.166311300639659,
|
|
"grad_norm": 0.10604438243953074,
|
|
"learning_rate": 2.8002380552051186e-05,
|
|
"loss": 1.0506298542022705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25669777393341064,
|
|
"step": 509,
|
|
"valid_targets_mean": 14633.6,
|
|
"valid_targets_min": 5078
|
|
},
|
|
{
|
|
"epoch": 2.1705756929637525,
|
|
"grad_norm": 0.09916450205483587,
|
|
"learning_rate": 2.7947867443448728e-05,
|
|
"loss": 1.0945768356323242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273698627948761,
|
|
"step": 510,
|
|
"valid_targets_mean": 15665.9,
|
|
"valid_targets_min": 7606
|
|
},
|
|
{
|
|
"epoch": 2.1748400852878467,
|
|
"grad_norm": 0.1451666319315318,
|
|
"learning_rate": 2.789328412466953e-05,
|
|
"loss": 1.0555355548858643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25316354632377625,
|
|
"step": 511,
|
|
"valid_targets_mean": 14995.8,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 2.1791044776119404,
|
|
"grad_norm": 0.10069828710900193,
|
|
"learning_rate": 2.7838631077893813e-05,
|
|
"loss": 1.069288730621338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25883394479751587,
|
|
"step": 512,
|
|
"valid_targets_mean": 14451.3,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 2.183368869936034,
|
|
"grad_norm": 0.12195550659367309,
|
|
"learning_rate": 2.7783908785917753e-05,
|
|
"loss": 1.0519754886627197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789909243583679,
|
|
"step": 513,
|
|
"valid_targets_mean": 14679.6,
|
|
"valid_targets_min": 4470
|
|
},
|
|
{
|
|
"epoch": 2.1876332622601278,
|
|
"grad_norm": 0.09827810793873502,
|
|
"learning_rate": 2.7729117732149244e-05,
|
|
"loss": 1.0605522394180298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26741981506347656,
|
|
"step": 514,
|
|
"valid_targets_mean": 15088.2,
|
|
"valid_targets_min": 2524
|
|
},
|
|
{
|
|
"epoch": 2.191897654584222,
|
|
"grad_norm": 0.148086971370313,
|
|
"learning_rate": 2.7674258400603587e-05,
|
|
"loss": 1.0701409578323364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29160070419311523,
|
|
"step": 515,
|
|
"valid_targets_mean": 15567.1,
|
|
"valid_targets_min": 3615
|
|
},
|
|
{
|
|
"epoch": 2.1961620469083156,
|
|
"grad_norm": 0.11372463082623945,
|
|
"learning_rate": 2.761933127589927e-05,
|
|
"loss": 1.040367603302002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650470435619354,
|
|
"step": 516,
|
|
"valid_targets_mean": 15260.5,
|
|
"valid_targets_min": 8467
|
|
},
|
|
{
|
|
"epoch": 2.2004264392324093,
|
|
"grad_norm": 0.11489693774950514,
|
|
"learning_rate": 2.7564336843253633e-05,
|
|
"loss": 1.0579705238342285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2514723539352417,
|
|
"step": 517,
|
|
"valid_targets_mean": 14808.2,
|
|
"valid_targets_min": 4287
|
|
},
|
|
{
|
|
"epoch": 2.204690831556503,
|
|
"grad_norm": 0.13092540532321323,
|
|
"learning_rate": 2.7509275588478606e-05,
|
|
"loss": 1.0046842098236084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24405673146247864,
|
|
"step": 518,
|
|
"valid_targets_mean": 14465.2,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 2.208955223880597,
|
|
"grad_norm": 0.1400019168183909,
|
|
"learning_rate": 2.7454147997976404e-05,
|
|
"loss": 1.0589232444763184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26718807220458984,
|
|
"step": 519,
|
|
"valid_targets_mean": 15228.6,
|
|
"valid_targets_min": 8280
|
|
},
|
|
{
|
|
"epoch": 2.213219616204691,
|
|
"grad_norm": 0.11124391098691529,
|
|
"learning_rate": 2.7398954558735272e-05,
|
|
"loss": 1.0676054954528809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782477140426636,
|
|
"step": 520,
|
|
"valid_targets_mean": 14580.4,
|
|
"valid_targets_min": 4572
|
|
},
|
|
{
|
|
"epoch": 2.2174840085287846,
|
|
"grad_norm": 0.1412550977287731,
|
|
"learning_rate": 2.7343695758325125e-05,
|
|
"loss": 1.0848462581634521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26414549350738525,
|
|
"step": 521,
|
|
"valid_targets_mean": 14224.1,
|
|
"valid_targets_min": 2748
|
|
},
|
|
{
|
|
"epoch": 2.2217484008528783,
|
|
"grad_norm": 0.1230055981160842,
|
|
"learning_rate": 2.7288372084893282e-05,
|
|
"loss": 1.0164406299591064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2596801519393921,
|
|
"step": 522,
|
|
"valid_targets_mean": 15459.9,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 2.2260127931769724,
|
|
"grad_norm": 0.12744240689331374,
|
|
"learning_rate": 2.7232984027160126e-05,
|
|
"loss": 1.0437984466552734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2318439483642578,
|
|
"step": 523,
|
|
"valid_targets_mean": 15135.6,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 2.230277185501066,
|
|
"grad_norm": 0.1612875067947998,
|
|
"learning_rate": 2.7177532074414822e-05,
|
|
"loss": 1.0547361373901367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623888850212097,
|
|
"step": 524,
|
|
"valid_targets_mean": 13888.8,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 2.23454157782516,
|
|
"grad_norm": 0.12041519813523655,
|
|
"learning_rate": 2.712201671651094e-05,
|
|
"loss": 1.059849500656128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2720355987548828,
|
|
"step": 525,
|
|
"valid_targets_mean": 14897.0,
|
|
"valid_targets_min": 3212
|
|
},
|
|
{
|
|
"epoch": 2.2388059701492535,
|
|
"grad_norm": 0.14067840789590605,
|
|
"learning_rate": 2.7066438443862205e-05,
|
|
"loss": 1.0363714694976807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24360054731369019,
|
|
"step": 526,
|
|
"valid_targets_mean": 14457.7,
|
|
"valid_targets_min": 3828
|
|
},
|
|
{
|
|
"epoch": 2.2430703624733477,
|
|
"grad_norm": 0.14427320836445826,
|
|
"learning_rate": 2.701079774743808e-05,
|
|
"loss": 1.0071572065353394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501201629638672,
|
|
"step": 527,
|
|
"valid_targets_mean": 15279.3,
|
|
"valid_targets_min": 6688
|
|
},
|
|
{
|
|
"epoch": 2.2473347547974414,
|
|
"grad_norm": 0.10563699141677124,
|
|
"learning_rate": 2.6955095118759496e-05,
|
|
"loss": 1.0626296997070312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656599283218384,
|
|
"step": 528,
|
|
"valid_targets_mean": 14363.7,
|
|
"valid_targets_min": 3823
|
|
},
|
|
{
|
|
"epoch": 2.251599147121535,
|
|
"grad_norm": 0.16100690940471393,
|
|
"learning_rate": 2.689933104989447e-05,
|
|
"loss": 1.0518825054168701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25958943367004395,
|
|
"step": 529,
|
|
"valid_targets_mean": 15011.1,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 2.2558635394456292,
|
|
"grad_norm": 0.11964514918135785,
|
|
"learning_rate": 2.6843506033453777e-05,
|
|
"loss": 1.0230783224105835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24286603927612305,
|
|
"step": 530,
|
|
"valid_targets_mean": 15347.9,
|
|
"valid_targets_min": 6526
|
|
},
|
|
{
|
|
"epoch": 2.260127931769723,
|
|
"grad_norm": 0.13360952002492774,
|
|
"learning_rate": 2.6787620562586587e-05,
|
|
"loss": 1.0393277406692505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762875258922577,
|
|
"step": 531,
|
|
"valid_targets_mean": 15673.6,
|
|
"valid_targets_min": 9246
|
|
},
|
|
{
|
|
"epoch": 2.2643923240938166,
|
|
"grad_norm": 0.13524679242007398,
|
|
"learning_rate": 2.673167513097613e-05,
|
|
"loss": 1.031490683555603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25706005096435547,
|
|
"step": 532,
|
|
"valid_targets_mean": 14525.1,
|
|
"valid_targets_min": 2960
|
|
},
|
|
{
|
|
"epoch": 2.2686567164179103,
|
|
"grad_norm": 0.10535102533517332,
|
|
"learning_rate": 2.6675670232835297e-05,
|
|
"loss": 1.0020201206207275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23740331828594208,
|
|
"step": 533,
|
|
"valid_targets_mean": 14035.9,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 2.272921108742004,
|
|
"grad_norm": 0.1458520051591525,
|
|
"learning_rate": 2.661960636290231e-05,
|
|
"loss": 1.051685094833374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684166729450226,
|
|
"step": 534,
|
|
"valid_targets_mean": 14944.8,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 2.277185501066098,
|
|
"grad_norm": 0.12900710771871615,
|
|
"learning_rate": 2.6563484016436346e-05,
|
|
"loss": 1.0715006589889526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26723259687423706,
|
|
"step": 535,
|
|
"valid_targets_mean": 14942.6,
|
|
"valid_targets_min": 5829
|
|
},
|
|
{
|
|
"epoch": 2.281449893390192,
|
|
"grad_norm": 0.12004783567705744,
|
|
"learning_rate": 2.6507303689213143e-05,
|
|
"loss": 1.0671104192733765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3115587532520294,
|
|
"step": 536,
|
|
"valid_targets_mean": 15606.5,
|
|
"valid_targets_min": 11520
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.10282452687968437,
|
|
"learning_rate": 2.6451065877520634e-05,
|
|
"loss": 1.0426161289215088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26459482312202454,
|
|
"step": 537,
|
|
"valid_targets_mean": 15469.4,
|
|
"valid_targets_min": 8077
|
|
},
|
|
{
|
|
"epoch": 2.2899786780383797,
|
|
"grad_norm": 0.12209134268555552,
|
|
"learning_rate": 2.639477107815455e-05,
|
|
"loss": 1.000340223312378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2603060007095337,
|
|
"step": 538,
|
|
"valid_targets_mean": 15048.8,
|
|
"valid_targets_min": 7278
|
|
},
|
|
{
|
|
"epoch": 2.2942430703624734,
|
|
"grad_norm": 0.12311188310647632,
|
|
"learning_rate": 2.633841978841406e-05,
|
|
"loss": 1.0291199684143066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517496943473816,
|
|
"step": 539,
|
|
"valid_targets_mean": 14726.7,
|
|
"valid_targets_min": 8625
|
|
},
|
|
{
|
|
"epoch": 2.298507462686567,
|
|
"grad_norm": 0.11903138455692189,
|
|
"learning_rate": 2.6282012506097347e-05,
|
|
"loss": 1.0493464469909668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605173587799072,
|
|
"step": 540,
|
|
"valid_targets_mean": 14719.3,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 2.302771855010661,
|
|
"grad_norm": 0.10950615934598747,
|
|
"learning_rate": 2.622554972949724e-05,
|
|
"loss": 1.0875517129898071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258272647857666,
|
|
"step": 541,
|
|
"valid_targets_mean": 13960.9,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 2.307036247334755,
|
|
"grad_norm": 0.12626593813951068,
|
|
"learning_rate": 2.6169031957396778e-05,
|
|
"loss": 1.0525660514831543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733648419380188,
|
|
"step": 542,
|
|
"valid_targets_mean": 14309.1,
|
|
"valid_targets_min": 2133
|
|
},
|
|
{
|
|
"epoch": 2.3113006396588487,
|
|
"grad_norm": 0.10880109845764573,
|
|
"learning_rate": 2.611245968906482e-05,
|
|
"loss": 1.0387375354766846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732793092727661,
|
|
"step": 543,
|
|
"valid_targets_mean": 14985.1,
|
|
"valid_targets_min": 3590
|
|
},
|
|
{
|
|
"epoch": 2.3155650319829424,
|
|
"grad_norm": 0.11600835728660512,
|
|
"learning_rate": 2.605583342425165e-05,
|
|
"loss": 1.0523663759231567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24638208746910095,
|
|
"step": 544,
|
|
"valid_targets_mean": 14012.8,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 2.319829424307036,
|
|
"grad_norm": 0.12730261852879615,
|
|
"learning_rate": 2.5999153663184546e-05,
|
|
"loss": 1.1032425165176392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970106601715088,
|
|
"step": 545,
|
|
"valid_targets_mean": 14787.3,
|
|
"valid_targets_min": 4073
|
|
},
|
|
{
|
|
"epoch": 2.3240938166311302,
|
|
"grad_norm": 0.09408360904652946,
|
|
"learning_rate": 2.594242090656335e-05,
|
|
"loss": 1.0487980842590332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28500109910964966,
|
|
"step": 546,
|
|
"valid_targets_mean": 15352.6,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 2.328358208955224,
|
|
"grad_norm": 0.12953695636281873,
|
|
"learning_rate": 2.5885635655556075e-05,
|
|
"loss": 1.0353131294250488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830761671066284,
|
|
"step": 547,
|
|
"valid_targets_mean": 15987.8,
|
|
"valid_targets_min": 12612
|
|
},
|
|
{
|
|
"epoch": 2.3326226012793176,
|
|
"grad_norm": 0.11541132531604792,
|
|
"learning_rate": 2.5828798411794443e-05,
|
|
"loss": 1.0664570331573486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.238357812166214,
|
|
"step": 548,
|
|
"valid_targets_mean": 14100.9,
|
|
"valid_targets_min": 2760
|
|
},
|
|
{
|
|
"epoch": 2.3368869936034113,
|
|
"grad_norm": 0.11172920727522387,
|
|
"learning_rate": 2.5771909677369484e-05,
|
|
"loss": 1.026410698890686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699667513370514,
|
|
"step": 549,
|
|
"valid_targets_mean": 15412.9,
|
|
"valid_targets_min": 9229
|
|
},
|
|
{
|
|
"epoch": 2.3411513859275055,
|
|
"grad_norm": 0.11690823967802624,
|
|
"learning_rate": 2.571496995482709e-05,
|
|
"loss": 1.0822011232376099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635524868965149,
|
|
"step": 550,
|
|
"valid_targets_mean": 14972.8,
|
|
"valid_targets_min": 2851
|
|
},
|
|
{
|
|
"epoch": 2.345415778251599,
|
|
"grad_norm": 0.12618407659970443,
|
|
"learning_rate": 2.565797974716357e-05,
|
|
"loss": 1.0665310621261597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24963496625423431,
|
|
"step": 551,
|
|
"valid_targets_mean": 14868.7,
|
|
"valid_targets_min": 5691
|
|
},
|
|
{
|
|
"epoch": 2.349680170575693,
|
|
"grad_norm": 0.10116100775051301,
|
|
"learning_rate": 2.5600939557821205e-05,
|
|
"loss": 1.021169900894165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607884407043457,
|
|
"step": 552,
|
|
"valid_targets_mean": 14750.9,
|
|
"valid_targets_min": 2857
|
|
},
|
|
{
|
|
"epoch": 2.3539445628997866,
|
|
"grad_norm": 0.10624136092421245,
|
|
"learning_rate": 2.5543849890683813e-05,
|
|
"loss": 1.0231151580810547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26667192578315735,
|
|
"step": 553,
|
|
"valid_targets_mean": 15451.8,
|
|
"valid_targets_min": 8293
|
|
},
|
|
{
|
|
"epoch": 2.3582089552238807,
|
|
"grad_norm": 0.11314597081105966,
|
|
"learning_rate": 2.548671125007229e-05,
|
|
"loss": 1.025337815284729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604908347129822,
|
|
"step": 554,
|
|
"valid_targets_mean": 13587.4,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 2.3624733475479744,
|
|
"grad_norm": 0.09734334725076549,
|
|
"learning_rate": 2.5429524140740155e-05,
|
|
"loss": 1.0691876411437988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27501150965690613,
|
|
"step": 555,
|
|
"valid_targets_mean": 14751.3,
|
|
"valid_targets_min": 4350
|
|
},
|
|
{
|
|
"epoch": 2.366737739872068,
|
|
"grad_norm": 0.12089441342104176,
|
|
"learning_rate": 2.537228906786908e-05,
|
|
"loss": 1.054142951965332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26418209075927734,
|
|
"step": 556,
|
|
"valid_targets_mean": 15161.1,
|
|
"valid_targets_min": 8012
|
|
},
|
|
{
|
|
"epoch": 2.3710021321961623,
|
|
"grad_norm": 0.10876736149695052,
|
|
"learning_rate": 2.5315006537064473e-05,
|
|
"loss": 1.0506845712661743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24916253983974457,
|
|
"step": 557,
|
|
"valid_targets_mean": 14328.4,
|
|
"valid_targets_min": 5090
|
|
},
|
|
{
|
|
"epoch": 2.375266524520256,
|
|
"grad_norm": 0.11063627553501197,
|
|
"learning_rate": 2.5257677054350927e-05,
|
|
"loss": 1.029079556465149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.246652752161026,
|
|
"step": 558,
|
|
"valid_targets_mean": 14815.7,
|
|
"valid_targets_min": 2327
|
|
},
|
|
{
|
|
"epoch": 2.3795309168443497,
|
|
"grad_norm": 0.11486869153400572,
|
|
"learning_rate": 2.5200301126167857e-05,
|
|
"loss": 1.0258636474609375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2491309493780136,
|
|
"step": 559,
|
|
"valid_targets_mean": 14963.6,
|
|
"valid_targets_min": 7539
|
|
},
|
|
{
|
|
"epoch": 2.3837953091684434,
|
|
"grad_norm": 0.09927324377094049,
|
|
"learning_rate": 2.514287925936492e-05,
|
|
"loss": 1.0641462802886963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27500349283218384,
|
|
"step": 560,
|
|
"valid_targets_mean": 15140.9,
|
|
"valid_targets_min": 4632
|
|
},
|
|
{
|
|
"epoch": 2.388059701492537,
|
|
"grad_norm": 0.10288750250199045,
|
|
"learning_rate": 2.5085411961197626e-05,
|
|
"loss": 1.0823527574539185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25461655855178833,
|
|
"step": 561,
|
|
"valid_targets_mean": 14990.3,
|
|
"valid_targets_min": 5484
|
|
},
|
|
{
|
|
"epoch": 2.3923240938166312,
|
|
"grad_norm": 0.13256053021297226,
|
|
"learning_rate": 2.502789973932278e-05,
|
|
"loss": 1.0496501922607422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626480460166931,
|
|
"step": 562,
|
|
"valid_targets_mean": 14573.9,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 2.396588486140725,
|
|
"grad_norm": 0.10248061933901362,
|
|
"learning_rate": 2.4970343101794073e-05,
|
|
"loss": 1.05353844165802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25262895226478577,
|
|
"step": 563,
|
|
"valid_targets_mean": 14119.2,
|
|
"valid_targets_min": 2611
|
|
},
|
|
{
|
|
"epoch": 2.4008528784648187,
|
|
"grad_norm": 0.107275484222771,
|
|
"learning_rate": 2.4912742557057538e-05,
|
|
"loss": 1.0425084829330444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25184744596481323,
|
|
"step": 564,
|
|
"valid_targets_mean": 14551.3,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 2.405117270788913,
|
|
"grad_norm": 0.11233400291999039,
|
|
"learning_rate": 2.485509861394708e-05,
|
|
"loss": 1.0836342573165894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27323925495147705,
|
|
"step": 565,
|
|
"valid_targets_mean": 14915.9,
|
|
"valid_targets_min": 2938
|
|
},
|
|
{
|
|
"epoch": 2.4093816631130065,
|
|
"grad_norm": 0.10118669997629698,
|
|
"learning_rate": 2.4797411781679975e-05,
|
|
"loss": 0.997891902923584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830112874507904,
|
|
"step": 566,
|
|
"valid_targets_mean": 14926.4,
|
|
"valid_targets_min": 3260
|
|
},
|
|
{
|
|
"epoch": 2.4136460554371,
|
|
"grad_norm": 0.14380065598081848,
|
|
"learning_rate": 2.473968256985238e-05,
|
|
"loss": 1.0502943992614746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830333709716797,
|
|
"step": 567,
|
|
"valid_targets_mean": 15068.0,
|
|
"valid_targets_min": 6241
|
|
},
|
|
{
|
|
"epoch": 2.417910447761194,
|
|
"grad_norm": 0.10008203837515717,
|
|
"learning_rate": 2.4681911488434825e-05,
|
|
"loss": 1.0760055780410767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24836665391921997,
|
|
"step": 568,
|
|
"valid_targets_mean": 14463.9,
|
|
"valid_targets_min": 3930
|
|
},
|
|
{
|
|
"epoch": 2.4221748400852876,
|
|
"grad_norm": 0.09836327892263215,
|
|
"learning_rate": 2.4624099047767702e-05,
|
|
"loss": 1.0567803382873535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802816927433014,
|
|
"step": 569,
|
|
"valid_targets_mean": 15400.0,
|
|
"valid_targets_min": 7478
|
|
},
|
|
{
|
|
"epoch": 2.4264392324093818,
|
|
"grad_norm": 0.09992321153620136,
|
|
"learning_rate": 2.4566245758556787e-05,
|
|
"loss": 1.0168672800064087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24785345792770386,
|
|
"step": 570,
|
|
"valid_targets_mean": 14920.9,
|
|
"valid_targets_min": 2776
|
|
},
|
|
{
|
|
"epoch": 2.4307036247334755,
|
|
"grad_norm": 0.10037597222951485,
|
|
"learning_rate": 2.4508352131868664e-05,
|
|
"loss": 1.0484085083007812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27643099427223206,
|
|
"step": 571,
|
|
"valid_targets_mean": 15005.2,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 2.434968017057569,
|
|
"grad_norm": 0.12136623582803767,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 1.0116479396820068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26011645793914795,
|
|
"step": 572,
|
|
"valid_targets_mean": 14960.0,
|
|
"valid_targets_min": 4520
|
|
},
|
|
{
|
|
"epoch": 2.4392324093816633,
|
|
"grad_norm": 0.11241936997999565,
|
|
"learning_rate": 2.439244591210443e-05,
|
|
"loss": 1.0097274780273438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608654499053955,
|
|
"step": 573,
|
|
"valid_targets_mean": 15309.7,
|
|
"valid_targets_min": 7542
|
|
},
|
|
{
|
|
"epoch": 2.443496801705757,
|
|
"grad_norm": 0.11445438301863396,
|
|
"learning_rate": 2.4334434342925133e-05,
|
|
"loss": 1.03176748752594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2606462240219116,
|
|
"step": 574,
|
|
"valid_targets_mean": 14829.2,
|
|
"valid_targets_min": 4136
|
|
},
|
|
{
|
|
"epoch": 2.4477611940298507,
|
|
"grad_norm": 0.10495839525238645,
|
|
"learning_rate": 2.4276384484053227e-05,
|
|
"loss": 1.017263650894165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2691393494606018,
|
|
"step": 575,
|
|
"valid_targets_mean": 15787.1,
|
|
"valid_targets_min": 5599
|
|
},
|
|
{
|
|
"epoch": 2.4520255863539444,
|
|
"grad_norm": 0.13339390506906976,
|
|
"learning_rate": 2.4218296848291795e-05,
|
|
"loss": 1.0785963535308838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539163827896118,
|
|
"step": 576,
|
|
"valid_targets_mean": 14497.7,
|
|
"valid_targets_min": 7315
|
|
},
|
|
{
|
|
"epoch": 2.4562899786780386,
|
|
"grad_norm": 0.12957093965260985,
|
|
"learning_rate": 2.4160171948777603e-05,
|
|
"loss": 1.033412218093872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27271324396133423,
|
|
"step": 577,
|
|
"valid_targets_mean": 15432.2,
|
|
"valid_targets_min": 6030
|
|
},
|
|
{
|
|
"epoch": 2.4605543710021323,
|
|
"grad_norm": 0.12244323692075133,
|
|
"learning_rate": 2.410201029897665e-05,
|
|
"loss": 1.0976747274398804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28120583295822144,
|
|
"step": 578,
|
|
"valid_targets_mean": 15840.6,
|
|
"valid_targets_min": 12270
|
|
},
|
|
{
|
|
"epoch": 2.464818763326226,
|
|
"grad_norm": 0.12582676034955095,
|
|
"learning_rate": 2.4043812412679532e-05,
|
|
"loss": 1.0250262022018433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601637840270996,
|
|
"step": 579,
|
|
"valid_targets_mean": 15205.8,
|
|
"valid_targets_min": 6494
|
|
},
|
|
{
|
|
"epoch": 2.4690831556503197,
|
|
"grad_norm": 0.12580469890835344,
|
|
"learning_rate": 2.3985578803996985e-05,
|
|
"loss": 1.078566551208496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532144784927368,
|
|
"step": 580,
|
|
"valid_targets_mean": 14033.4,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 2.473347547974414,
|
|
"grad_norm": 0.11174049402117477,
|
|
"learning_rate": 2.392730998735529e-05,
|
|
"loss": 1.076801061630249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26035818457603455,
|
|
"step": 581,
|
|
"valid_targets_mean": 15582.1,
|
|
"valid_targets_min": 3901
|
|
},
|
|
{
|
|
"epoch": 2.4776119402985075,
|
|
"grad_norm": 0.13591649752794477,
|
|
"learning_rate": 2.3869006477491755e-05,
|
|
"loss": 1.0493996143341064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25792649388313293,
|
|
"step": 582,
|
|
"valid_targets_mean": 15696.8,
|
|
"valid_targets_min": 4585
|
|
},
|
|
{
|
|
"epoch": 2.481876332622601,
|
|
"grad_norm": 0.0950522497644923,
|
|
"learning_rate": 2.381066878945017e-05,
|
|
"loss": 1.0378646850585938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26439088582992554,
|
|
"step": 583,
|
|
"valid_targets_mean": 15172.5,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 2.486140724946695,
|
|
"grad_norm": 0.1354084961137629,
|
|
"learning_rate": 2.3752297438576257e-05,
|
|
"loss": 1.030313491821289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559170424938202,
|
|
"step": 584,
|
|
"valid_targets_mean": 15268.1,
|
|
"valid_targets_min": 5344
|
|
},
|
|
{
|
|
"epoch": 2.490405117270789,
|
|
"grad_norm": 0.10265331295285009,
|
|
"learning_rate": 2.3693892940513074e-05,
|
|
"loss": 1.0901963710784912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267939031124115,
|
|
"step": 585,
|
|
"valid_targets_mean": 14409.6,
|
|
"valid_targets_min": 4600
|
|
},
|
|
{
|
|
"epoch": 2.4946695095948828,
|
|
"grad_norm": 0.12583451372831064,
|
|
"learning_rate": 2.3635455811196536e-05,
|
|
"loss": 1.06803560256958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27083370089530945,
|
|
"step": 586,
|
|
"valid_targets_mean": 14663.4,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 2.4989339019189765,
|
|
"grad_norm": 0.11239371132679929,
|
|
"learning_rate": 2.3576986566850796e-05,
|
|
"loss": 1.0789930820465088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23372890055179596,
|
|
"step": 587,
|
|
"valid_targets_mean": 13293.2,
|
|
"valid_targets_min": 3881
|
|
},
|
|
{
|
|
"epoch": 2.50319829424307,
|
|
"grad_norm": 0.10871538748917994,
|
|
"learning_rate": 2.351848572398371e-05,
|
|
"loss": 1.018543004989624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24593549966812134,
|
|
"step": 588,
|
|
"valid_targets_mean": 13829.2,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 2.5074626865671643,
|
|
"grad_norm": 0.12245068189358427,
|
|
"learning_rate": 2.3459953799382276e-05,
|
|
"loss": 1.0276107788085938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681216597557068,
|
|
"step": 589,
|
|
"valid_targets_mean": 14876.3,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 2.511727078891258,
|
|
"grad_norm": 0.12396468143561087,
|
|
"learning_rate": 2.3401391310108054e-05,
|
|
"loss": 1.0470104217529297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828315794467926,
|
|
"step": 590,
|
|
"valid_targets_mean": 15137.2,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 2.5159914712153517,
|
|
"grad_norm": 0.10200845616471743,
|
|
"learning_rate": 2.3342798773492602e-05,
|
|
"loss": 1.0102992057800293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586694359779358,
|
|
"step": 591,
|
|
"valid_targets_mean": 14308.5,
|
|
"valid_targets_min": 6489
|
|
},
|
|
{
|
|
"epoch": 2.520255863539446,
|
|
"grad_norm": 0.10909927341005053,
|
|
"learning_rate": 2.328417670713294e-05,
|
|
"loss": 1.0673726797103882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538418769836426,
|
|
"step": 592,
|
|
"valid_targets_mean": 13910.1,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 2.5245202558635396,
|
|
"grad_norm": 0.12108120500001548,
|
|
"learning_rate": 2.3225525628886918e-05,
|
|
"loss": 1.0630145072937012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24985308945178986,
|
|
"step": 593,
|
|
"valid_targets_mean": 14432.2,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 2.5287846481876333,
|
|
"grad_norm": 0.10183242128325715,
|
|
"learning_rate": 2.3166846056868687e-05,
|
|
"loss": 1.1043426990509033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26531025767326355,
|
|
"step": 594,
|
|
"valid_targets_mean": 14467.5,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 2.533049040511727,
|
|
"grad_norm": 0.1215922302860279,
|
|
"learning_rate": 2.31081385094441e-05,
|
|
"loss": 1.0936028957366943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828601598739624,
|
|
"step": 595,
|
|
"valid_targets_mean": 14950.9,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 2.5373134328358207,
|
|
"grad_norm": 0.11486160173354906,
|
|
"learning_rate": 2.304940350522615e-05,
|
|
"loss": 1.0198354721069336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564467489719391,
|
|
"step": 596,
|
|
"valid_targets_mean": 15492.4,
|
|
"valid_targets_min": 5938
|
|
},
|
|
{
|
|
"epoch": 2.541577825159915,
|
|
"grad_norm": 0.12209475506650519,
|
|
"learning_rate": 2.299064156307037e-05,
|
|
"loss": 1.013113260269165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2631484866142273,
|
|
"step": 597,
|
|
"valid_targets_mean": 15406.1,
|
|
"valid_targets_min": 7409
|
|
},
|
|
{
|
|
"epoch": 2.5458422174840085,
|
|
"grad_norm": 0.1203561337180622,
|
|
"learning_rate": 2.2931853202070275e-05,
|
|
"loss": 1.063555121421814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26198774576187134,
|
|
"step": 598,
|
|
"valid_targets_mean": 14713.0,
|
|
"valid_targets_min": 5438
|
|
},
|
|
{
|
|
"epoch": 2.550106609808102,
|
|
"grad_norm": 0.1052190176714238,
|
|
"learning_rate": 2.2873038941552724e-05,
|
|
"loss": 1.0354630947113037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526911497116089,
|
|
"step": 599,
|
|
"valid_targets_mean": 15203.8,
|
|
"valid_targets_min": 5398
|
|
},
|
|
{
|
|
"epoch": 2.5543710021321964,
|
|
"grad_norm": 0.11305662624693819,
|
|
"learning_rate": 2.2814199301073412e-05,
|
|
"loss": 1.0255736112594604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24174556136131287,
|
|
"step": 600,
|
|
"valid_targets_mean": 14636.3,
|
|
"valid_targets_min": 2864
|
|
},
|
|
{
|
|
"epoch": 2.55863539445629,
|
|
"grad_norm": 0.10992216917702095,
|
|
"learning_rate": 2.27553348004122e-05,
|
|
"loss": 1.0456774234771729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26446160674095154,
|
|
"step": 601,
|
|
"valid_targets_mean": 14595.3,
|
|
"valid_targets_min": 2438
|
|
},
|
|
{
|
|
"epoch": 2.5628997867803838,
|
|
"grad_norm": 0.12243937686717783,
|
|
"learning_rate": 2.2696445959568577e-05,
|
|
"loss": 1.034912109375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511717975139618,
|
|
"step": 602,
|
|
"valid_targets_mean": 14794.9,
|
|
"valid_targets_min": 4607
|
|
},
|
|
{
|
|
"epoch": 2.5671641791044775,
|
|
"grad_norm": 0.1142548129129476,
|
|
"learning_rate": 2.2637533298757064e-05,
|
|
"loss": 1.0757339000701904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26432859897613525,
|
|
"step": 603,
|
|
"valid_targets_mean": 14861.0,
|
|
"valid_targets_min": 6411
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.13637563685961449,
|
|
"learning_rate": 2.2578597338402567e-05,
|
|
"loss": 1.06773042678833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747154235839844,
|
|
"step": 604,
|
|
"valid_targets_mean": 14348.2,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 2.5756929637526653,
|
|
"grad_norm": 0.10516547564188432,
|
|
"learning_rate": 2.2519638599135844e-05,
|
|
"loss": 1.0419844388961792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577703297138214,
|
|
"step": 605,
|
|
"valid_targets_mean": 14575.1,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 2.579957356076759,
|
|
"grad_norm": 0.1380551588227109,
|
|
"learning_rate": 2.2460657601788875e-05,
|
|
"loss": 1.0456421375274658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26205676794052124,
|
|
"step": 606,
|
|
"valid_targets_mean": 14975.0,
|
|
"valid_targets_min": 5379
|
|
},
|
|
{
|
|
"epoch": 2.5842217484008527,
|
|
"grad_norm": 0.09776367679894095,
|
|
"learning_rate": 2.2401654867390256e-05,
|
|
"loss": 1.0459275245666504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624329924583435,
|
|
"step": 607,
|
|
"valid_targets_mean": 15002.1,
|
|
"valid_targets_min": 3892
|
|
},
|
|
{
|
|
"epoch": 2.588486140724947,
|
|
"grad_norm": 0.11100803708478867,
|
|
"learning_rate": 2.2342630917160605e-05,
|
|
"loss": 1.0408620834350586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2793847322463989,
|
|
"step": 608,
|
|
"valid_targets_mean": 15514.0,
|
|
"valid_targets_min": 11398
|
|
},
|
|
{
|
|
"epoch": 2.5927505330490406,
|
|
"grad_norm": 0.1069956325660906,
|
|
"learning_rate": 2.2283586272507975e-05,
|
|
"loss": 1.0688080787658691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711299657821655,
|
|
"step": 609,
|
|
"valid_targets_mean": 15076.6,
|
|
"valid_targets_min": 4028
|
|
},
|
|
{
|
|
"epoch": 2.5970149253731343,
|
|
"grad_norm": 0.10430185525731406,
|
|
"learning_rate": 2.2224521455023193e-05,
|
|
"loss": 1.0676207542419434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783691883087158,
|
|
"step": 610,
|
|
"valid_targets_mean": 15037.4,
|
|
"valid_targets_min": 5704
|
|
},
|
|
{
|
|
"epoch": 2.6012793176972284,
|
|
"grad_norm": 0.12130993394649214,
|
|
"learning_rate": 2.216543698647534e-05,
|
|
"loss": 1.0438411235809326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25039076805114746,
|
|
"step": 611,
|
|
"valid_targets_mean": 15198.2,
|
|
"valid_targets_min": 7861
|
|
},
|
|
{
|
|
"epoch": 2.605543710021322,
|
|
"grad_norm": 0.09933572538572656,
|
|
"learning_rate": 2.210633338880704e-05,
|
|
"loss": 1.0441901683807373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2598780393600464,
|
|
"step": 612,
|
|
"valid_targets_mean": 14514.8,
|
|
"valid_targets_min": 2095
|
|
},
|
|
{
|
|
"epoch": 2.609808102345416,
|
|
"grad_norm": 0.10510212088154962,
|
|
"learning_rate": 2.204721118412994e-05,
|
|
"loss": 1.007887840270996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24712875485420227,
|
|
"step": 613,
|
|
"valid_targets_mean": 15719.6,
|
|
"valid_targets_min": 7064
|
|
},
|
|
{
|
|
"epoch": 2.6140724946695095,
|
|
"grad_norm": 0.12157250964609065,
|
|
"learning_rate": 2.1988070894720037e-05,
|
|
"loss": 1.0408642292022705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27903807163238525,
|
|
"step": 614,
|
|
"valid_targets_mean": 14801.3,
|
|
"valid_targets_min": 3424
|
|
},
|
|
{
|
|
"epoch": 2.6183368869936032,
|
|
"grad_norm": 0.10948158452482029,
|
|
"learning_rate": 2.192891304301309e-05,
|
|
"loss": 1.059133768081665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2628347873687744,
|
|
"step": 615,
|
|
"valid_targets_mean": 15173.8,
|
|
"valid_targets_min": 6350
|
|
},
|
|
{
|
|
"epoch": 2.6226012793176974,
|
|
"grad_norm": 0.09943412757871808,
|
|
"learning_rate": 2.18697381516e-05,
|
|
"loss": 1.0339713096618652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25985080003738403,
|
|
"step": 616,
|
|
"valid_targets_mean": 15364.6,
|
|
"valid_targets_min": 2549
|
|
},
|
|
{
|
|
"epoch": 2.626865671641791,
|
|
"grad_norm": 0.14626355180109923,
|
|
"learning_rate": 2.181054674322221e-05,
|
|
"loss": 1.0592225790023804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634222209453583,
|
|
"step": 617,
|
|
"valid_targets_mean": 15133.7,
|
|
"valid_targets_min": 3155
|
|
},
|
|
{
|
|
"epoch": 2.631130063965885,
|
|
"grad_norm": 0.12943488698610622,
|
|
"learning_rate": 2.1751339340767043e-05,
|
|
"loss": 1.0118565559387207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2643774151802063,
|
|
"step": 618,
|
|
"valid_targets_mean": 14694.1,
|
|
"valid_targets_min": 5175
|
|
},
|
|
{
|
|
"epoch": 2.635394456289979,
|
|
"grad_norm": 0.12678721384266212,
|
|
"learning_rate": 2.169211646726313e-05,
|
|
"loss": 1.061182975769043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25831732153892517,
|
|
"step": 619,
|
|
"valid_targets_mean": 14516.6,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 2.6396588486140726,
|
|
"grad_norm": 0.13879656324942302,
|
|
"learning_rate": 2.163287864587576e-05,
|
|
"loss": 1.0580122470855713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813308835029602,
|
|
"step": 620,
|
|
"valid_targets_mean": 15262.5,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 2.6439232409381663,
|
|
"grad_norm": 0.10883361846948157,
|
|
"learning_rate": 2.157362639990229e-05,
|
|
"loss": 1.0226809978485107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26501500606536865,
|
|
"step": 621,
|
|
"valid_targets_mean": 15207.4,
|
|
"valid_targets_min": 4205
|
|
},
|
|
{
|
|
"epoch": 2.64818763326226,
|
|
"grad_norm": 0.13565539309033717,
|
|
"learning_rate": 2.151436025276747e-05,
|
|
"loss": 1.065739393234253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270394891500473,
|
|
"step": 622,
|
|
"valid_targets_mean": 14962.2,
|
|
"valid_targets_min": 6660
|
|
},
|
|
{
|
|
"epoch": 2.6524520255863537,
|
|
"grad_norm": 0.09854028333086322,
|
|
"learning_rate": 2.145508072801888e-05,
|
|
"loss": 1.0685371160507202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29033178091049194,
|
|
"step": 623,
|
|
"valid_targets_mean": 15455.7,
|
|
"valid_targets_min": 9705
|
|
},
|
|
{
|
|
"epoch": 2.656716417910448,
|
|
"grad_norm": 0.11318088658698934,
|
|
"learning_rate": 2.1395788349322256e-05,
|
|
"loss": 1.082135796546936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27790725231170654,
|
|
"step": 624,
|
|
"valid_targets_mean": 14922.1,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 2.6609808102345416,
|
|
"grad_norm": 0.10243948174873596,
|
|
"learning_rate": 2.133648364045689e-05,
|
|
"loss": 1.019806146621704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22797921299934387,
|
|
"step": 625,
|
|
"valid_targets_mean": 13488.0,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 2.6652452025586353,
|
|
"grad_norm": 0.10047404613950953,
|
|
"learning_rate": 2.1277167125310996e-05,
|
|
"loss": 1.0143678188323975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520124018192291,
|
|
"step": 626,
|
|
"valid_targets_mean": 14776.7,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 2.6695095948827294,
|
|
"grad_norm": 0.09145231866156223,
|
|
"learning_rate": 2.1217839327877098e-05,
|
|
"loss": 1.040644645690918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28392672538757324,
|
|
"step": 627,
|
|
"valid_targets_mean": 15072.8,
|
|
"valid_targets_min": 4011
|
|
},
|
|
{
|
|
"epoch": 2.673773987206823,
|
|
"grad_norm": 0.11401894390880575,
|
|
"learning_rate": 2.1158500772247352e-05,
|
|
"loss": 1.0729954242706299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513941824436188,
|
|
"step": 628,
|
|
"valid_targets_mean": 15610.4,
|
|
"valid_targets_min": 10526
|
|
},
|
|
{
|
|
"epoch": 2.678038379530917,
|
|
"grad_norm": 0.10828405956368052,
|
|
"learning_rate": 2.1099151982608985e-05,
|
|
"loss": 1.0410047769546509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24934270977973938,
|
|
"step": 629,
|
|
"valid_targets_mean": 13862.6,
|
|
"valid_targets_min": 2641
|
|
},
|
|
{
|
|
"epoch": 2.6823027718550105,
|
|
"grad_norm": 0.09448126661051112,
|
|
"learning_rate": 2.1039793483239607e-05,
|
|
"loss": 1.0613449811935425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732912600040436,
|
|
"step": 630,
|
|
"valid_targets_mean": 15320.3,
|
|
"valid_targets_min": 4702
|
|
},
|
|
{
|
|
"epoch": 2.6865671641791042,
|
|
"grad_norm": 0.10889944780585988,
|
|
"learning_rate": 2.0980425798502616e-05,
|
|
"loss": 1.043823480606079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27058395743370056,
|
|
"step": 631,
|
|
"valid_targets_mean": 14775.0,
|
|
"valid_targets_min": 5934
|
|
},
|
|
{
|
|
"epoch": 2.6908315565031984,
|
|
"grad_norm": 0.10598838076405855,
|
|
"learning_rate": 2.092104945284255e-05,
|
|
"loss": 1.0220303535461426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26301729679107666,
|
|
"step": 632,
|
|
"valid_targets_mean": 14825.2,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 2.695095948827292,
|
|
"grad_norm": 0.096649254343725,
|
|
"learning_rate": 2.0861664970780434e-05,
|
|
"loss": 1.0587990283966064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2864176630973816,
|
|
"step": 633,
|
|
"valid_targets_mean": 14869.1,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 2.699360341151386,
|
|
"grad_norm": 0.12492744041403084,
|
|
"learning_rate": 2.08022728769092e-05,
|
|
"loss": 1.0611028671264648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796041965484619,
|
|
"step": 634,
|
|
"valid_targets_mean": 15264.3,
|
|
"valid_targets_min": 9411
|
|
},
|
|
{
|
|
"epoch": 2.70362473347548,
|
|
"grad_norm": 0.08815457331053626,
|
|
"learning_rate": 2.0742873695889005e-05,
|
|
"loss": 1.029858112335205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22491423785686493,
|
|
"step": 635,
|
|
"valid_targets_mean": 14082.1,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 2.7078891257995736,
|
|
"grad_norm": 0.11401596610202915,
|
|
"learning_rate": 2.0683467952442626e-05,
|
|
"loss": 1.0549201965332031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26610907912254333,
|
|
"step": 636,
|
|
"valid_targets_mean": 14397.4,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 2.7121535181236673,
|
|
"grad_norm": 0.09170930694937314,
|
|
"learning_rate": 2.0624056171350785e-05,
|
|
"loss": 1.0417375564575195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26557955145835876,
|
|
"step": 637,
|
|
"valid_targets_mean": 15208.9,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 2.716417910447761,
|
|
"grad_norm": 0.09373484805402636,
|
|
"learning_rate": 2.0564638877447566e-05,
|
|
"loss": 1.0557622909545898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590869665145874,
|
|
"step": 638,
|
|
"valid_targets_mean": 14894.8,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 2.7206823027718547,
|
|
"grad_norm": 0.11086311830705485,
|
|
"learning_rate": 2.0505216595615742e-05,
|
|
"loss": 1.0656099319458008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670261859893799,
|
|
"step": 639,
|
|
"valid_targets_mean": 15368.2,
|
|
"valid_targets_min": 8104
|
|
},
|
|
{
|
|
"epoch": 2.724946695095949,
|
|
"grad_norm": 0.099693187192522,
|
|
"learning_rate": 2.044578985078215e-05,
|
|
"loss": 1.066105842590332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25466716289520264,
|
|
"step": 640,
|
|
"valid_targets_mean": 13972.0,
|
|
"valid_targets_min": 2953
|
|
},
|
|
{
|
|
"epoch": 2.7292110874200426,
|
|
"grad_norm": 0.09144786981608774,
|
|
"learning_rate": 2.0386359167913046e-05,
|
|
"loss": 1.005486011505127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582522928714752,
|
|
"step": 641,
|
|
"valid_targets_mean": 15838.5,
|
|
"valid_targets_min": 13199
|
|
},
|
|
{
|
|
"epoch": 2.7334754797441363,
|
|
"grad_norm": 0.09290876021103756,
|
|
"learning_rate": 2.0326925072009485e-05,
|
|
"loss": 1.00834321975708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24156692624092102,
|
|
"step": 642,
|
|
"valid_targets_mean": 14860.7,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 2.7377398720682304,
|
|
"grad_norm": 0.08683659207009448,
|
|
"learning_rate": 2.0267488088102657e-05,
|
|
"loss": 1.0450940132141113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24575310945510864,
|
|
"step": 643,
|
|
"valid_targets_mean": 15412.2,
|
|
"valid_targets_min": 6629
|
|
},
|
|
{
|
|
"epoch": 2.742004264392324,
|
|
"grad_norm": 0.11853027309215325,
|
|
"learning_rate": 2.0208048741249288e-05,
|
|
"loss": 1.0088589191436768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650904655456543,
|
|
"step": 644,
|
|
"valid_targets_mean": 15457.6,
|
|
"valid_targets_min": 9740
|
|
},
|
|
{
|
|
"epoch": 2.746268656716418,
|
|
"grad_norm": 0.09888933405924138,
|
|
"learning_rate": 2.014860755652695e-05,
|
|
"loss": 1.0865031480789185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950332760810852,
|
|
"step": 645,
|
|
"valid_targets_mean": 15230.5,
|
|
"valid_targets_min": 7258
|
|
},
|
|
{
|
|
"epoch": 2.750533049040512,
|
|
"grad_norm": 0.09077056957188934,
|
|
"learning_rate": 2.0089165059029477e-05,
|
|
"loss": 1.0576354265213013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27440112829208374,
|
|
"step": 646,
|
|
"valid_targets_mean": 15387.4,
|
|
"valid_targets_min": 5120
|
|
},
|
|
{
|
|
"epoch": 2.7547974413646057,
|
|
"grad_norm": 0.12727069338504046,
|
|
"learning_rate": 2.0029721773862277e-05,
|
|
"loss": 1.040118932723999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25948765873908997,
|
|
"step": 647,
|
|
"valid_targets_mean": 13869.9,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 2.7590618336886994,
|
|
"grad_norm": 0.09414659738141111,
|
|
"learning_rate": 1.997027822613773e-05,
|
|
"loss": 1.0343791246414185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23842459917068481,
|
|
"step": 648,
|
|
"valid_targets_mean": 13911.2,
|
|
"valid_targets_min": 5753
|
|
},
|
|
{
|
|
"epoch": 2.763326226012793,
|
|
"grad_norm": 0.09760780079744967,
|
|
"learning_rate": 1.9910834940970533e-05,
|
|
"loss": 1.0626685619354248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26802858710289,
|
|
"step": 649,
|
|
"valid_targets_mean": 14167.7,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 2.767590618336887,
|
|
"grad_norm": 0.10697492490503085,
|
|
"learning_rate": 1.985139244347305e-05,
|
|
"loss": 1.0661125183105469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26134780049324036,
|
|
"step": 650,
|
|
"valid_targets_mean": 14691.3,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 2.771855010660981,
|
|
"grad_norm": 0.10702606984974812,
|
|
"learning_rate": 1.979195125875072e-05,
|
|
"loss": 1.0712954998016357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28778448700904846,
|
|
"step": 651,
|
|
"valid_targets_mean": 15400.3,
|
|
"valid_targets_min": 7004
|
|
},
|
|
{
|
|
"epoch": 2.7761194029850746,
|
|
"grad_norm": 0.11922889580109083,
|
|
"learning_rate": 1.9732511911897353e-05,
|
|
"loss": 1.0359854698181152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259382426738739,
|
|
"step": 652,
|
|
"valid_targets_mean": 14325.9,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 2.7803837953091683,
|
|
"grad_norm": 0.0908638612768627,
|
|
"learning_rate": 1.9673074927990525e-05,
|
|
"loss": 1.029666543006897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22730061411857605,
|
|
"step": 653,
|
|
"valid_targets_mean": 14054.5,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 2.7846481876332625,
|
|
"grad_norm": 0.0981224678390093,
|
|
"learning_rate": 1.9613640832086957e-05,
|
|
"loss": 1.0567508935928345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554134428501129,
|
|
"step": 654,
|
|
"valid_targets_mean": 14729.9,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 2.788912579957356,
|
|
"grad_norm": 0.09557671725625344,
|
|
"learning_rate": 1.9554210149217855e-05,
|
|
"loss": 0.9999919533729553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24661889672279358,
|
|
"step": 655,
|
|
"valid_targets_mean": 14907.3,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 2.79317697228145,
|
|
"grad_norm": 0.1014944267267325,
|
|
"learning_rate": 1.9494783404384265e-05,
|
|
"loss": 1.0318797826766968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2632007598876953,
|
|
"step": 656,
|
|
"valid_targets_mean": 13960.8,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 2.7974413646055436,
|
|
"grad_norm": 0.08523674515848591,
|
|
"learning_rate": 1.9435361122552437e-05,
|
|
"loss": 1.0454580783843994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2793329358100891,
|
|
"step": 657,
|
|
"valid_targets_mean": 15929.2,
|
|
"valid_targets_min": 12378
|
|
},
|
|
{
|
|
"epoch": 2.8017057569296373,
|
|
"grad_norm": 0.13838290480866708,
|
|
"learning_rate": 1.9375943828649215e-05,
|
|
"loss": 1.056179404258728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25556135177612305,
|
|
"step": 658,
|
|
"valid_targets_mean": 14254.8,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 2.8059701492537314,
|
|
"grad_norm": 0.0958833509051413,
|
|
"learning_rate": 1.9316532047557378e-05,
|
|
"loss": 1.0423905849456787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633678913116455,
|
|
"step": 659,
|
|
"valid_targets_mean": 14146.8,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 2.810234541577825,
|
|
"grad_norm": 0.10588928996243993,
|
|
"learning_rate": 1.9257126304110998e-05,
|
|
"loss": 1.0271477699279785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26135048270225525,
|
|
"step": 660,
|
|
"valid_targets_mean": 15311.8,
|
|
"valid_targets_min": 7238
|
|
},
|
|
{
|
|
"epoch": 2.814498933901919,
|
|
"grad_norm": 0.09116876800147194,
|
|
"learning_rate": 1.919772712309081e-05,
|
|
"loss": 1.0520766973495483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24236315488815308,
|
|
"step": 661,
|
|
"valid_targets_mean": 15055.7,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 2.818763326226013,
|
|
"grad_norm": 0.08819172131027095,
|
|
"learning_rate": 1.9138335029219572e-05,
|
|
"loss": 1.0467054843902588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655482888221741,
|
|
"step": 662,
|
|
"valid_targets_mean": 15517.8,
|
|
"valid_targets_min": 5075
|
|
},
|
|
{
|
|
"epoch": 2.8230277185501067,
|
|
"grad_norm": 0.09693945905139087,
|
|
"learning_rate": 1.9078950547157458e-05,
|
|
"loss": 1.0835331678390503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2648315727710724,
|
|
"step": 663,
|
|
"valid_targets_mean": 14957.7,
|
|
"valid_targets_min": 5183
|
|
},
|
|
{
|
|
"epoch": 2.8272921108742004,
|
|
"grad_norm": 0.1007305055166871,
|
|
"learning_rate": 1.9019574201497387e-05,
|
|
"loss": 1.0432779788970947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268159955739975,
|
|
"step": 664,
|
|
"valid_targets_mean": 14936.0,
|
|
"valid_targets_min": 5933
|
|
},
|
|
{
|
|
"epoch": 2.831556503198294,
|
|
"grad_norm": 0.10633071458684165,
|
|
"learning_rate": 1.8960206516760396e-05,
|
|
"loss": 1.0779266357421875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2926626205444336,
|
|
"step": 665,
|
|
"valid_targets_mean": 15472.3,
|
|
"valid_targets_min": 10243
|
|
},
|
|
{
|
|
"epoch": 2.835820895522388,
|
|
"grad_norm": 0.10396166205798389,
|
|
"learning_rate": 1.890084801739102e-05,
|
|
"loss": 1.0034468173980713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22967484593391418,
|
|
"step": 666,
|
|
"valid_targets_mean": 14684.1,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 2.840085287846482,
|
|
"grad_norm": 0.09638580119199787,
|
|
"learning_rate": 1.884149922775265e-05,
|
|
"loss": 1.0276405811309814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25226348638534546,
|
|
"step": 667,
|
|
"valid_targets_mean": 14988.4,
|
|
"valid_targets_min": 7267
|
|
},
|
|
{
|
|
"epoch": 2.8443496801705757,
|
|
"grad_norm": 0.1084264848469337,
|
|
"learning_rate": 1.878216067212291e-05,
|
|
"loss": 1.047234058380127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24949602782726288,
|
|
"step": 668,
|
|
"valid_targets_mean": 14842.2,
|
|
"valid_targets_min": 5609
|
|
},
|
|
{
|
|
"epoch": 2.8486140724946694,
|
|
"grad_norm": 0.1086383727198765,
|
|
"learning_rate": 1.8722832874689007e-05,
|
|
"loss": 1.0727641582489014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27868297696113586,
|
|
"step": 669,
|
|
"valid_targets_mean": 15820.2,
|
|
"valid_targets_min": 12116
|
|
},
|
|
{
|
|
"epoch": 2.8528784648187635,
|
|
"grad_norm": 0.10255326339360858,
|
|
"learning_rate": 1.8663516359543123e-05,
|
|
"loss": 1.0397084951400757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564503848552704,
|
|
"step": 670,
|
|
"valid_targets_mean": 14142.6,
|
|
"valid_targets_min": 2427
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.10417892551941761,
|
|
"learning_rate": 1.860421165067775e-05,
|
|
"loss": 1.0493979454040527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26024430990219116,
|
|
"step": 671,
|
|
"valid_targets_mean": 15493.0,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 2.861407249466951,
|
|
"grad_norm": 0.11404069874671212,
|
|
"learning_rate": 1.8544919271981125e-05,
|
|
"loss": 1.031550645828247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2422025054693222,
|
|
"step": 672,
|
|
"valid_targets_mean": 14255.4,
|
|
"valid_targets_min": 2365
|
|
},
|
|
{
|
|
"epoch": 2.8656716417910446,
|
|
"grad_norm": 0.09200020440410975,
|
|
"learning_rate": 1.8485639747232535e-05,
|
|
"loss": 1.0501012802124023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26862165331840515,
|
|
"step": 673,
|
|
"valid_targets_mean": 15296.3,
|
|
"valid_targets_min": 9904
|
|
},
|
|
{
|
|
"epoch": 2.8699360341151388,
|
|
"grad_norm": 0.10187007735608053,
|
|
"learning_rate": 1.8426373600097723e-05,
|
|
"loss": 1.0608158111572266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26771557331085205,
|
|
"step": 674,
|
|
"valid_targets_mean": 15466.8,
|
|
"valid_targets_min": 8984
|
|
},
|
|
{
|
|
"epoch": 2.8742004264392325,
|
|
"grad_norm": 0.10036687917718154,
|
|
"learning_rate": 1.836712135412424e-05,
|
|
"loss": 1.0379321575164795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642658054828644,
|
|
"step": 675,
|
|
"valid_targets_mean": 15383.2,
|
|
"valid_targets_min": 8605
|
|
},
|
|
{
|
|
"epoch": 2.878464818763326,
|
|
"grad_norm": 0.10553277141761286,
|
|
"learning_rate": 1.8307883532736878e-05,
|
|
"loss": 1.0531381368637085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562502324581146,
|
|
"step": 676,
|
|
"valid_targets_mean": 15152.2,
|
|
"valid_targets_min": 4101
|
|
},
|
|
{
|
|
"epoch": 2.88272921108742,
|
|
"grad_norm": 0.11831413276872216,
|
|
"learning_rate": 1.8248660659232964e-05,
|
|
"loss": 1.0127050876617432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24492374062538147,
|
|
"step": 677,
|
|
"valid_targets_mean": 14975.7,
|
|
"valid_targets_min": 5802
|
|
},
|
|
{
|
|
"epoch": 2.886993603411514,
|
|
"grad_norm": 0.10569822396333257,
|
|
"learning_rate": 1.8189453256777798e-05,
|
|
"loss": 1.0561635494232178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27034202218055725,
|
|
"step": 678,
|
|
"valid_targets_mean": 14902.4,
|
|
"valid_targets_min": 4824
|
|
},
|
|
{
|
|
"epoch": 2.8912579957356077,
|
|
"grad_norm": 0.09405934992777647,
|
|
"learning_rate": 1.8130261848399996e-05,
|
|
"loss": 1.0346713066101074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2444111555814743,
|
|
"step": 679,
|
|
"valid_targets_mean": 14605.8,
|
|
"valid_targets_min": 3224
|
|
},
|
|
{
|
|
"epoch": 2.8955223880597014,
|
|
"grad_norm": 0.09224547225200644,
|
|
"learning_rate": 1.8071086956986916e-05,
|
|
"loss": 1.0024101734161377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2338026911020279,
|
|
"step": 680,
|
|
"valid_targets_mean": 14041.8,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 2.8997867803837956,
|
|
"grad_norm": 0.09486187094848662,
|
|
"learning_rate": 1.8011929105279967e-05,
|
|
"loss": 1.0379791259765625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24782249331474304,
|
|
"step": 681,
|
|
"valid_targets_mean": 14689.2,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 2.9040511727078893,
|
|
"grad_norm": 0.09941225169123405,
|
|
"learning_rate": 1.795278881587007e-05,
|
|
"loss": 1.0000572204589844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25220823287963867,
|
|
"step": 682,
|
|
"valid_targets_mean": 15114.9,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 2.908315565031983,
|
|
"grad_norm": 0.10310676050628857,
|
|
"learning_rate": 1.7893666611192962e-05,
|
|
"loss": 1.0490434169769287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27309685945510864,
|
|
"step": 683,
|
|
"valid_targets_mean": 14711.1,
|
|
"valid_targets_min": 6430
|
|
},
|
|
{
|
|
"epoch": 2.9125799573560767,
|
|
"grad_norm": 0.08271677365501538,
|
|
"learning_rate": 1.783456301352467e-05,
|
|
"loss": 1.0730600357055664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25433292984962463,
|
|
"step": 684,
|
|
"valid_targets_mean": 14433.8,
|
|
"valid_targets_min": 2865
|
|
},
|
|
{
|
|
"epoch": 2.9168443496801704,
|
|
"grad_norm": 0.09299727132373173,
|
|
"learning_rate": 1.7775478544976813e-05,
|
|
"loss": 1.0093896389007568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25548744201660156,
|
|
"step": 685,
|
|
"valid_targets_mean": 14471.1,
|
|
"valid_targets_min": 1903
|
|
},
|
|
{
|
|
"epoch": 2.9211087420042645,
|
|
"grad_norm": 0.10065084732629,
|
|
"learning_rate": 1.7716413727492035e-05,
|
|
"loss": 1.0522160530090332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27982860803604126,
|
|
"step": 686,
|
|
"valid_targets_mean": 14811.4,
|
|
"valid_targets_min": 4757
|
|
},
|
|
{
|
|
"epoch": 2.925373134328358,
|
|
"grad_norm": 0.11894198761188435,
|
|
"learning_rate": 1.7657369082839392e-05,
|
|
"loss": 1.0661197900772095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26254287362098694,
|
|
"step": 687,
|
|
"valid_targets_mean": 14819.1,
|
|
"valid_targets_min": 6555
|
|
},
|
|
{
|
|
"epoch": 2.929637526652452,
|
|
"grad_norm": 0.08160775336225609,
|
|
"learning_rate": 1.7598345132609747e-05,
|
|
"loss": 1.0675498247146606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637835144996643,
|
|
"step": 688,
|
|
"valid_targets_mean": 14699.1,
|
|
"valid_targets_min": 2986
|
|
},
|
|
{
|
|
"epoch": 2.933901918976546,
|
|
"grad_norm": 0.09459313499815636,
|
|
"learning_rate": 1.7539342398211132e-05,
|
|
"loss": 1.0492291450500488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28006109595298767,
|
|
"step": 689,
|
|
"valid_targets_mean": 14990.8,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 2.9381663113006398,
|
|
"grad_norm": 0.09449307185356025,
|
|
"learning_rate": 1.748036140086416e-05,
|
|
"loss": 1.0591932535171509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770892381668091,
|
|
"step": 690,
|
|
"valid_targets_mean": 15341.5,
|
|
"valid_targets_min": 2657
|
|
},
|
|
{
|
|
"epoch": 2.9424307036247335,
|
|
"grad_norm": 0.09296315101594112,
|
|
"learning_rate": 1.742140266159744e-05,
|
|
"loss": 1.0429664850234985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597864270210266,
|
|
"step": 691,
|
|
"valid_targets_mean": 14504.1,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 2.946695095948827,
|
|
"grad_norm": 0.08475856682483765,
|
|
"learning_rate": 1.7362466701242943e-05,
|
|
"loss": 1.0481195449829102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24358433485031128,
|
|
"step": 692,
|
|
"valid_targets_mean": 13964.4,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 2.950959488272921,
|
|
"grad_norm": 0.09027294943267872,
|
|
"learning_rate": 1.7303554040431426e-05,
|
|
"loss": 1.0421435832977295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24681347608566284,
|
|
"step": 693,
|
|
"valid_targets_mean": 14935.4,
|
|
"valid_targets_min": 8591
|
|
},
|
|
{
|
|
"epoch": 2.955223880597015,
|
|
"grad_norm": 0.0839739016581147,
|
|
"learning_rate": 1.7244665199587812e-05,
|
|
"loss": 1.033682107925415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27021023631095886,
|
|
"step": 694,
|
|
"valid_targets_mean": 15466.5,
|
|
"valid_targets_min": 8408
|
|
},
|
|
{
|
|
"epoch": 2.9594882729211087,
|
|
"grad_norm": 0.08173455945996605,
|
|
"learning_rate": 1.7185800698926594e-05,
|
|
"loss": 1.0710523128509521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841835618019104,
|
|
"step": 695,
|
|
"valid_targets_mean": 15041.3,
|
|
"valid_targets_min": 11444
|
|
},
|
|
{
|
|
"epoch": 2.9637526652452024,
|
|
"grad_norm": 0.08848434516434239,
|
|
"learning_rate": 1.7126961058447276e-05,
|
|
"loss": 1.0324208736419678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24441203474998474,
|
|
"step": 696,
|
|
"valid_targets_mean": 15568.1,
|
|
"valid_targets_min": 7312
|
|
},
|
|
{
|
|
"epoch": 2.9680170575692966,
|
|
"grad_norm": 0.10044429365981941,
|
|
"learning_rate": 1.706814679792973e-05,
|
|
"loss": 1.0719600915908813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26880744099617004,
|
|
"step": 697,
|
|
"valid_targets_mean": 14781.2,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 2.9722814498933903,
|
|
"grad_norm": 0.09076415272551094,
|
|
"learning_rate": 1.7009358436929632e-05,
|
|
"loss": 1.0307958126068115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24411264061927795,
|
|
"step": 698,
|
|
"valid_targets_mean": 14271.6,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 2.976545842217484,
|
|
"grad_norm": 0.09210955856047405,
|
|
"learning_rate": 1.6950596494773855e-05,
|
|
"loss": 1.0544676780700684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27991074323654175,
|
|
"step": 699,
|
|
"valid_targets_mean": 15393.5,
|
|
"valid_targets_min": 3346
|
|
},
|
|
{
|
|
"epoch": 2.9808102345415777,
|
|
"grad_norm": 0.10288934723753675,
|
|
"learning_rate": 1.6891861490555906e-05,
|
|
"loss": 1.087593913078308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660730481147766,
|
|
"step": 700,
|
|
"valid_targets_mean": 13862.9,
|
|
"valid_targets_min": 4574
|
|
},
|
|
{
|
|
"epoch": 2.9850746268656714,
|
|
"grad_norm": 0.10865555318770762,
|
|
"learning_rate": 1.683315394313132e-05,
|
|
"loss": 1.0225476026535034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520694434642792,
|
|
"step": 701,
|
|
"valid_targets_mean": 15160.9,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 2.9893390191897655,
|
|
"grad_norm": 0.09438237715753746,
|
|
"learning_rate": 1.677447437111309e-05,
|
|
"loss": 1.0214576721191406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256600022315979,
|
|
"step": 702,
|
|
"valid_targets_mean": 14271.9,
|
|
"valid_targets_min": 3723
|
|
},
|
|
{
|
|
"epoch": 2.9936034115138592,
|
|
"grad_norm": 0.08834727879820044,
|
|
"learning_rate": 1.671582329286707e-05,
|
|
"loss": 1.066161870956421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28262799978256226,
|
|
"step": 703,
|
|
"valid_targets_mean": 14603.6,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 2.997867803837953,
|
|
"grad_norm": 0.08907937443691644,
|
|
"learning_rate": 1.66572012265074e-05,
|
|
"loss": 1.0634660720825195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2691338062286377,
|
|
"step": 704,
|
|
"valid_targets_mean": 15085.3,
|
|
"valid_targets_min": 9587
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.12800644975192477,
|
|
"learning_rate": 1.6598608689891953e-05,
|
|
"loss": 0.9655362963676453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42965245246887207,
|
|
"step": 705,
|
|
"valid_targets_mean": 14399.2,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 3.0042643923240937,
|
|
"grad_norm": 0.0966075643245703,
|
|
"learning_rate": 1.654004620061773e-05,
|
|
"loss": 1.0344445705413818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689897119998932,
|
|
"step": 706,
|
|
"valid_targets_mean": 15146.1,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 3.008528784648188,
|
|
"grad_norm": 0.1079883798589847,
|
|
"learning_rate": 1.6481514276016297e-05,
|
|
"loss": 1.016173005104065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24318349361419678,
|
|
"step": 707,
|
|
"valid_targets_mean": 13645.3,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 3.0127931769722816,
|
|
"grad_norm": 0.09787193381476755,
|
|
"learning_rate": 1.6423013433149207e-05,
|
|
"loss": 1.0453088283538818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250877320766449,
|
|
"step": 708,
|
|
"valid_targets_mean": 14240.2,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 3.0170575692963753,
|
|
"grad_norm": 0.09282614762286959,
|
|
"learning_rate": 1.636454418880347e-05,
|
|
"loss": 1.045073390007019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581164240837097,
|
|
"step": 709,
|
|
"valid_targets_mean": 14470.3,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 3.021321961620469,
|
|
"grad_norm": 0.09449004440800723,
|
|
"learning_rate": 1.630610705948693e-05,
|
|
"loss": 1.0387874841690063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25838711857795715,
|
|
"step": 710,
|
|
"valid_targets_mean": 15462.9,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 3.025586353944563,
|
|
"grad_norm": 0.10389798980629657,
|
|
"learning_rate": 1.6247702561423753e-05,
|
|
"loss": 1.076232671737671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638155221939087,
|
|
"step": 711,
|
|
"valid_targets_mean": 15069.9,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 3.029850746268657,
|
|
"grad_norm": 0.08297534210077734,
|
|
"learning_rate": 1.6189331210549828e-05,
|
|
"loss": 1.03501296043396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27074500918388367,
|
|
"step": 712,
|
|
"valid_targets_mean": 14851.6,
|
|
"valid_targets_min": 6865
|
|
},
|
|
{
|
|
"epoch": 3.0341151385927505,
|
|
"grad_norm": 0.1211232230567513,
|
|
"learning_rate": 1.613099352250825e-05,
|
|
"loss": 1.0407111644744873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587260603904724,
|
|
"step": 713,
|
|
"valid_targets_mean": 14498.7,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 3.038379530916844,
|
|
"grad_norm": 0.09095746118093166,
|
|
"learning_rate": 1.6072690012644717e-05,
|
|
"loss": 1.0345542430877686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534143328666687,
|
|
"step": 714,
|
|
"valid_targets_mean": 15436.5,
|
|
"valid_targets_min": 8776
|
|
},
|
|
{
|
|
"epoch": 3.0426439232409384,
|
|
"grad_norm": 0.08902095291749587,
|
|
"learning_rate": 1.6014421196003022e-05,
|
|
"loss": 1.0071500539779663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860822081565857,
|
|
"step": 715,
|
|
"valid_targets_mean": 15381.8,
|
|
"valid_targets_min": 7119
|
|
},
|
|
{
|
|
"epoch": 3.046908315565032,
|
|
"grad_norm": 0.0998280174233871,
|
|
"learning_rate": 1.5956187587320468e-05,
|
|
"loss": 1.0196133852005005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257158488035202,
|
|
"step": 716,
|
|
"valid_targets_mean": 15293.0,
|
|
"valid_targets_min": 6810
|
|
},
|
|
{
|
|
"epoch": 3.0511727078891258,
|
|
"grad_norm": 0.08682173954913219,
|
|
"learning_rate": 1.5897989701023355e-05,
|
|
"loss": 1.0229237079620361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25756093859672546,
|
|
"step": 717,
|
|
"valid_targets_mean": 14520.8,
|
|
"valid_targets_min": 2888
|
|
},
|
|
{
|
|
"epoch": 3.0554371002132195,
|
|
"grad_norm": 0.08737913315661264,
|
|
"learning_rate": 1.58398280512224e-05,
|
|
"loss": 1.0397634506225586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673379182815552,
|
|
"step": 718,
|
|
"valid_targets_mean": 14851.3,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 3.0597014925373136,
|
|
"grad_norm": 0.11209766886967511,
|
|
"learning_rate": 1.5781703151708215e-05,
|
|
"loss": 1.0350569486618042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24551504850387573,
|
|
"step": 719,
|
|
"valid_targets_mean": 14170.4,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 3.0639658848614073,
|
|
"grad_norm": 0.0873560626410917,
|
|
"learning_rate": 1.5723615515946773e-05,
|
|
"loss": 1.0226738452911377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24808737635612488,
|
|
"step": 720,
|
|
"valid_targets_mean": 14073.4,
|
|
"valid_targets_min": 2808
|
|
},
|
|
{
|
|
"epoch": 3.068230277185501,
|
|
"grad_norm": 0.0818518394347687,
|
|
"learning_rate": 1.5665565657074874e-05,
|
|
"loss": 1.0064092874526978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26570916175842285,
|
|
"step": 721,
|
|
"valid_targets_mean": 15137.1,
|
|
"valid_targets_min": 4471
|
|
},
|
|
{
|
|
"epoch": 3.0724946695095947,
|
|
"grad_norm": 0.08924836568132075,
|
|
"learning_rate": 1.560755408789558e-05,
|
|
"loss": 1.0179588794708252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23607303202152252,
|
|
"step": 722,
|
|
"valid_targets_mean": 13179.7,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 3.076759061833689,
|
|
"grad_norm": 0.09797054455620641,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 1.0557875633239746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701112627983093,
|
|
"step": 723,
|
|
"valid_targets_mean": 15342.3,
|
|
"valid_targets_min": 8112
|
|
},
|
|
{
|
|
"epoch": 3.0810234541577826,
|
|
"grad_norm": 0.09576198787365933,
|
|
"learning_rate": 1.5491647868131343e-05,
|
|
"loss": 1.074175238609314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27521955966949463,
|
|
"step": 724,
|
|
"valid_targets_mean": 15699.3,
|
|
"valid_targets_min": 7970
|
|
},
|
|
{
|
|
"epoch": 3.0852878464818763,
|
|
"grad_norm": 0.09888061179924362,
|
|
"learning_rate": 1.5433754241443223e-05,
|
|
"loss": 1.00786554813385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2511468827724457,
|
|
"step": 725,
|
|
"valid_targets_mean": 15114.1,
|
|
"valid_targets_min": 6528
|
|
},
|
|
{
|
|
"epoch": 3.08955223880597,
|
|
"grad_norm": 0.09793442627923624,
|
|
"learning_rate": 1.53759009522323e-05,
|
|
"loss": 1.066227674484253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2882387042045593,
|
|
"step": 726,
|
|
"valid_targets_mean": 15416.6,
|
|
"valid_targets_min": 9705
|
|
},
|
|
{
|
|
"epoch": 3.093816631130064,
|
|
"grad_norm": 0.10874989018487148,
|
|
"learning_rate": 1.5318088511565185e-05,
|
|
"loss": 1.0540597438812256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2334098219871521,
|
|
"step": 727,
|
|
"valid_targets_mean": 14811.9,
|
|
"valid_targets_min": 4603
|
|
},
|
|
{
|
|
"epoch": 3.098081023454158,
|
|
"grad_norm": 0.10887111228658802,
|
|
"learning_rate": 1.5260317430147627e-05,
|
|
"loss": 1.0411701202392578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26907092332839966,
|
|
"step": 728,
|
|
"valid_targets_mean": 15423.6,
|
|
"valid_targets_min": 7996
|
|
},
|
|
{
|
|
"epoch": 3.1023454157782515,
|
|
"grad_norm": 0.09474672934125986,
|
|
"learning_rate": 1.5202588218320024e-05,
|
|
"loss": 1.062959909439087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27440589666366577,
|
|
"step": 729,
|
|
"valid_targets_mean": 15051.2,
|
|
"valid_targets_min": 10266
|
|
},
|
|
{
|
|
"epoch": 3.106609808102345,
|
|
"grad_norm": 0.12479139765419613,
|
|
"learning_rate": 1.5144901386052924e-05,
|
|
"loss": 1.0482529401779175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644003629684448,
|
|
"step": 730,
|
|
"valid_targets_mean": 14408.5,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 3.1108742004264394,
|
|
"grad_norm": 0.1190939558336681,
|
|
"learning_rate": 1.5087257442942467e-05,
|
|
"loss": 1.0438718795776367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560356557369232,
|
|
"step": 731,
|
|
"valid_targets_mean": 14297.5,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 3.115138592750533,
|
|
"grad_norm": 0.09211139347692805,
|
|
"learning_rate": 1.502965689820593e-05,
|
|
"loss": 1.0472971200942993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688388228416443,
|
|
"step": 732,
|
|
"valid_targets_mean": 15084.3,
|
|
"valid_targets_min": 6095
|
|
},
|
|
{
|
|
"epoch": 3.1194029850746268,
|
|
"grad_norm": 0.10217519839154926,
|
|
"learning_rate": 1.4972100260677222e-05,
|
|
"loss": 1.0276672840118408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2611340284347534,
|
|
"step": 733,
|
|
"valid_targets_mean": 15733.6,
|
|
"valid_targets_min": 10570
|
|
},
|
|
{
|
|
"epoch": 3.1236673773987205,
|
|
"grad_norm": 0.10100137254304324,
|
|
"learning_rate": 1.4914588038802383e-05,
|
|
"loss": 1.013986587524414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715274691581726,
|
|
"step": 734,
|
|
"valid_targets_mean": 15451.9,
|
|
"valid_targets_min": 8164
|
|
},
|
|
{
|
|
"epoch": 3.1279317697228146,
|
|
"grad_norm": 0.09590402587002252,
|
|
"learning_rate": 1.4857120740635084e-05,
|
|
"loss": 1.0150028467178345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22995221614837646,
|
|
"step": 735,
|
|
"valid_targets_mean": 14393.3,
|
|
"valid_targets_min": 3224
|
|
},
|
|
{
|
|
"epoch": 3.1321961620469083,
|
|
"grad_norm": 0.0931608093286139,
|
|
"learning_rate": 1.4799698873832153e-05,
|
|
"loss": 1.0242356061935425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570406496524811,
|
|
"step": 736,
|
|
"valid_targets_mean": 14658.0,
|
|
"valid_targets_min": 4752
|
|
},
|
|
{
|
|
"epoch": 3.136460554371002,
|
|
"grad_norm": 0.12882920488629043,
|
|
"learning_rate": 1.4742322945649073e-05,
|
|
"loss": 1.020544171333313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2499198317527771,
|
|
"step": 737,
|
|
"valid_targets_mean": 15270.8,
|
|
"valid_targets_min": 3754
|
|
},
|
|
{
|
|
"epoch": 3.140724946695096,
|
|
"grad_norm": 0.09152776153119625,
|
|
"learning_rate": 1.4684993462935532e-05,
|
|
"loss": 1.0674588680267334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25742030143737793,
|
|
"step": 738,
|
|
"valid_targets_mean": 14184.7,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 3.14498933901919,
|
|
"grad_norm": 0.08868227195162083,
|
|
"learning_rate": 1.462771093213092e-05,
|
|
"loss": 1.0410614013671875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26647597551345825,
|
|
"step": 739,
|
|
"valid_targets_mean": 14999.5,
|
|
"valid_targets_min": 4763
|
|
},
|
|
{
|
|
"epoch": 3.1492537313432836,
|
|
"grad_norm": 0.09805915284911056,
|
|
"learning_rate": 1.4570475859259856e-05,
|
|
"loss": 1.0700287818908691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671729326248169,
|
|
"step": 740,
|
|
"valid_targets_mean": 14543.1,
|
|
"valid_targets_min": 4241
|
|
},
|
|
{
|
|
"epoch": 3.1535181236673773,
|
|
"grad_norm": 0.09831970751982794,
|
|
"learning_rate": 1.4513288749927714e-05,
|
|
"loss": 1.014609932899475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2614646553993225,
|
|
"step": 741,
|
|
"valid_targets_mean": 14626.1,
|
|
"valid_targets_min": 7544
|
|
},
|
|
{
|
|
"epoch": 3.1577825159914714,
|
|
"grad_norm": 0.08775283241054559,
|
|
"learning_rate": 1.4456150109316192e-05,
|
|
"loss": 1.0609573125839233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644979953765869,
|
|
"step": 742,
|
|
"valid_targets_mean": 15274.3,
|
|
"valid_targets_min": 7076
|
|
},
|
|
{
|
|
"epoch": 3.162046908315565,
|
|
"grad_norm": 0.08815796513892146,
|
|
"learning_rate": 1.4399060442178798e-05,
|
|
"loss": 1.0535483360290527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26069730520248413,
|
|
"step": 743,
|
|
"valid_targets_mean": 14454.4,
|
|
"valid_targets_min": 2747
|
|
},
|
|
{
|
|
"epoch": 3.166311300639659,
|
|
"grad_norm": 0.07945435266847604,
|
|
"learning_rate": 1.4342020252836437e-05,
|
|
"loss": 1.0029892921447754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24885860085487366,
|
|
"step": 744,
|
|
"valid_targets_mean": 15031.0,
|
|
"valid_targets_min": 5119
|
|
},
|
|
{
|
|
"epoch": 3.1705756929637525,
|
|
"grad_norm": 0.10653046057731785,
|
|
"learning_rate": 1.4285030045172913e-05,
|
|
"loss": 1.0199320316314697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26799827814102173,
|
|
"step": 745,
|
|
"valid_targets_mean": 15330.1,
|
|
"valid_targets_min": 7525
|
|
},
|
|
{
|
|
"epoch": 3.1748400852878467,
|
|
"grad_norm": 0.0842329636878113,
|
|
"learning_rate": 1.422809032263052e-05,
|
|
"loss": 1.026256799697876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702501118183136,
|
|
"step": 746,
|
|
"valid_targets_mean": 15265.4,
|
|
"valid_targets_min": 3922
|
|
},
|
|
{
|
|
"epoch": 3.1791044776119404,
|
|
"grad_norm": 0.081964176490656,
|
|
"learning_rate": 1.4171201588205566e-05,
|
|
"loss": 1.0050396919250488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25963956117630005,
|
|
"step": 747,
|
|
"valid_targets_mean": 14697.8,
|
|
"valid_targets_min": 3662
|
|
},
|
|
{
|
|
"epoch": 3.183368869936034,
|
|
"grad_norm": 0.09545709255137813,
|
|
"learning_rate": 1.4114364344443935e-05,
|
|
"loss": 1.0147085189819336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24517302215099335,
|
|
"step": 748,
|
|
"valid_targets_mean": 15314.1,
|
|
"valid_targets_min": 6045
|
|
},
|
|
{
|
|
"epoch": 3.1876332622601278,
|
|
"grad_norm": 0.07898969208815516,
|
|
"learning_rate": 1.4057579093436653e-05,
|
|
"loss": 1.0305474996566772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2477126270532608,
|
|
"step": 749,
|
|
"valid_targets_mean": 14982.3,
|
|
"valid_targets_min": 5448
|
|
},
|
|
{
|
|
"epoch": 3.191897654584222,
|
|
"grad_norm": 0.08357378212331808,
|
|
"learning_rate": 1.400084633681546e-05,
|
|
"loss": 1.0168681144714355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22804000973701477,
|
|
"step": 750,
|
|
"valid_targets_mean": 15144.0,
|
|
"valid_targets_min": 6307
|
|
},
|
|
{
|
|
"epoch": 3.1961620469083156,
|
|
"grad_norm": 0.08707590690686529,
|
|
"learning_rate": 1.3944166575748355e-05,
|
|
"loss": 1.0593773126602173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545916438102722,
|
|
"step": 751,
|
|
"valid_targets_mean": 15302.6,
|
|
"valid_targets_min": 6494
|
|
},
|
|
{
|
|
"epoch": 3.2004264392324093,
|
|
"grad_norm": 0.09822240582339954,
|
|
"learning_rate": 1.3887540310935187e-05,
|
|
"loss": 1.003342866897583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594917118549347,
|
|
"step": 752,
|
|
"valid_targets_mean": 14751.0,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 3.204690831556503,
|
|
"grad_norm": 0.08709540834030992,
|
|
"learning_rate": 1.3830968042603226e-05,
|
|
"loss": 1.0506478548049927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556101679801941,
|
|
"step": 753,
|
|
"valid_targets_mean": 14109.2,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 3.208955223880597,
|
|
"grad_norm": 0.09010887681364946,
|
|
"learning_rate": 1.3774450270502762e-05,
|
|
"loss": 0.995093047618866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2390972077846527,
|
|
"step": 754,
|
|
"valid_targets_mean": 14549.6,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 3.213219616204691,
|
|
"grad_norm": 0.09392396378094517,
|
|
"learning_rate": 1.3717987493902656e-05,
|
|
"loss": 1.0172679424285889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24836307764053345,
|
|
"step": 755,
|
|
"valid_targets_mean": 15020.7,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 3.2174840085287846,
|
|
"grad_norm": 0.0934883283220574,
|
|
"learning_rate": 1.3661580211585947e-05,
|
|
"loss": 1.048058032989502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25592565536499023,
|
|
"step": 756,
|
|
"valid_targets_mean": 15018.7,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 3.2217484008528783,
|
|
"grad_norm": 0.09792396868886702,
|
|
"learning_rate": 1.3605228921845457e-05,
|
|
"loss": 1.021390676498413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654317021369934,
|
|
"step": 757,
|
|
"valid_targets_mean": 15263.4,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 3.2260127931769724,
|
|
"grad_norm": 0.09309475608287478,
|
|
"learning_rate": 1.3548934122479373e-05,
|
|
"loss": 1.0489925146102905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26911330223083496,
|
|
"step": 758,
|
|
"valid_targets_mean": 14592.7,
|
|
"valid_targets_min": 5192
|
|
},
|
|
{
|
|
"epoch": 3.230277185501066,
|
|
"grad_norm": 0.08393644496387626,
|
|
"learning_rate": 1.349269631078686e-05,
|
|
"loss": 1.0268635749816895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26067325472831726,
|
|
"step": 759,
|
|
"valid_targets_mean": 15398.3,
|
|
"valid_targets_min": 6104
|
|
},
|
|
{
|
|
"epoch": 3.23454157782516,
|
|
"grad_norm": 0.10672128903203676,
|
|
"learning_rate": 1.3436515983563659e-05,
|
|
"loss": 1.0773091316223145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25594526529312134,
|
|
"step": 760,
|
|
"valid_targets_mean": 14472.5,
|
|
"valid_targets_min": 2071
|
|
},
|
|
{
|
|
"epoch": 3.2388059701492535,
|
|
"grad_norm": 0.08602383455711411,
|
|
"learning_rate": 1.3380393637097692e-05,
|
|
"loss": 0.9852066040039062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24024848639965057,
|
|
"step": 761,
|
|
"valid_targets_mean": 14740.8,
|
|
"valid_targets_min": 4599
|
|
},
|
|
{
|
|
"epoch": 3.2430703624733477,
|
|
"grad_norm": 0.08711459473212163,
|
|
"learning_rate": 1.3324329767164708e-05,
|
|
"loss": 1.0126616954803467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2438962608575821,
|
|
"step": 762,
|
|
"valid_targets_mean": 14971.6,
|
|
"valid_targets_min": 7000
|
|
},
|
|
{
|
|
"epoch": 3.2473347547974414,
|
|
"grad_norm": 0.10700771034341505,
|
|
"learning_rate": 1.3268324869023878e-05,
|
|
"loss": 1.0273852348327637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23463326692581177,
|
|
"step": 763,
|
|
"valid_targets_mean": 14168.8,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 3.251599147121535,
|
|
"grad_norm": 0.0908818140117839,
|
|
"learning_rate": 1.3212379437413421e-05,
|
|
"loss": 1.0689035654067993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26748207211494446,
|
|
"step": 764,
|
|
"valid_targets_mean": 13935.3,
|
|
"valid_targets_min": 3111
|
|
},
|
|
{
|
|
"epoch": 3.2558635394456292,
|
|
"grad_norm": 0.08904191404840264,
|
|
"learning_rate": 1.3156493966546236e-05,
|
|
"loss": 1.017478108406067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25639939308166504,
|
|
"step": 765,
|
|
"valid_targets_mean": 15500.0,
|
|
"valid_targets_min": 11246
|
|
},
|
|
{
|
|
"epoch": 3.260127931769723,
|
|
"grad_norm": 0.08990591936584785,
|
|
"learning_rate": 1.3100668950105534e-05,
|
|
"loss": 1.0317257642745972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547551393508911,
|
|
"step": 766,
|
|
"valid_targets_mean": 14275.1,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 3.2643923240938166,
|
|
"grad_norm": 0.07967233196602748,
|
|
"learning_rate": 1.3044904881240507e-05,
|
|
"loss": 0.9703269004821777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23579402267932892,
|
|
"step": 767,
|
|
"valid_targets_mean": 14848.2,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.2686567164179103,
|
|
"grad_norm": 0.09888289059073466,
|
|
"learning_rate": 1.2989202252561926e-05,
|
|
"loss": 1.0312223434448242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27673935890197754,
|
|
"step": 768,
|
|
"valid_targets_mean": 15244.9,
|
|
"valid_targets_min": 10098
|
|
},
|
|
{
|
|
"epoch": 3.272921108742004,
|
|
"grad_norm": 0.10656929671281667,
|
|
"learning_rate": 1.2933561556137806e-05,
|
|
"loss": 1.0420056581497192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26682281494140625,
|
|
"step": 769,
|
|
"valid_targets_mean": 15391.3,
|
|
"valid_targets_min": 8556
|
|
},
|
|
{
|
|
"epoch": 3.277185501066098,
|
|
"grad_norm": 0.09441416382065476,
|
|
"learning_rate": 1.2877983283489062e-05,
|
|
"loss": 1.0019264221191406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2664748430252075,
|
|
"step": 770,
|
|
"valid_targets_mean": 15493.1,
|
|
"valid_targets_min": 8459
|
|
},
|
|
{
|
|
"epoch": 3.281449893390192,
|
|
"grad_norm": 0.08990039393288893,
|
|
"learning_rate": 1.2822467925585186e-05,
|
|
"loss": 1.0493959188461304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2742432951927185,
|
|
"step": 771,
|
|
"valid_targets_mean": 15133.0,
|
|
"valid_targets_min": 4809
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 0.08358419050812697,
|
|
"learning_rate": 1.2767015972839879e-05,
|
|
"loss": 0.9922876358032227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552100419998169,
|
|
"step": 772,
|
|
"valid_targets_mean": 14905.7,
|
|
"valid_targets_min": 2055
|
|
},
|
|
{
|
|
"epoch": 3.2899786780383797,
|
|
"grad_norm": 0.09834313288741205,
|
|
"learning_rate": 1.2711627915106728e-05,
|
|
"loss": 1.0197315216064453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22460338473320007,
|
|
"step": 773,
|
|
"valid_targets_mean": 14333.1,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 3.2942430703624734,
|
|
"grad_norm": 0.08287782605828424,
|
|
"learning_rate": 1.2656304241674877e-05,
|
|
"loss": 1.0270302295684814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22859780490398407,
|
|
"step": 774,
|
|
"valid_targets_mean": 13977.2,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 3.298507462686567,
|
|
"grad_norm": 0.08852464909783203,
|
|
"learning_rate": 1.2601045441264734e-05,
|
|
"loss": 1.0251355171203613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25335028767585754,
|
|
"step": 775,
|
|
"valid_targets_mean": 14786.3,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 3.302771855010661,
|
|
"grad_norm": 0.08893171919055332,
|
|
"learning_rate": 1.2545852002023599e-05,
|
|
"loss": 1.035538673400879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23715808987617493,
|
|
"step": 776,
|
|
"valid_targets_mean": 14105.4,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 3.307036247334755,
|
|
"grad_norm": 0.08968860057598359,
|
|
"learning_rate": 1.2490724411521406e-05,
|
|
"loss": 1.1000900268554688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26642662286758423,
|
|
"step": 777,
|
|
"valid_targets_mean": 15000.0,
|
|
"valid_targets_min": 3080
|
|
},
|
|
{
|
|
"epoch": 3.3113006396588487,
|
|
"grad_norm": 0.08646055380898798,
|
|
"learning_rate": 1.243566315674637e-05,
|
|
"loss": 1.022867202758789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26644641160964966,
|
|
"step": 778,
|
|
"valid_targets_mean": 15140.9,
|
|
"valid_targets_min": 4136
|
|
},
|
|
{
|
|
"epoch": 3.3155650319829424,
|
|
"grad_norm": 0.09771195624050971,
|
|
"learning_rate": 1.238066872410073e-05,
|
|
"loss": 1.1028798818588257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694508135318756,
|
|
"step": 779,
|
|
"valid_targets_mean": 13780.2,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 3.319829424307036,
|
|
"grad_norm": 0.09907125640509128,
|
|
"learning_rate": 1.2325741599396418e-05,
|
|
"loss": 1.0412553548812866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24542492628097534,
|
|
"step": 780,
|
|
"valid_targets_mean": 14005.5,
|
|
"valid_targets_min": 2250
|
|
},
|
|
{
|
|
"epoch": 3.3240938166311302,
|
|
"grad_norm": 0.09043274695084158,
|
|
"learning_rate": 1.2270882267850765e-05,
|
|
"loss": 0.974776566028595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2505064308643341,
|
|
"step": 781,
|
|
"valid_targets_mean": 15828.2,
|
|
"valid_targets_min": 6805
|
|
},
|
|
{
|
|
"epoch": 3.328358208955224,
|
|
"grad_norm": 0.08819385852514708,
|
|
"learning_rate": 1.2216091214082248e-05,
|
|
"loss": 0.9863239526748657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2462753802537918,
|
|
"step": 782,
|
|
"valid_targets_mean": 14715.3,
|
|
"valid_targets_min": 4136
|
|
},
|
|
{
|
|
"epoch": 3.3326226012793176,
|
|
"grad_norm": 0.08864037315292998,
|
|
"learning_rate": 1.2161368922106192e-05,
|
|
"loss": 1.0442707538604736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768702507019043,
|
|
"step": 783,
|
|
"valid_targets_mean": 15831.5,
|
|
"valid_targets_min": 11302
|
|
},
|
|
{
|
|
"epoch": 3.3368869936034113,
|
|
"grad_norm": 0.09255623316637619,
|
|
"learning_rate": 1.2106715875330475e-05,
|
|
"loss": 1.048423171043396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27612558007240295,
|
|
"step": 784,
|
|
"valid_targets_mean": 14867.5,
|
|
"valid_targets_min": 3892
|
|
},
|
|
{
|
|
"epoch": 3.3411513859275055,
|
|
"grad_norm": 0.09357936738476177,
|
|
"learning_rate": 1.2052132556551275e-05,
|
|
"loss": 1.090922236442566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25026872754096985,
|
|
"step": 785,
|
|
"valid_targets_mean": 14097.4,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 3.345415778251599,
|
|
"grad_norm": 0.08335719660288012,
|
|
"learning_rate": 1.1997619447948814e-05,
|
|
"loss": 1.0451135635375977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629640996456146,
|
|
"step": 786,
|
|
"valid_targets_mean": 14466.1,
|
|
"valid_targets_min": 2478
|
|
},
|
|
{
|
|
"epoch": 3.349680170575693,
|
|
"grad_norm": 0.12144505843352507,
|
|
"learning_rate": 1.1943177031083094e-05,
|
|
"loss": 1.0659239292144775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779039144515991,
|
|
"step": 787,
|
|
"valid_targets_mean": 15759.0,
|
|
"valid_targets_min": 10275
|
|
},
|
|
{
|
|
"epoch": 3.3539445628997866,
|
|
"grad_norm": 0.09509795265273671,
|
|
"learning_rate": 1.1888805786889621e-05,
|
|
"loss": 1.0627827644348145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26427340507507324,
|
|
"step": 788,
|
|
"valid_targets_mean": 15404.2,
|
|
"valid_targets_min": 3780
|
|
},
|
|
{
|
|
"epoch": 3.3582089552238807,
|
|
"grad_norm": 0.08684363993738695,
|
|
"learning_rate": 1.183450619567518e-05,
|
|
"loss": 1.0289561748504639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25893306732177734,
|
|
"step": 789,
|
|
"valid_targets_mean": 14487.5,
|
|
"valid_targets_min": 4562
|
|
},
|
|
{
|
|
"epoch": 3.3624733475479744,
|
|
"grad_norm": 0.09049543801727776,
|
|
"learning_rate": 1.1780278737113581e-05,
|
|
"loss": 1.0684177875518799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586260735988617,
|
|
"step": 790,
|
|
"valid_targets_mean": 15182.4,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 3.366737739872068,
|
|
"grad_norm": 0.09511393414317239,
|
|
"learning_rate": 1.1726123890241439e-05,
|
|
"loss": 1.0418816804885864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26466333866119385,
|
|
"step": 791,
|
|
"valid_targets_mean": 14249.2,
|
|
"valid_targets_min": 3173
|
|
},
|
|
{
|
|
"epoch": 3.3710021321961623,
|
|
"grad_norm": 0.0833172917991391,
|
|
"learning_rate": 1.1672042133453925e-05,
|
|
"loss": 1.03490149974823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661225497722626,
|
|
"step": 792,
|
|
"valid_targets_mean": 15123.5,
|
|
"valid_targets_min": 6370
|
|
},
|
|
{
|
|
"epoch": 3.375266524520256,
|
|
"grad_norm": 0.09122665974378442,
|
|
"learning_rate": 1.1618033944500527e-05,
|
|
"loss": 0.9761382341384888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23575016856193542,
|
|
"step": 793,
|
|
"valid_targets_mean": 15160.9,
|
|
"valid_targets_min": 2453
|
|
},
|
|
{
|
|
"epoch": 3.3795309168443497,
|
|
"grad_norm": 0.08375954561827709,
|
|
"learning_rate": 1.1564099800480864e-05,
|
|
"loss": 1.0144245624542236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23563915491104126,
|
|
"step": 794,
|
|
"valid_targets_mean": 14153.6,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 3.3837953091684434,
|
|
"grad_norm": 0.1136926122525388,
|
|
"learning_rate": 1.151024017784045e-05,
|
|
"loss": 1.027612566947937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25527799129486084,
|
|
"step": 795,
|
|
"valid_targets_mean": 14646.9,
|
|
"valid_targets_min": 4829
|
|
},
|
|
{
|
|
"epoch": 3.388059701492537,
|
|
"grad_norm": 0.08609159834005825,
|
|
"learning_rate": 1.1456455552366488e-05,
|
|
"loss": 1.0193374156951904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715807259082794,
|
|
"step": 796,
|
|
"valid_targets_mean": 15776.1,
|
|
"valid_targets_min": 12003
|
|
},
|
|
{
|
|
"epoch": 3.3923240938166312,
|
|
"grad_norm": 0.083867845288976,
|
|
"learning_rate": 1.1402746399183671e-05,
|
|
"loss": 1.021320104598999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25966545939445496,
|
|
"step": 797,
|
|
"valid_targets_mean": 15247.9,
|
|
"valid_targets_min": 7778
|
|
},
|
|
{
|
|
"epoch": 3.396588486140725,
|
|
"grad_norm": 0.09215925032697382,
|
|
"learning_rate": 1.1349113192749986e-05,
|
|
"loss": 1.0328900814056396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267875075340271,
|
|
"step": 798,
|
|
"valid_targets_mean": 15269.8,
|
|
"valid_targets_min": 7239
|
|
},
|
|
{
|
|
"epoch": 3.4008528784648187,
|
|
"grad_norm": 0.08094579179999896,
|
|
"learning_rate": 1.1295556406852488e-05,
|
|
"loss": 1.0496559143066406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24950623512268066,
|
|
"step": 799,
|
|
"valid_targets_mean": 14462.9,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 3.405117270788913,
|
|
"grad_norm": 0.07939076724256122,
|
|
"learning_rate": 1.1242076514603201e-05,
|
|
"loss": 1.0052372217178345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23071691393852234,
|
|
"step": 800,
|
|
"valid_targets_mean": 14676.0,
|
|
"valid_targets_min": 3066
|
|
},
|
|
{
|
|
"epoch": 3.4093816631130065,
|
|
"grad_norm": 0.10488223002634041,
|
|
"learning_rate": 1.1188673988434831e-05,
|
|
"loss": 1.0472116470336914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25849294662475586,
|
|
"step": 801,
|
|
"valid_targets_mean": 14420.9,
|
|
"valid_targets_min": 3393
|
|
},
|
|
{
|
|
"epoch": 3.4136460554371,
|
|
"grad_norm": 0.08589246757161735,
|
|
"learning_rate": 1.1135349300096667e-05,
|
|
"loss": 1.0184279680252075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2548155188560486,
|
|
"step": 802,
|
|
"valid_targets_mean": 15368.0,
|
|
"valid_targets_min": 3409
|
|
},
|
|
{
|
|
"epoch": 3.417910447761194,
|
|
"grad_norm": 0.08333682776191335,
|
|
"learning_rate": 1.1082102920650397e-05,
|
|
"loss": 1.0307121276855469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24245811998844147,
|
|
"step": 803,
|
|
"valid_targets_mean": 14432.2,
|
|
"valid_targets_min": 4607
|
|
},
|
|
{
|
|
"epoch": 3.4221748400852876,
|
|
"grad_norm": 0.0848050044592849,
|
|
"learning_rate": 1.102893532046593e-05,
|
|
"loss": 1.053544282913208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702012062072754,
|
|
"step": 804,
|
|
"valid_targets_mean": 14449.1,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 3.4264392324093818,
|
|
"grad_norm": 0.0891906939724688,
|
|
"learning_rate": 1.0975846969217258e-05,
|
|
"loss": 1.0232945680618286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27444830536842346,
|
|
"step": 805,
|
|
"valid_targets_mean": 15194.3,
|
|
"valid_targets_min": 7986
|
|
},
|
|
{
|
|
"epoch": 3.4307036247334755,
|
|
"grad_norm": 0.10949028163406868,
|
|
"learning_rate": 1.092283833587829e-05,
|
|
"loss": 1.0414726734161377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2692057490348816,
|
|
"step": 806,
|
|
"valid_targets_mean": 14994.5,
|
|
"valid_targets_min": 4552
|
|
},
|
|
{
|
|
"epoch": 3.434968017057569,
|
|
"grad_norm": 0.08958543707570064,
|
|
"learning_rate": 1.086990988871873e-05,
|
|
"loss": 1.023406744003296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501792907714844,
|
|
"step": 807,
|
|
"valid_targets_mean": 14401.8,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 3.4392324093816633,
|
|
"grad_norm": 0.08975523446559025,
|
|
"learning_rate": 1.0817062095299929e-05,
|
|
"loss": 1.051383376121521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2544962763786316,
|
|
"step": 808,
|
|
"valid_targets_mean": 13971.6,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 3.443496801705757,
|
|
"grad_norm": 0.09703685485955625,
|
|
"learning_rate": 1.0764295422470755e-05,
|
|
"loss": 1.0269638299942017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25050097703933716,
|
|
"step": 809,
|
|
"valid_targets_mean": 14947.8,
|
|
"valid_targets_min": 3223
|
|
},
|
|
{
|
|
"epoch": 3.4477611940298507,
|
|
"grad_norm": 0.09300711665012112,
|
|
"learning_rate": 1.0711610336363477e-05,
|
|
"loss": 1.0450351238250732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627946734428406,
|
|
"step": 810,
|
|
"valid_targets_mean": 15129.2,
|
|
"valid_targets_min": 5196
|
|
},
|
|
{
|
|
"epoch": 3.4520255863539444,
|
|
"grad_norm": 0.08382352073085601,
|
|
"learning_rate": 1.065900730238961e-05,
|
|
"loss": 0.9718806743621826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24593904614448547,
|
|
"step": 811,
|
|
"valid_targets_mean": 15566.4,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 3.4562899786780386,
|
|
"grad_norm": 0.08119503430332328,
|
|
"learning_rate": 1.0606486785235879e-05,
|
|
"loss": 1.0306479930877686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581614851951599,
|
|
"step": 812,
|
|
"valid_targets_mean": 14889.2,
|
|
"valid_targets_min": 7534
|
|
},
|
|
{
|
|
"epoch": 3.4605543710021323,
|
|
"grad_norm": 0.08028081567325662,
|
|
"learning_rate": 1.0554049248860045e-05,
|
|
"loss": 1.0699286460876465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257074236869812,
|
|
"step": 813,
|
|
"valid_targets_mean": 13585.2,
|
|
"valid_targets_min": 2276
|
|
},
|
|
{
|
|
"epoch": 3.464818763326226,
|
|
"grad_norm": 0.10569534948008809,
|
|
"learning_rate": 1.0501695156486819e-05,
|
|
"loss": 1.031941294670105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24841266870498657,
|
|
"step": 814,
|
|
"valid_targets_mean": 15114.4,
|
|
"valid_targets_min": 2661
|
|
},
|
|
{
|
|
"epoch": 3.4690831556503197,
|
|
"grad_norm": 0.10269222842321281,
|
|
"learning_rate": 1.0449424970603796e-05,
|
|
"loss": 0.9980067610740662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2390216439962387,
|
|
"step": 815,
|
|
"valid_targets_mean": 14622.7,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 3.473347547974414,
|
|
"grad_norm": 0.08769027466424714,
|
|
"learning_rate": 1.0397239152957356e-05,
|
|
"loss": 1.0175775289535522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24682655930519104,
|
|
"step": 816,
|
|
"valid_targets_mean": 14203.2,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 3.4776119402985075,
|
|
"grad_norm": 0.09103252026860258,
|
|
"learning_rate": 1.034513816454858e-05,
|
|
"loss": 1.074886441230774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26357603073120117,
|
|
"step": 817,
|
|
"valid_targets_mean": 14293.2,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 3.481876332622601,
|
|
"grad_norm": 0.08612886065959924,
|
|
"learning_rate": 1.0293122465629186e-05,
|
|
"loss": 1.001979112625122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2555549740791321,
|
|
"step": 818,
|
|
"valid_targets_mean": 14997.3,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 3.486140724946695,
|
|
"grad_norm": 0.08380119684770174,
|
|
"learning_rate": 1.0241192515697432e-05,
|
|
"loss": 1.028665542602539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24578630924224854,
|
|
"step": 819,
|
|
"valid_targets_mean": 14548.8,
|
|
"valid_targets_min": 2986
|
|
},
|
|
{
|
|
"epoch": 3.490405117270789,
|
|
"grad_norm": 0.09646116991438991,
|
|
"learning_rate": 1.0189348773494135e-05,
|
|
"loss": 1.0738844871520996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945839762687683,
|
|
"step": 820,
|
|
"valid_targets_mean": 15371.6,
|
|
"valid_targets_min": 4414
|
|
},
|
|
{
|
|
"epoch": 3.4946695095948828,
|
|
"grad_norm": 0.08212133510972003,
|
|
"learning_rate": 1.0137591696998514e-05,
|
|
"loss": 1.0327086448669434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559362053871155,
|
|
"step": 821,
|
|
"valid_targets_mean": 14371.6,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 3.4989339019189765,
|
|
"grad_norm": 0.08462013635773709,
|
|
"learning_rate": 1.0085921743424225e-05,
|
|
"loss": 1.0202784538269043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736544609069824,
|
|
"step": 822,
|
|
"valid_targets_mean": 15118.9,
|
|
"valid_targets_min": 4470
|
|
},
|
|
{
|
|
"epoch": 3.50319829424307,
|
|
"grad_norm": 0.08282828440700495,
|
|
"learning_rate": 1.0034339369215288e-05,
|
|
"loss": 1.0661686658859253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27339330315589905,
|
|
"step": 823,
|
|
"valid_targets_mean": 15402.6,
|
|
"valid_targets_min": 9918
|
|
},
|
|
{
|
|
"epoch": 3.5074626865671643,
|
|
"grad_norm": 0.0939885019575759,
|
|
"learning_rate": 9.982845030042068e-06,
|
|
"loss": 1.0056620836257935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24403563141822815,
|
|
"step": 824,
|
|
"valid_targets_mean": 15011.3,
|
|
"valid_targets_min": 4139
|
|
},
|
|
{
|
|
"epoch": 3.511727078891258,
|
|
"grad_norm": 0.08441867702001134,
|
|
"learning_rate": 9.931439180797237e-06,
|
|
"loss": 1.0503032207489014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536422610282898,
|
|
"step": 825,
|
|
"valid_targets_mean": 15612.0,
|
|
"valid_targets_min": 9230
|
|
},
|
|
{
|
|
"epoch": 3.5159914712153517,
|
|
"grad_norm": 0.08594513150129277,
|
|
"learning_rate": 9.880122275591752e-06,
|
|
"loss": 0.9856399297714233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723175883293152,
|
|
"step": 826,
|
|
"valid_targets_mean": 14429.9,
|
|
"valid_targets_min": 3244
|
|
},
|
|
{
|
|
"epoch": 3.520255863539446,
|
|
"grad_norm": 0.09026586508608515,
|
|
"learning_rate": 9.828894767750865e-06,
|
|
"loss": 1.0838544368743896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2742552161216736,
|
|
"step": 827,
|
|
"valid_targets_mean": 15251.6,
|
|
"valid_targets_min": 3087
|
|
},
|
|
{
|
|
"epoch": 3.5245202558635396,
|
|
"grad_norm": 0.07855066068637408,
|
|
"learning_rate": 9.777757109810102e-06,
|
|
"loss": 1.0196110010147095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25420427322387695,
|
|
"step": 828,
|
|
"valid_targets_mean": 14387.2,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 3.5287846481876333,
|
|
"grad_norm": 0.08061819139145802,
|
|
"learning_rate": 9.726709753511275e-06,
|
|
"loss": 0.9941138029098511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532624304294586,
|
|
"step": 829,
|
|
"valid_targets_mean": 15536.9,
|
|
"valid_targets_min": 9734
|
|
},
|
|
{
|
|
"epoch": 3.533049040511727,
|
|
"grad_norm": 0.10392351976244897,
|
|
"learning_rate": 9.675753149798474e-06,
|
|
"loss": 1.0283386707305908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26266804337501526,
|
|
"step": 830,
|
|
"valid_targets_mean": 15026.4,
|
|
"valid_targets_min": 7521
|
|
},
|
|
{
|
|
"epoch": 3.5373134328358207,
|
|
"grad_norm": 0.08752778954012838,
|
|
"learning_rate": 9.624887748814118e-06,
|
|
"loss": 1.0715502500534058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2519451975822449,
|
|
"step": 831,
|
|
"valid_targets_mean": 15512.7,
|
|
"valid_targets_min": 10422
|
|
},
|
|
{
|
|
"epoch": 3.541577825159915,
|
|
"grad_norm": 0.08502642572582692,
|
|
"learning_rate": 9.574113999894909e-06,
|
|
"loss": 1.04764986038208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2436976432800293,
|
|
"step": 832,
|
|
"valid_targets_mean": 14332.5,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 3.5458422174840085,
|
|
"grad_norm": 0.08183690163182732,
|
|
"learning_rate": 9.523432351567979e-06,
|
|
"loss": 1.009023904800415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624768018722534,
|
|
"step": 833,
|
|
"valid_targets_mean": 15468.8,
|
|
"valid_targets_min": 8079
|
|
},
|
|
{
|
|
"epoch": 3.550106609808102,
|
|
"grad_norm": 0.08505381444826653,
|
|
"learning_rate": 9.472843251546792e-06,
|
|
"loss": 1.0535686016082764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24943134188652039,
|
|
"step": 834,
|
|
"valid_targets_mean": 13647.8,
|
|
"valid_targets_min": 2611
|
|
},
|
|
{
|
|
"epoch": 3.5543710021321964,
|
|
"grad_norm": 0.0897777424153501,
|
|
"learning_rate": 9.422347146727294e-06,
|
|
"loss": 1.0463916063308716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25419020652770996,
|
|
"step": 835,
|
|
"valid_targets_mean": 14754.8,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 3.55863539445629,
|
|
"grad_norm": 0.08791761119999952,
|
|
"learning_rate": 9.371944483183912e-06,
|
|
"loss": 1.0125620365142822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24228467047214508,
|
|
"step": 836,
|
|
"valid_targets_mean": 14726.8,
|
|
"valid_targets_min": 6332
|
|
},
|
|
{
|
|
"epoch": 3.5628997867803838,
|
|
"grad_norm": 0.07614687201404623,
|
|
"learning_rate": 9.321635706165635e-06,
|
|
"loss": 1.0277087688446045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26706069707870483,
|
|
"step": 837,
|
|
"valid_targets_mean": 15485.4,
|
|
"valid_targets_min": 9379
|
|
},
|
|
{
|
|
"epoch": 3.5671641791044775,
|
|
"grad_norm": 0.0812121227567108,
|
|
"learning_rate": 9.271421260092075e-06,
|
|
"loss": 1.0401611328125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2550191283226013,
|
|
"step": 838,
|
|
"valid_targets_mean": 14509.5,
|
|
"valid_targets_min": 8384
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.0890663436170254,
|
|
"learning_rate": 9.221301588549519e-06,
|
|
"loss": 0.9943847060203552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645444869995117,
|
|
"step": 839,
|
|
"valid_targets_mean": 15257.1,
|
|
"valid_targets_min": 9607
|
|
},
|
|
{
|
|
"epoch": 3.5756929637526653,
|
|
"grad_norm": 0.08286455702656072,
|
|
"learning_rate": 9.171277134287057e-06,
|
|
"loss": 1.0006598234176636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24054405093193054,
|
|
"step": 840,
|
|
"valid_targets_mean": 14489.2,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 3.579957356076759,
|
|
"grad_norm": 0.1077040295098192,
|
|
"learning_rate": 9.121348339212634e-06,
|
|
"loss": 1.0696783065795898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620352804660797,
|
|
"step": 841,
|
|
"valid_targets_mean": 15070.0,
|
|
"valid_targets_min": 3025
|
|
},
|
|
{
|
|
"epoch": 3.5842217484008527,
|
|
"grad_norm": 0.07951933388848838,
|
|
"learning_rate": 9.07151564438916e-06,
|
|
"loss": 1.0507144927978516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834031283855438,
|
|
"step": 842,
|
|
"valid_targets_mean": 14870.1,
|
|
"valid_targets_min": 6984
|
|
},
|
|
{
|
|
"epoch": 3.588486140724947,
|
|
"grad_norm": 0.082957792336547,
|
|
"learning_rate": 9.021779490030611e-06,
|
|
"loss": 1.0006059408187866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23502129316329956,
|
|
"step": 843,
|
|
"valid_targets_mean": 14381.8,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 3.5927505330490406,
|
|
"grad_norm": 0.07715242189716764,
|
|
"learning_rate": 8.972140315498119e-06,
|
|
"loss": 1.0210398435592651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652220129966736,
|
|
"step": 844,
|
|
"valid_targets_mean": 15549.3,
|
|
"valid_targets_min": 8712
|
|
},
|
|
{
|
|
"epoch": 3.5970149253731343,
|
|
"grad_norm": 0.08284734582275866,
|
|
"learning_rate": 8.922598559296154e-06,
|
|
"loss": 1.041926383972168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657508850097656,
|
|
"step": 845,
|
|
"valid_targets_mean": 15537.5,
|
|
"valid_targets_min": 10056
|
|
},
|
|
{
|
|
"epoch": 3.6012793176972284,
|
|
"grad_norm": 0.08480649073786375,
|
|
"learning_rate": 8.873154659068582e-06,
|
|
"loss": 0.990920901298523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244873195886612,
|
|
"step": 846,
|
|
"valid_targets_mean": 14691.8,
|
|
"valid_targets_min": 4286
|
|
},
|
|
{
|
|
"epoch": 3.605543710021322,
|
|
"grad_norm": 0.08549953988143626,
|
|
"learning_rate": 8.823809051594816e-06,
|
|
"loss": 1.0318598747253418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24531647562980652,
|
|
"step": 847,
|
|
"valid_targets_mean": 14132.1,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 3.609808102345416,
|
|
"grad_norm": 0.10109613135718828,
|
|
"learning_rate": 8.774562172785988e-06,
|
|
"loss": 1.013717770576477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23869401216506958,
|
|
"step": 848,
|
|
"valid_targets_mean": 14452.1,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 3.6140724946695095,
|
|
"grad_norm": 0.08165886038369316,
|
|
"learning_rate": 8.725414457681063e-06,
|
|
"loss": 1.063403844833374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838260531425476,
|
|
"step": 849,
|
|
"valid_targets_mean": 15697.1,
|
|
"valid_targets_min": 4350
|
|
},
|
|
{
|
|
"epoch": 3.6183368869936032,
|
|
"grad_norm": 0.08435369862288104,
|
|
"learning_rate": 8.676366340443017e-06,
|
|
"loss": 0.9874081611633301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2375505268573761,
|
|
"step": 850,
|
|
"valid_targets_mean": 15112.3,
|
|
"valid_targets_min": 6566
|
|
},
|
|
{
|
|
"epoch": 3.6226012793176974,
|
|
"grad_norm": 0.09404947567739144,
|
|
"learning_rate": 8.627418254355e-06,
|
|
"loss": 1.0434298515319824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26521697640419006,
|
|
"step": 851,
|
|
"valid_targets_mean": 14487.1,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 3.626865671641791,
|
|
"grad_norm": 0.08544312834408434,
|
|
"learning_rate": 8.578570631816474e-06,
|
|
"loss": 1.0267388820648193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539098262786865,
|
|
"step": 852,
|
|
"valid_targets_mean": 14514.5,
|
|
"valid_targets_min": 8625
|
|
},
|
|
{
|
|
"epoch": 3.631130063965885,
|
|
"grad_norm": 0.09434929655794602,
|
|
"learning_rate": 8.529823904339472e-06,
|
|
"loss": 1.0240907669067383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26411551237106323,
|
|
"step": 853,
|
|
"valid_targets_mean": 14144.6,
|
|
"valid_targets_min": 4400
|
|
},
|
|
{
|
|
"epoch": 3.635394456289979,
|
|
"grad_norm": 0.08374456165741265,
|
|
"learning_rate": 8.481178502544684e-06,
|
|
"loss": 1.045487642288208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23568733036518097,
|
|
"step": 854,
|
|
"valid_targets_mean": 14572.8,
|
|
"valid_targets_min": 3510
|
|
},
|
|
{
|
|
"epoch": 3.6396588486140726,
|
|
"grad_norm": 0.10784522513002642,
|
|
"learning_rate": 8.43263485615774e-06,
|
|
"loss": 1.023280143737793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622443437576294,
|
|
"step": 855,
|
|
"valid_targets_mean": 15251.7,
|
|
"valid_targets_min": 4597
|
|
},
|
|
{
|
|
"epoch": 3.6439232409381663,
|
|
"grad_norm": 0.10854481191429571,
|
|
"learning_rate": 8.384193394005372e-06,
|
|
"loss": 1.0276780128479004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26878824830055237,
|
|
"step": 856,
|
|
"valid_targets_mean": 15089.5,
|
|
"valid_targets_min": 10868
|
|
},
|
|
{
|
|
"epoch": 3.64818763326226,
|
|
"grad_norm": 0.09826703831891268,
|
|
"learning_rate": 8.33585454401161e-06,
|
|
"loss": 1.0379751920700073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24941225349903107,
|
|
"step": 857,
|
|
"valid_targets_mean": 14409.4,
|
|
"valid_targets_min": 6141
|
|
},
|
|
{
|
|
"epoch": 3.6524520255863537,
|
|
"grad_norm": 0.10278878232245207,
|
|
"learning_rate": 8.287618733194073e-06,
|
|
"loss": 1.0222830772399902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25176340341567993,
|
|
"step": 858,
|
|
"valid_targets_mean": 15116.8,
|
|
"valid_targets_min": 4752
|
|
},
|
|
{
|
|
"epoch": 3.656716417910448,
|
|
"grad_norm": 0.1019910802283647,
|
|
"learning_rate": 8.239486387660096e-06,
|
|
"loss": 1.0164682865142822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575756311416626,
|
|
"step": 859,
|
|
"valid_targets_mean": 15095.1,
|
|
"valid_targets_min": 2327
|
|
},
|
|
{
|
|
"epoch": 3.6609808102345416,
|
|
"grad_norm": 0.09850413329710703,
|
|
"learning_rate": 8.191457932603052e-06,
|
|
"loss": 1.0082552433013916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266116201877594,
|
|
"step": 860,
|
|
"valid_targets_mean": 14914.2,
|
|
"valid_targets_min": 2351
|
|
},
|
|
{
|
|
"epoch": 3.6652452025586353,
|
|
"grad_norm": 0.11295918923336667,
|
|
"learning_rate": 8.143533792298545e-06,
|
|
"loss": 1.044511318206787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2526169419288635,
|
|
"step": 861,
|
|
"valid_targets_mean": 13972.2,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 3.6695095948827294,
|
|
"grad_norm": 0.1049263909360616,
|
|
"learning_rate": 8.095714390100698e-06,
|
|
"loss": 1.027268886566162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27266305685043335,
|
|
"step": 862,
|
|
"valid_targets_mean": 15099.7,
|
|
"valid_targets_min": 5832
|
|
},
|
|
{
|
|
"epoch": 3.673773987206823,
|
|
"grad_norm": 0.0856143507453554,
|
|
"learning_rate": 8.048000148438375e-06,
|
|
"loss": 1.0215270519256592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2595754861831665,
|
|
"step": 863,
|
|
"valid_targets_mean": 14788.7,
|
|
"valid_targets_min": 2120
|
|
},
|
|
{
|
|
"epoch": 3.678038379530917,
|
|
"grad_norm": 0.07908065366604718,
|
|
"learning_rate": 8.000391488811485e-06,
|
|
"loss": 1.0316400527954102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26067250967025757,
|
|
"step": 864,
|
|
"valid_targets_mean": 15144.5,
|
|
"valid_targets_min": 5179
|
|
},
|
|
{
|
|
"epoch": 3.6823027718550105,
|
|
"grad_norm": 0.0834953997636233,
|
|
"learning_rate": 7.952888831787215e-06,
|
|
"loss": 1.0844944715499878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583216428756714,
|
|
"step": 865,
|
|
"valid_targets_mean": 15116.6,
|
|
"valid_targets_min": 6478
|
|
},
|
|
{
|
|
"epoch": 3.6865671641791042,
|
|
"grad_norm": 0.08378814354500813,
|
|
"learning_rate": 7.905492596996391e-06,
|
|
"loss": 1.0059458017349243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2446044236421585,
|
|
"step": 866,
|
|
"valid_targets_mean": 15007.4,
|
|
"valid_targets_min": 6362
|
|
},
|
|
{
|
|
"epoch": 3.6908315565031984,
|
|
"grad_norm": 0.07999522658287574,
|
|
"learning_rate": 7.858203203129668e-06,
|
|
"loss": 1.0042922496795654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2598060369491577,
|
|
"step": 867,
|
|
"valid_targets_mean": 15510.6,
|
|
"valid_targets_min": 8530
|
|
},
|
|
{
|
|
"epoch": 3.695095948827292,
|
|
"grad_norm": 0.07987464923521465,
|
|
"learning_rate": 7.811021067933919e-06,
|
|
"loss": 1.0510263442993164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577937841415405,
|
|
"step": 868,
|
|
"valid_targets_mean": 14630.5,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 3.699360341151386,
|
|
"grad_norm": 0.07992043014716169,
|
|
"learning_rate": 7.763946608208504e-06,
|
|
"loss": 1.0136570930480957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2394932359457016,
|
|
"step": 869,
|
|
"valid_targets_mean": 14532.7,
|
|
"valid_targets_min": 2449
|
|
},
|
|
{
|
|
"epoch": 3.70362473347548,
|
|
"grad_norm": 0.08927827389107539,
|
|
"learning_rate": 7.716980239801588e-06,
|
|
"loss": 1.0263105630874634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23384422063827515,
|
|
"step": 870,
|
|
"valid_targets_mean": 13523.9,
|
|
"valid_targets_min": 2750
|
|
},
|
|
{
|
|
"epoch": 3.7078891257995736,
|
|
"grad_norm": 0.11056747979307219,
|
|
"learning_rate": 7.670122377606495e-06,
|
|
"loss": 1.0767567157745361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681053578853607,
|
|
"step": 871,
|
|
"valid_targets_mean": 13881.6,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 3.7121535181236673,
|
|
"grad_norm": 0.09470717288374518,
|
|
"learning_rate": 7.623373435557988e-06,
|
|
"loss": 0.9856612682342529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24675241112709045,
|
|
"step": 872,
|
|
"valid_targets_mean": 15218.8,
|
|
"valid_targets_min": 7013
|
|
},
|
|
{
|
|
"epoch": 3.716417910447761,
|
|
"grad_norm": 0.07802928855827182,
|
|
"learning_rate": 7.5767338266286775e-06,
|
|
"loss": 1.035343885421753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520187497138977,
|
|
"step": 873,
|
|
"valid_targets_mean": 14378.0,
|
|
"valid_targets_min": 5190
|
|
},
|
|
{
|
|
"epoch": 3.7206823027718547,
|
|
"grad_norm": 0.08614787647067453,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 1.0437312126159668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22808116674423218,
|
|
"step": 874,
|
|
"valid_targets_mean": 13500.3,
|
|
"valid_targets_min": 2591
|
|
},
|
|
{
|
|
"epoch": 3.724946695095949,
|
|
"grad_norm": 0.0874181178466684,
|
|
"learning_rate": 7.483784255185249e-06,
|
|
"loss": 1.0509166717529297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26214781403541565,
|
|
"step": 875,
|
|
"valid_targets_mean": 15184.8,
|
|
"valid_targets_min": 3965
|
|
},
|
|
{
|
|
"epoch": 3.7292110874200426,
|
|
"grad_norm": 0.08075657614659544,
|
|
"learning_rate": 7.437475113772632e-06,
|
|
"loss": 1.0664618015289307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2666980028152466,
|
|
"step": 876,
|
|
"valid_targets_mean": 15203.5,
|
|
"valid_targets_min": 10335
|
|
},
|
|
{
|
|
"epoch": 3.7334754797441363,
|
|
"grad_norm": 0.07477859358481892,
|
|
"learning_rate": 7.391276947674932e-06,
|
|
"loss": 1.022497534751892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2488812357187271,
|
|
"step": 877,
|
|
"valid_targets_mean": 14774.1,
|
|
"valid_targets_min": 6826
|
|
},
|
|
{
|
|
"epoch": 3.7377398720682304,
|
|
"grad_norm": 0.08084873648482122,
|
|
"learning_rate": 7.345190164999307e-06,
|
|
"loss": 1.0268759727478027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24368500709533691,
|
|
"step": 878,
|
|
"valid_targets_mean": 15164.0,
|
|
"valid_targets_min": 8113
|
|
},
|
|
{
|
|
"epoch": 3.742004264392324,
|
|
"grad_norm": 0.10466196957153916,
|
|
"learning_rate": 7.299215172868947e-06,
|
|
"loss": 1.0474107265472412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600746154785156,
|
|
"step": 879,
|
|
"valid_targets_mean": 14153.2,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 3.746268656716418,
|
|
"grad_norm": 0.08225310002176738,
|
|
"learning_rate": 7.2533523774194865e-06,
|
|
"loss": 0.9963715076446533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696053385734558,
|
|
"step": 880,
|
|
"valid_targets_mean": 15587.3,
|
|
"valid_targets_min": 12542
|
|
},
|
|
{
|
|
"epoch": 3.750533049040512,
|
|
"grad_norm": 0.0794321576113467,
|
|
"learning_rate": 7.2076021837954616e-06,
|
|
"loss": 1.0367623567581177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2496190220117569,
|
|
"step": 881,
|
|
"valid_targets_mean": 14915.2,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 3.7547974413646057,
|
|
"grad_norm": 0.08209307196316702,
|
|
"learning_rate": 7.161964996146689e-06,
|
|
"loss": 1.0878024101257324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26012054085731506,
|
|
"step": 882,
|
|
"valid_targets_mean": 15025.1,
|
|
"valid_targets_min": 4369
|
|
},
|
|
{
|
|
"epoch": 3.7590618336886994,
|
|
"grad_norm": 0.07259322999036513,
|
|
"learning_rate": 7.116441217624708e-06,
|
|
"loss": 1.0414860248565674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27827656269073486,
|
|
"step": 883,
|
|
"valid_targets_mean": 15538.1,
|
|
"valid_targets_min": 9241
|
|
},
|
|
{
|
|
"epoch": 3.763326226012793,
|
|
"grad_norm": 0.08696090704266997,
|
|
"learning_rate": 7.071031250379228e-06,
|
|
"loss": 1.0478577613830566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25591087341308594,
|
|
"step": 884,
|
|
"valid_targets_mean": 14598.1,
|
|
"valid_targets_min": 4809
|
|
},
|
|
{
|
|
"epoch": 3.767590618336887,
|
|
"grad_norm": 0.07693999903967946,
|
|
"learning_rate": 7.0257354955545466e-06,
|
|
"loss": 1.0575886964797974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571733593940735,
|
|
"step": 885,
|
|
"valid_targets_mean": 15282.5,
|
|
"valid_targets_min": 7892
|
|
},
|
|
{
|
|
"epoch": 3.771855010660981,
|
|
"grad_norm": 0.07565798434991149,
|
|
"learning_rate": 6.980554353286066e-06,
|
|
"loss": 1.0402332544326782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695630192756653,
|
|
"step": 886,
|
|
"valid_targets_mean": 14452.4,
|
|
"valid_targets_min": 2427
|
|
},
|
|
{
|
|
"epoch": 3.7761194029850746,
|
|
"grad_norm": 0.07909328602982077,
|
|
"learning_rate": 6.935488222696676e-06,
|
|
"loss": 1.0011862516403198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2459937185049057,
|
|
"step": 887,
|
|
"valid_targets_mean": 14569.8,
|
|
"valid_targets_min": 2550
|
|
},
|
|
{
|
|
"epoch": 3.7803837953091683,
|
|
"grad_norm": 0.07818319017123801,
|
|
"learning_rate": 6.890537501893302e-06,
|
|
"loss": 1.0591342449188232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27399516105651855,
|
|
"step": 888,
|
|
"valid_targets_mean": 15348.9,
|
|
"valid_targets_min": 5567
|
|
},
|
|
{
|
|
"epoch": 3.7846481876332625,
|
|
"grad_norm": 0.10386307909675437,
|
|
"learning_rate": 6.845702587963352e-06,
|
|
"loss": 1.0306096076965332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24255593121051788,
|
|
"step": 889,
|
|
"valid_targets_mean": 14806.3,
|
|
"valid_targets_min": 4626
|
|
},
|
|
{
|
|
"epoch": 3.788912579957356,
|
|
"grad_norm": 0.09351565469204826,
|
|
"learning_rate": 6.800983876971192e-06,
|
|
"loss": 1.0489108562469482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679540812969208,
|
|
"step": 890,
|
|
"valid_targets_mean": 15298.2,
|
|
"valid_targets_min": 4554
|
|
},
|
|
{
|
|
"epoch": 3.79317697228145,
|
|
"grad_norm": 0.08060750467788766,
|
|
"learning_rate": 6.756381763954718e-06,
|
|
"loss": 1.0338560342788696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23931586742401123,
|
|
"step": 891,
|
|
"valid_targets_mean": 14668.2,
|
|
"valid_targets_min": 4243
|
|
},
|
|
{
|
|
"epoch": 3.7974413646055436,
|
|
"grad_norm": 0.09301035975158325,
|
|
"learning_rate": 6.7118966429217645e-06,
|
|
"loss": 1.0454421043395996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24078507721424103,
|
|
"step": 892,
|
|
"valid_targets_mean": 13780.3,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 3.8017057569296373,
|
|
"grad_norm": 0.09941275253824729,
|
|
"learning_rate": 6.667528906846714e-06,
|
|
"loss": 1.0338785648345947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26446783542633057,
|
|
"step": 893,
|
|
"valid_targets_mean": 15612.3,
|
|
"valid_targets_min": 8208
|
|
},
|
|
{
|
|
"epoch": 3.8059701492537314,
|
|
"grad_norm": 0.0773705763130956,
|
|
"learning_rate": 6.623278947666974e-06,
|
|
"loss": 1.056181788444519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2693255543708801,
|
|
"step": 894,
|
|
"valid_targets_mean": 14974.3,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 3.810234541577825,
|
|
"grad_norm": 0.09469480760796763,
|
|
"learning_rate": 6.579147156279538e-06,
|
|
"loss": 1.0422710180282593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25310590863227844,
|
|
"step": 895,
|
|
"valid_targets_mean": 14745.0,
|
|
"valid_targets_min": 5518
|
|
},
|
|
{
|
|
"epoch": 3.814498933901919,
|
|
"grad_norm": 0.0844441965122201,
|
|
"learning_rate": 6.535133922537513e-06,
|
|
"loss": 1.051576852798462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2646353840827942,
|
|
"step": 896,
|
|
"valid_targets_mean": 15239.8,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 3.818763326226013,
|
|
"grad_norm": 0.09791535485186709,
|
|
"learning_rate": 6.491239635246709e-06,
|
|
"loss": 1.0070401430130005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26541510224342346,
|
|
"step": 897,
|
|
"valid_targets_mean": 14494.6,
|
|
"valid_targets_min": 3954
|
|
},
|
|
{
|
|
"epoch": 3.8230277185501067,
|
|
"grad_norm": 0.0804234348689826,
|
|
"learning_rate": 6.447464682162143e-06,
|
|
"loss": 1.0272142887115479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26719045639038086,
|
|
"step": 898,
|
|
"valid_targets_mean": 14542.8,
|
|
"valid_targets_min": 5861
|
|
},
|
|
{
|
|
"epoch": 3.8272921108742004,
|
|
"grad_norm": 0.07719355729984614,
|
|
"learning_rate": 6.403809449984704e-06,
|
|
"loss": 1.005959153175354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24028396606445312,
|
|
"step": 899,
|
|
"valid_targets_mean": 13866.1,
|
|
"valid_targets_min": 2611
|
|
},
|
|
{
|
|
"epoch": 3.831556503198294,
|
|
"grad_norm": 0.09003236822137275,
|
|
"learning_rate": 6.3602743243576405e-06,
|
|
"loss": 1.0521509647369385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590738534927368,
|
|
"step": 900,
|
|
"valid_targets_mean": 15023.3,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 3.835820895522388,
|
|
"grad_norm": 0.07941972186305471,
|
|
"learning_rate": 6.316859689863222e-06,
|
|
"loss": 1.030562400817871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2312874048948288,
|
|
"step": 901,
|
|
"valid_targets_mean": 14708.4,
|
|
"valid_targets_min": 5908
|
|
},
|
|
{
|
|
"epoch": 3.840085287846482,
|
|
"grad_norm": 0.08724123781291919,
|
|
"learning_rate": 6.273565930019316e-06,
|
|
"loss": 1.0391294956207275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24585530161857605,
|
|
"step": 902,
|
|
"valid_targets_mean": 14240.8,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 3.8443496801705757,
|
|
"grad_norm": 0.07910350722012889,
|
|
"learning_rate": 6.230393427276e-06,
|
|
"loss": 1.0073344707489014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2446940541267395,
|
|
"step": 903,
|
|
"valid_targets_mean": 15278.1,
|
|
"valid_targets_min": 7088
|
|
},
|
|
{
|
|
"epoch": 3.8486140724946694,
|
|
"grad_norm": 0.0785625433640836,
|
|
"learning_rate": 6.187342563012198e-06,
|
|
"loss": 1.0122864246368408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24303199350833893,
|
|
"step": 904,
|
|
"valid_targets_mean": 15377.3,
|
|
"valid_targets_min": 7820
|
|
},
|
|
{
|
|
"epoch": 3.8528784648187635,
|
|
"grad_norm": 0.0809758203341804,
|
|
"learning_rate": 6.144413717532269e-06,
|
|
"loss": 1.0475292205810547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25803524255752563,
|
|
"step": 905,
|
|
"valid_targets_mean": 14760.0,
|
|
"valid_targets_min": 4632
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.07554715750922045,
|
|
"learning_rate": 6.1016072700627106e-06,
|
|
"loss": 1.059013843536377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2788810729980469,
|
|
"step": 906,
|
|
"valid_targets_mean": 15474.2,
|
|
"valid_targets_min": 10789
|
|
},
|
|
{
|
|
"epoch": 3.861407249466951,
|
|
"grad_norm": 0.08815171055493336,
|
|
"learning_rate": 6.058923598748756e-06,
|
|
"loss": 1.0405542850494385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618822455406189,
|
|
"step": 907,
|
|
"valid_targets_mean": 14841.5,
|
|
"valid_targets_min": 5120
|
|
},
|
|
{
|
|
"epoch": 3.8656716417910446,
|
|
"grad_norm": 0.09065135669638827,
|
|
"learning_rate": 6.016363080651066e-06,
|
|
"loss": 1.02406644821167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756415605545044,
|
|
"step": 908,
|
|
"valid_targets_mean": 15457.6,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 3.8699360341151388,
|
|
"grad_norm": 0.06956184698630777,
|
|
"learning_rate": 5.973926091742386e-06,
|
|
"loss": 1.0235509872436523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2693585753440857,
|
|
"step": 909,
|
|
"valid_targets_mean": 15067.2,
|
|
"valid_targets_min": 2737
|
|
},
|
|
{
|
|
"epoch": 3.8742004264392325,
|
|
"grad_norm": 0.08197048416260708,
|
|
"learning_rate": 5.931613006904196e-06,
|
|
"loss": 1.0860520601272583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711891829967499,
|
|
"step": 910,
|
|
"valid_targets_mean": 14860.2,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 3.878464818763326,
|
|
"grad_norm": 0.08495311148379922,
|
|
"learning_rate": 5.889424199923473e-06,
|
|
"loss": 1.0736467838287354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28341278433799744,
|
|
"step": 911,
|
|
"valid_targets_mean": 14724.0,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 3.88272921108742,
|
|
"grad_norm": 0.07777898132875226,
|
|
"learning_rate": 5.847360043489318e-06,
|
|
"loss": 1.0422983169555664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551887631416321,
|
|
"step": 912,
|
|
"valid_targets_mean": 14038.2,
|
|
"valid_targets_min": 4427
|
|
},
|
|
{
|
|
"epoch": 3.886993603411514,
|
|
"grad_norm": 0.07555614702580156,
|
|
"learning_rate": 5.805420909189683e-06,
|
|
"loss": 1.0190186500549316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2498110830783844,
|
|
"step": 913,
|
|
"valid_targets_mean": 13927.7,
|
|
"valid_targets_min": 2133
|
|
},
|
|
{
|
|
"epoch": 3.8912579957356077,
|
|
"grad_norm": 0.07536806197422509,
|
|
"learning_rate": 5.7636071675081076e-06,
|
|
"loss": 0.9859417676925659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23977015912532806,
|
|
"step": 914,
|
|
"valid_targets_mean": 14980.1,
|
|
"valid_targets_min": 4753
|
|
},
|
|
{
|
|
"epoch": 3.8955223880597014,
|
|
"grad_norm": 0.08129778873936544,
|
|
"learning_rate": 5.721919187820431e-06,
|
|
"loss": 1.0162174701690674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.249849334359169,
|
|
"step": 915,
|
|
"valid_targets_mean": 15068.5,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 3.8997867803837956,
|
|
"grad_norm": 0.0844611542297401,
|
|
"learning_rate": 5.6803573383915265e-06,
|
|
"loss": 1.0243160724639893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25158846378326416,
|
|
"step": 916,
|
|
"valid_targets_mean": 15044.6,
|
|
"valid_targets_min": 6670
|
|
},
|
|
{
|
|
"epoch": 3.9040511727078893,
|
|
"grad_norm": 0.09715740428020808,
|
|
"learning_rate": 5.638921986372064e-06,
|
|
"loss": 1.0170881748199463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26079803705215454,
|
|
"step": 917,
|
|
"valid_targets_mean": 14739.3,
|
|
"valid_targets_min": 4791
|
|
},
|
|
{
|
|
"epoch": 3.908315565031983,
|
|
"grad_norm": 0.08186217636269322,
|
|
"learning_rate": 5.5976134977952315e-06,
|
|
"loss": 1.0299415588378906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2786436080932617,
|
|
"step": 918,
|
|
"valid_targets_mean": 14845.4,
|
|
"valid_targets_min": 5095
|
|
},
|
|
{
|
|
"epoch": 3.9125799573560767,
|
|
"grad_norm": 0.0807959881624803,
|
|
"learning_rate": 5.556432237573564e-06,
|
|
"loss": 1.0457395315170288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563976049423218,
|
|
"step": 919,
|
|
"valid_targets_mean": 14489.6,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 3.9168443496801704,
|
|
"grad_norm": 0.08968468146141904,
|
|
"learning_rate": 5.5153785694956416e-06,
|
|
"loss": 1.0066556930541992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255768746137619,
|
|
"step": 920,
|
|
"valid_targets_mean": 14957.5,
|
|
"valid_targets_min": 6027
|
|
},
|
|
{
|
|
"epoch": 3.9211087420042645,
|
|
"grad_norm": 0.08029937904141751,
|
|
"learning_rate": 5.474452856222942e-06,
|
|
"loss": 1.0962214469909668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26948976516723633,
|
|
"step": 921,
|
|
"valid_targets_mean": 14551.8,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 3.925373134328358,
|
|
"grad_norm": 0.07610502521435782,
|
|
"learning_rate": 5.433655459286611e-06,
|
|
"loss": 0.9986022710800171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24567736685276031,
|
|
"step": 922,
|
|
"valid_targets_mean": 14516.1,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 3.929637526652452,
|
|
"grad_norm": 0.0813681926726865,
|
|
"learning_rate": 5.392986739084238e-06,
|
|
"loss": 1.052161693572998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25333261489868164,
|
|
"step": 923,
|
|
"valid_targets_mean": 14106.4,
|
|
"valid_targets_min": 3663
|
|
},
|
|
{
|
|
"epoch": 3.933901918976546,
|
|
"grad_norm": 0.08172004028488414,
|
|
"learning_rate": 5.352447054876755e-06,
|
|
"loss": 1.0313013792037964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24101045727729797,
|
|
"step": 924,
|
|
"valid_targets_mean": 14219.2,
|
|
"valid_targets_min": 1903
|
|
},
|
|
{
|
|
"epoch": 3.9381663113006398,
|
|
"grad_norm": 0.07506210209930378,
|
|
"learning_rate": 5.31203676478516e-06,
|
|
"loss": 1.0536484718322754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26649099588394165,
|
|
"step": 925,
|
|
"valid_targets_mean": 15425.6,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 3.9424307036247335,
|
|
"grad_norm": 0.072633133405953,
|
|
"learning_rate": 5.271756225787434e-06,
|
|
"loss": 1.0264238119125366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270125150680542,
|
|
"step": 926,
|
|
"valid_targets_mean": 15440.1,
|
|
"valid_targets_min": 6631
|
|
},
|
|
{
|
|
"epoch": 3.946695095948827,
|
|
"grad_norm": 0.08527675393769442,
|
|
"learning_rate": 5.231605793715348e-06,
|
|
"loss": 1.0151232481002808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2394932210445404,
|
|
"step": 927,
|
|
"valid_targets_mean": 15057.4,
|
|
"valid_targets_min": 7393
|
|
},
|
|
{
|
|
"epoch": 3.950959488272921,
|
|
"grad_norm": 0.10557885289457988,
|
|
"learning_rate": 5.191585823251335e-06,
|
|
"loss": 1.0420904159545898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23350343108177185,
|
|
"step": 928,
|
|
"valid_targets_mean": 14315.7,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 3.955223880597015,
|
|
"grad_norm": 0.07403391899294842,
|
|
"learning_rate": 5.151696667925348e-06,
|
|
"loss": 1.034696340560913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26972413063049316,
|
|
"step": 929,
|
|
"valid_targets_mean": 14701.4,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 3.9594882729211087,
|
|
"grad_norm": 0.07940484753771229,
|
|
"learning_rate": 5.111938680111732e-06,
|
|
"loss": 1.018942952156067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700170874595642,
|
|
"step": 930,
|
|
"valid_targets_mean": 14576.6,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 3.9637526652452024,
|
|
"grad_norm": 0.07412034004160253,
|
|
"learning_rate": 5.072312211026125e-06,
|
|
"loss": 1.0595375299453735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28804612159729004,
|
|
"step": 931,
|
|
"valid_targets_mean": 15064.4,
|
|
"valid_targets_min": 1783
|
|
},
|
|
{
|
|
"epoch": 3.9680170575692966,
|
|
"grad_norm": 0.07385148862505388,
|
|
"learning_rate": 5.032817610722369e-06,
|
|
"loss": 1.0153112411499023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2420693337917328,
|
|
"step": 932,
|
|
"valid_targets_mean": 14734.0,
|
|
"valid_targets_min": 5734
|
|
},
|
|
{
|
|
"epoch": 3.9722814498933903,
|
|
"grad_norm": 0.07857758438062754,
|
|
"learning_rate": 4.993455228089366e-06,
|
|
"loss": 1.0582267045974731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619363069534302,
|
|
"step": 933,
|
|
"valid_targets_mean": 14335.9,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 3.976545842217484,
|
|
"grad_norm": 0.08014441535975936,
|
|
"learning_rate": 4.954225410848048e-06,
|
|
"loss": 1.0665839910507202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26797688007354736,
|
|
"step": 934,
|
|
"valid_targets_mean": 14975.0,
|
|
"valid_targets_min": 6684
|
|
},
|
|
{
|
|
"epoch": 3.9808102345415777,
|
|
"grad_norm": 0.07594610005732107,
|
|
"learning_rate": 4.915128505548284e-06,
|
|
"loss": 1.051414132118225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2612667977809906,
|
|
"step": 935,
|
|
"valid_targets_mean": 14959.4,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 3.9850746268656714,
|
|
"grad_norm": 0.07315444416867845,
|
|
"learning_rate": 4.8761648575658145e-06,
|
|
"loss": 1.042731761932373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2497975081205368,
|
|
"step": 936,
|
|
"valid_targets_mean": 14657.7,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 3.9893390191897655,
|
|
"grad_norm": 0.08179327051953446,
|
|
"learning_rate": 4.837334811099217e-06,
|
|
"loss": 1.02659010887146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26457613706588745,
|
|
"step": 937,
|
|
"valid_targets_mean": 14456.5,
|
|
"valid_targets_min": 3188
|
|
},
|
|
{
|
|
"epoch": 3.9936034115138592,
|
|
"grad_norm": 0.07583105755160399,
|
|
"learning_rate": 4.7986387091668365e-06,
|
|
"loss": 1.025386095046997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26111188530921936,
|
|
"step": 938,
|
|
"valid_targets_mean": 14776.4,
|
|
"valid_targets_min": 6350
|
|
},
|
|
{
|
|
"epoch": 3.997867803837953,
|
|
"grad_norm": 0.07316451888866704,
|
|
"learning_rate": 4.760076893603791e-06,
|
|
"loss": 1.0235109329223633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25054243206977844,
|
|
"step": 939,
|
|
"valid_targets_mean": 14792.3,
|
|
"valid_targets_min": 4066
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.09665112068128351,
|
|
"learning_rate": 4.721649705058926e-06,
|
|
"loss": 0.9654463529586792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4785127639770508,
|
|
"step": 940,
|
|
"valid_targets_mean": 15362.8,
|
|
"valid_targets_min": 6870
|
|
},
|
|
{
|
|
"epoch": 4.004264392324094,
|
|
"grad_norm": 0.07694955817517563,
|
|
"learning_rate": 4.683357482991819e-06,
|
|
"loss": 1.0603652000427246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28040334582328796,
|
|
"step": 941,
|
|
"valid_targets_mean": 14933.3,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 4.008528784648187,
|
|
"grad_norm": 0.08687304347329067,
|
|
"learning_rate": 4.645200565669776e-06,
|
|
"loss": 1.058302640914917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27226993441581726,
|
|
"step": 942,
|
|
"valid_targets_mean": 15328.0,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 4.0127931769722816,
|
|
"grad_norm": 0.07633447398504972,
|
|
"learning_rate": 4.607179290164823e-06,
|
|
"loss": 1.0665823221206665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2525603175163269,
|
|
"step": 943,
|
|
"valid_targets_mean": 14727.6,
|
|
"valid_targets_min": 3893
|
|
},
|
|
{
|
|
"epoch": 4.017057569296376,
|
|
"grad_norm": 0.07561892650399228,
|
|
"learning_rate": 4.569293992350783e-06,
|
|
"loss": 0.9884703159332275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23644356429576874,
|
|
"step": 944,
|
|
"valid_targets_mean": 15077.0,
|
|
"valid_targets_min": 3168
|
|
},
|
|
{
|
|
"epoch": 4.021321961620469,
|
|
"grad_norm": 0.07846946482719135,
|
|
"learning_rate": 4.531545006900244e-06,
|
|
"loss": 1.0185267925262451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2440236508846283,
|
|
"step": 945,
|
|
"valid_targets_mean": 14482.2,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 4.025586353944563,
|
|
"grad_norm": 0.0710106944898757,
|
|
"learning_rate": 4.493932667281646e-06,
|
|
"loss": 0.9847319722175598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22929999232292175,
|
|
"step": 946,
|
|
"valid_targets_mean": 15876.1,
|
|
"valid_targets_min": 11782
|
|
},
|
|
{
|
|
"epoch": 4.029850746268656,
|
|
"grad_norm": 0.07091824957614329,
|
|
"learning_rate": 4.456457305756321e-06,
|
|
"loss": 0.9619096517562866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2401641607284546,
|
|
"step": 947,
|
|
"valid_targets_mean": 14348.1,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 4.0341151385927505,
|
|
"grad_norm": 0.07607293193675675,
|
|
"learning_rate": 4.419119253375557e-06,
|
|
"loss": 1.0721756219863892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654160261154175,
|
|
"step": 948,
|
|
"valid_targets_mean": 14250.8,
|
|
"valid_targets_min": 2276
|
|
},
|
|
{
|
|
"epoch": 4.038379530916845,
|
|
"grad_norm": 0.07259281785500683,
|
|
"learning_rate": 4.381918839977675e-06,
|
|
"loss": 1.0606753826141357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531473934650421,
|
|
"step": 949,
|
|
"valid_targets_mean": 14412.8,
|
|
"valid_targets_min": 4632
|
|
},
|
|
{
|
|
"epoch": 4.042643923240938,
|
|
"grad_norm": 0.08118109856185132,
|
|
"learning_rate": 4.344856394185122e-06,
|
|
"loss": 1.0495662689208984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24468651413917542,
|
|
"step": 950,
|
|
"valid_targets_mean": 15159.2,
|
|
"valid_targets_min": 6548
|
|
},
|
|
{
|
|
"epoch": 4.046908315565032,
|
|
"grad_norm": 0.09998418224519554,
|
|
"learning_rate": 4.307932243401538e-06,
|
|
"loss": 1.0190573930740356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24616962671279907,
|
|
"step": 951,
|
|
"valid_targets_mean": 14866.3,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 4.051172707889126,
|
|
"grad_norm": 0.09469202139223815,
|
|
"learning_rate": 4.271146713808927e-06,
|
|
"loss": 1.0151586532592773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24089258909225464,
|
|
"step": 952,
|
|
"valid_targets_mean": 14237.8,
|
|
"valid_targets_min": 6752
|
|
},
|
|
{
|
|
"epoch": 4.0554371002132195,
|
|
"grad_norm": 0.07921318660079313,
|
|
"learning_rate": 4.234500130364698e-06,
|
|
"loss": 1.0283575057983398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24566279351711273,
|
|
"step": 953,
|
|
"valid_targets_mean": 13966.0,
|
|
"valid_targets_min": 3102
|
|
},
|
|
{
|
|
"epoch": 4.059701492537314,
|
|
"grad_norm": 0.07964596853684529,
|
|
"learning_rate": 4.197992816798851e-06,
|
|
"loss": 1.0123951435089111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26131612062454224,
|
|
"step": 954,
|
|
"valid_targets_mean": 15317.1,
|
|
"valid_targets_min": 9724
|
|
},
|
|
{
|
|
"epoch": 4.063965884861407,
|
|
"grad_norm": 0.07276343367944109,
|
|
"learning_rate": 4.161625095611101e-06,
|
|
"loss": 1.0430335998535156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2427283227443695,
|
|
"step": 955,
|
|
"valid_targets_mean": 13846.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 4.068230277185501,
|
|
"grad_norm": 0.08289405414794294,
|
|
"learning_rate": 4.125397288068007e-06,
|
|
"loss": 0.9959665536880493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26747429370880127,
|
|
"step": 956,
|
|
"valid_targets_mean": 15741.6,
|
|
"valid_targets_min": 9550
|
|
},
|
|
{
|
|
"epoch": 4.072494669509595,
|
|
"grad_norm": 0.0941427414212429,
|
|
"learning_rate": 4.089309714200187e-06,
|
|
"loss": 1.0306720733642578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24888575077056885,
|
|
"step": 957,
|
|
"valid_targets_mean": 14479.0,
|
|
"valid_targets_min": 4645
|
|
},
|
|
{
|
|
"epoch": 4.076759061833688,
|
|
"grad_norm": 0.08024261726161806,
|
|
"learning_rate": 4.0533626927994185e-06,
|
|
"loss": 1.010699987411499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2400101125240326,
|
|
"step": 958,
|
|
"valid_targets_mean": 14365.2,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 4.081023454157783,
|
|
"grad_norm": 0.08383657437700691,
|
|
"learning_rate": 4.017556541415888e-06,
|
|
"loss": 1.0223714113235474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25305095314979553,
|
|
"step": 959,
|
|
"valid_targets_mean": 14697.6,
|
|
"valid_targets_min": 4543
|
|
},
|
|
{
|
|
"epoch": 4.085287846481877,
|
|
"grad_norm": 0.06929096722852933,
|
|
"learning_rate": 3.981891576355352e-06,
|
|
"loss": 1.0068535804748535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26727986335754395,
|
|
"step": 960,
|
|
"valid_targets_mean": 15997.8,
|
|
"valid_targets_min": 11888
|
|
},
|
|
{
|
|
"epoch": 4.08955223880597,
|
|
"grad_norm": 0.07531370708433341,
|
|
"learning_rate": 3.946368112676346e-06,
|
|
"loss": 0.9880790710449219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24824348092079163,
|
|
"step": 961,
|
|
"valid_targets_mean": 14625.4,
|
|
"valid_targets_min": 5805
|
|
},
|
|
{
|
|
"epoch": 4.093816631130064,
|
|
"grad_norm": 0.10162148816151736,
|
|
"learning_rate": 3.9109864641874166e-06,
|
|
"loss": 1.04616379737854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26773929595947266,
|
|
"step": 962,
|
|
"valid_targets_mean": 15228.8,
|
|
"valid_targets_min": 10158
|
|
},
|
|
{
|
|
"epoch": 4.098081023454157,
|
|
"grad_norm": 0.06935177859830995,
|
|
"learning_rate": 3.875746943444316e-06,
|
|
"loss": 1.014711618423462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2293023318052292,
|
|
"step": 963,
|
|
"valid_targets_mean": 14586.9,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 4.1023454157782515,
|
|
"grad_norm": 0.07230353147219123,
|
|
"learning_rate": 3.840649861747278e-06,
|
|
"loss": 1.0353587865829468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2691650986671448,
|
|
"step": 964,
|
|
"valid_targets_mean": 15218.3,
|
|
"valid_targets_min": 5957
|
|
},
|
|
{
|
|
"epoch": 4.106609808102346,
|
|
"grad_norm": 0.08871474277209189,
|
|
"learning_rate": 3.8056955291382667e-06,
|
|
"loss": 1.0306223630905151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23993517458438873,
|
|
"step": 965,
|
|
"valid_targets_mean": 14445.4,
|
|
"valid_targets_min": 3093
|
|
},
|
|
{
|
|
"epoch": 4.110874200426439,
|
|
"grad_norm": 0.07560960717340694,
|
|
"learning_rate": 3.7708842543981928e-06,
|
|
"loss": 1.0723929405212402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23837114870548248,
|
|
"step": 966,
|
|
"valid_targets_mean": 14178.5,
|
|
"valid_targets_min": 3837
|
|
},
|
|
{
|
|
"epoch": 4.115138592750533,
|
|
"grad_norm": 0.07000342089971023,
|
|
"learning_rate": 3.736216345044237e-06,
|
|
"loss": 1.0193781852722168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601016163825989,
|
|
"step": 967,
|
|
"valid_targets_mean": 15474.5,
|
|
"valid_targets_min": 10675
|
|
},
|
|
{
|
|
"epoch": 4.119402985074627,
|
|
"grad_norm": 0.07589849526555094,
|
|
"learning_rate": 3.7016921073271084e-06,
|
|
"loss": 1.0299336910247803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26432567834854126,
|
|
"step": 968,
|
|
"valid_targets_mean": 14921.3,
|
|
"valid_targets_min": 5478
|
|
},
|
|
{
|
|
"epoch": 4.1236673773987205,
|
|
"grad_norm": 0.07763151155765381,
|
|
"learning_rate": 3.6673118462283453e-06,
|
|
"loss": 1.0336275100708008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24046000838279724,
|
|
"step": 969,
|
|
"valid_targets_mean": 14821.7,
|
|
"valid_targets_min": 7388
|
|
},
|
|
{
|
|
"epoch": 4.127931769722815,
|
|
"grad_norm": 0.08528106397009493,
|
|
"learning_rate": 3.6330758654576227e-06,
|
|
"loss": 1.0443201065063477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25907987356185913,
|
|
"step": 970,
|
|
"valid_targets_mean": 15185.0,
|
|
"valid_targets_min": 5704
|
|
},
|
|
{
|
|
"epoch": 4.132196162046908,
|
|
"grad_norm": 0.07761338166852766,
|
|
"learning_rate": 3.598984467450055e-06,
|
|
"loss": 0.9852661490440369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26297616958618164,
|
|
"step": 971,
|
|
"valid_targets_mean": 15349.3,
|
|
"valid_targets_min": 8002
|
|
},
|
|
{
|
|
"epoch": 4.136460554371002,
|
|
"grad_norm": 0.08457441832162528,
|
|
"learning_rate": 3.565037953363546e-06,
|
|
"loss": 1.0084260702133179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558494210243225,
|
|
"step": 972,
|
|
"valid_targets_mean": 15253.0,
|
|
"valid_targets_min": 4456
|
|
},
|
|
{
|
|
"epoch": 4.140724946695096,
|
|
"grad_norm": 0.09004092446913044,
|
|
"learning_rate": 3.5312366230761154e-06,
|
|
"loss": 0.9975166320800781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25923553109169006,
|
|
"step": 973,
|
|
"valid_targets_mean": 14746.0,
|
|
"valid_targets_min": 2591
|
|
},
|
|
{
|
|
"epoch": 4.144989339019189,
|
|
"grad_norm": 0.07987252291433689,
|
|
"learning_rate": 3.497580775183258e-06,
|
|
"loss": 1.007072925567627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22519609332084656,
|
|
"step": 974,
|
|
"valid_targets_mean": 14035.6,
|
|
"valid_targets_min": 3378
|
|
},
|
|
{
|
|
"epoch": 4.149253731343284,
|
|
"grad_norm": 0.07537962159081893,
|
|
"learning_rate": 3.464070706995295e-06,
|
|
"loss": 1.0661035776138306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26262167096138,
|
|
"step": 975,
|
|
"valid_targets_mean": 15155.7,
|
|
"valid_targets_min": 2804
|
|
},
|
|
{
|
|
"epoch": 4.153518123667378,
|
|
"grad_norm": 0.0770008882437443,
|
|
"learning_rate": 3.4307067145347417e-06,
|
|
"loss": 1.0307209491729736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2410483956336975,
|
|
"step": 976,
|
|
"valid_targets_mean": 14065.8,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 4.157782515991471,
|
|
"grad_norm": 0.08222489461818047,
|
|
"learning_rate": 3.397489092533739e-06,
|
|
"loss": 0.9927602410316467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24605350196361542,
|
|
"step": 977,
|
|
"valid_targets_mean": 15237.8,
|
|
"valid_targets_min": 4359
|
|
},
|
|
{
|
|
"epoch": 4.162046908315565,
|
|
"grad_norm": 0.07693172808089653,
|
|
"learning_rate": 3.364418134431371e-06,
|
|
"loss": 1.0070375204086304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2607457935810089,
|
|
"step": 978,
|
|
"valid_targets_mean": 15019.0,
|
|
"valid_targets_min": 6113
|
|
},
|
|
{
|
|
"epoch": 4.166311300639659,
|
|
"grad_norm": 0.07609038645825848,
|
|
"learning_rate": 3.331494132371149e-06,
|
|
"loss": 1.067734956741333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723226845264435,
|
|
"step": 979,
|
|
"valid_targets_mean": 14850.1,
|
|
"valid_targets_min": 6481
|
|
},
|
|
{
|
|
"epoch": 4.1705756929637525,
|
|
"grad_norm": 0.09036706056283246,
|
|
"learning_rate": 3.2987173771983816e-06,
|
|
"loss": 1.0353126525878906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645860016345978,
|
|
"step": 980,
|
|
"valid_targets_mean": 13865.8,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 4.174840085287847,
|
|
"grad_norm": 0.07444923273861716,
|
|
"learning_rate": 3.266088158457634e-06,
|
|
"loss": 1.0469627380371094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24626043438911438,
|
|
"step": 981,
|
|
"valid_targets_mean": 15693.0,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 4.17910447761194,
|
|
"grad_norm": 0.08060051372433151,
|
|
"learning_rate": 3.233606764390147e-06,
|
|
"loss": 1.0352073907852173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27150383591651917,
|
|
"step": 982,
|
|
"valid_targets_mean": 14859.0,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 4.183368869936034,
|
|
"grad_norm": 0.07177347163427206,
|
|
"learning_rate": 3.2012734819313127e-06,
|
|
"loss": 1.0209245681762695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500031590461731,
|
|
"step": 983,
|
|
"valid_targets_mean": 14918.2,
|
|
"valid_targets_min": 2570
|
|
},
|
|
{
|
|
"epoch": 4.187633262260128,
|
|
"grad_norm": 0.07271972399433943,
|
|
"learning_rate": 3.1690885967081187e-06,
|
|
"loss": 1.0066120624542236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24489077925682068,
|
|
"step": 984,
|
|
"valid_targets_mean": 14431.8,
|
|
"valid_targets_min": 4504
|
|
},
|
|
{
|
|
"epoch": 4.1918976545842215,
|
|
"grad_norm": 0.07720189666214854,
|
|
"learning_rate": 3.1370523930366393e-06,
|
|
"loss": 1.0655772686004639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29166609048843384,
|
|
"step": 985,
|
|
"valid_targets_mean": 15563.3,
|
|
"valid_targets_min": 8619
|
|
},
|
|
{
|
|
"epoch": 4.196162046908316,
|
|
"grad_norm": 0.07770212977606279,
|
|
"learning_rate": 3.105165153919525e-06,
|
|
"loss": 1.0116127729415894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627195715904236,
|
|
"step": 986,
|
|
"valid_targets_mean": 14734.6,
|
|
"valid_targets_min": 8192
|
|
},
|
|
{
|
|
"epoch": 4.20042643923241,
|
|
"grad_norm": 0.07651083979721754,
|
|
"learning_rate": 3.073427161043492e-06,
|
|
"loss": 1.0490927696228027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582077085971832,
|
|
"step": 987,
|
|
"valid_targets_mean": 14594.6,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 4.204690831556503,
|
|
"grad_norm": 0.07640785916466152,
|
|
"learning_rate": 3.0418386947768463e-06,
|
|
"loss": 1.0269312858581543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27077004313468933,
|
|
"step": 988,
|
|
"valid_targets_mean": 15284.5,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 4.208955223880597,
|
|
"grad_norm": 0.073572451297806,
|
|
"learning_rate": 3.01040003416698e-06,
|
|
"loss": 1.0495110750198364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2981879413127899,
|
|
"step": 989,
|
|
"valid_targets_mean": 15558.4,
|
|
"valid_targets_min": 12694
|
|
},
|
|
{
|
|
"epoch": 4.21321961620469,
|
|
"grad_norm": 0.06993818307990839,
|
|
"learning_rate": 2.97911145693796e-06,
|
|
"loss": 1.0402381420135498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27741050720214844,
|
|
"step": 990,
|
|
"valid_targets_mean": 14987.6,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 4.217484008528785,
|
|
"grad_norm": 0.07329490772794882,
|
|
"learning_rate": 2.947973239488009e-06,
|
|
"loss": 1.037589192390442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2652382552623749,
|
|
"step": 991,
|
|
"valid_targets_mean": 14648.6,
|
|
"valid_targets_min": 2540
|
|
},
|
|
{
|
|
"epoch": 4.221748400852879,
|
|
"grad_norm": 0.087682726264505,
|
|
"learning_rate": 2.91698565688711e-06,
|
|
"loss": 1.0166399478912354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27021846175193787,
|
|
"step": 992,
|
|
"valid_targets_mean": 14552.6,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 4.226012793176972,
|
|
"grad_norm": 0.07109782672383178,
|
|
"learning_rate": 2.886148982874566e-06,
|
|
"loss": 0.9839938879013062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26262885332107544,
|
|
"step": 993,
|
|
"valid_targets_mean": 15046.0,
|
|
"valid_targets_min": 5766
|
|
},
|
|
{
|
|
"epoch": 4.230277185501066,
|
|
"grad_norm": 0.07017360486346118,
|
|
"learning_rate": 2.8554634898565668e-06,
|
|
"loss": 1.027765154838562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24965378642082214,
|
|
"step": 994,
|
|
"valid_targets_mean": 14635.0,
|
|
"valid_targets_min": 2453
|
|
},
|
|
{
|
|
"epoch": 4.23454157782516,
|
|
"grad_norm": 0.06970586433829537,
|
|
"learning_rate": 2.824929448903806e-06,
|
|
"loss": 1.053523063659668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24483788013458252,
|
|
"step": 995,
|
|
"valid_targets_mean": 15306.7,
|
|
"valid_targets_min": 4422
|
|
},
|
|
{
|
|
"epoch": 4.2388059701492535,
|
|
"grad_norm": 0.07184274637231955,
|
|
"learning_rate": 2.794547129749059e-06,
|
|
"loss": 0.9990878105163574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24802438914775848,
|
|
"step": 996,
|
|
"valid_targets_mean": 15407.0,
|
|
"valid_targets_min": 4412
|
|
},
|
|
{
|
|
"epoch": 4.243070362473348,
|
|
"grad_norm": 0.06925109863912099,
|
|
"learning_rate": 2.7643168007848255e-06,
|
|
"loss": 1.0482103824615479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25303930044174194,
|
|
"step": 997,
|
|
"valid_targets_mean": 13620.5,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 4.247334754797441,
|
|
"grad_norm": 0.06735821822689972,
|
|
"learning_rate": 2.734238729060956e-06,
|
|
"loss": 0.9925441741943359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2445787787437439,
|
|
"step": 998,
|
|
"valid_targets_mean": 14932.4,
|
|
"valid_targets_min": 6850
|
|
},
|
|
{
|
|
"epoch": 4.251599147121535,
|
|
"grad_norm": 0.07138994852587088,
|
|
"learning_rate": 2.7043131802822653e-06,
|
|
"loss": 0.9845558404922485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24605567753314972,
|
|
"step": 999,
|
|
"valid_targets_mean": 14351.1,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 4.255863539445629,
|
|
"grad_norm": 0.06786370583115167,
|
|
"learning_rate": 2.674540418806222e-06,
|
|
"loss": 1.0410676002502441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24401737749576569,
|
|
"step": 1000,
|
|
"valid_targets_mean": 13642.3,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 4.2601279317697225,
|
|
"grad_norm": 0.07517027725363455,
|
|
"learning_rate": 2.6449207076405857e-06,
|
|
"loss": 1.0475417375564575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2740405797958374,
|
|
"step": 1001,
|
|
"valid_targets_mean": 14205.2,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 4.264392324093817,
|
|
"grad_norm": 0.0719817351889638,
|
|
"learning_rate": 2.6154543084411035e-06,
|
|
"loss": 1.0560872554779053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24943794310092926,
|
|
"step": 1002,
|
|
"valid_targets_mean": 14085.6,
|
|
"valid_targets_min": 3241
|
|
},
|
|
{
|
|
"epoch": 4.268656716417911,
|
|
"grad_norm": 0.07320803777615464,
|
|
"learning_rate": 2.5861414815091834e-06,
|
|
"loss": 1.025554895401001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566283941268921,
|
|
"step": 1003,
|
|
"valid_targets_mean": 14465.6,
|
|
"valid_targets_min": 3571
|
|
},
|
|
{
|
|
"epoch": 4.272921108742004,
|
|
"grad_norm": 0.07273708392360496,
|
|
"learning_rate": 2.5569824857895987e-06,
|
|
"loss": 1.0195801258087158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2482813149690628,
|
|
"step": 1004,
|
|
"valid_targets_mean": 15730.6,
|
|
"valid_targets_min": 8771
|
|
},
|
|
{
|
|
"epoch": 4.277185501066098,
|
|
"grad_norm": 0.07326459178916632,
|
|
"learning_rate": 2.5279775788682083e-06,
|
|
"loss": 1.0226316452026367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24337130784988403,
|
|
"step": 1005,
|
|
"valid_targets_mean": 14080.0,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 4.281449893390192,
|
|
"grad_norm": 0.07572586528562553,
|
|
"learning_rate": 2.499127016969671e-06,
|
|
"loss": 1.0520446300506592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2823803722858429,
|
|
"step": 1006,
|
|
"valid_targets_mean": 15276.1,
|
|
"valid_targets_min": 7310
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.0693548817022254,
|
|
"learning_rate": 2.4704310549551934e-06,
|
|
"loss": 1.0366793870925903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26049014925956726,
|
|
"step": 1007,
|
|
"valid_targets_mean": 15119.7,
|
|
"valid_targets_min": 2743
|
|
},
|
|
{
|
|
"epoch": 4.28997867803838,
|
|
"grad_norm": 0.07494014315230327,
|
|
"learning_rate": 2.441889946320266e-06,
|
|
"loss": 1.0158147811889648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23623259365558624,
|
|
"step": 1008,
|
|
"valid_targets_mean": 14626.1,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 4.294243070362473,
|
|
"grad_norm": 0.07200429185575995,
|
|
"learning_rate": 2.4135039431924233e-06,
|
|
"loss": 1.0710257291793823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2550296187400818,
|
|
"step": 1009,
|
|
"valid_targets_mean": 14658.4,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 4.298507462686567,
|
|
"grad_norm": 0.07758205609112181,
|
|
"learning_rate": 2.3852732963290426e-06,
|
|
"loss": 1.0198653936386108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25936928391456604,
|
|
"step": 1010,
|
|
"valid_targets_mean": 15914.5,
|
|
"valid_targets_min": 11866
|
|
},
|
|
{
|
|
"epoch": 4.302771855010661,
|
|
"grad_norm": 0.0780502400085244,
|
|
"learning_rate": 2.3571982551150853e-06,
|
|
"loss": 1.025866150856018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24136891961097717,
|
|
"step": 1011,
|
|
"valid_targets_mean": 15073.8,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 4.3070362473347545,
|
|
"grad_norm": 0.07807756375486362,
|
|
"learning_rate": 2.329279067560937e-06,
|
|
"loss": 1.004814624786377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24564941227436066,
|
|
"step": 1012,
|
|
"valid_targets_mean": 15867.1,
|
|
"valid_targets_min": 11603
|
|
},
|
|
{
|
|
"epoch": 4.311300639658849,
|
|
"grad_norm": 0.06614656614991912,
|
|
"learning_rate": 2.301515980300182e-06,
|
|
"loss": 1.053879976272583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25577622652053833,
|
|
"step": 1013,
|
|
"valid_targets_mean": 15025.0,
|
|
"valid_targets_min": 4192
|
|
},
|
|
{
|
|
"epoch": 4.315565031982943,
|
|
"grad_norm": 0.06535902664908347,
|
|
"learning_rate": 2.2739092385874527e-06,
|
|
"loss": 1.0165059566497803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25913357734680176,
|
|
"step": 1014,
|
|
"valid_targets_mean": 15433.1,
|
|
"valid_targets_min": 10715
|
|
},
|
|
{
|
|
"epoch": 4.319829424307036,
|
|
"grad_norm": 0.07002332106979323,
|
|
"learning_rate": 2.2464590862962443e-06,
|
|
"loss": 1.05423903465271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26438140869140625,
|
|
"step": 1015,
|
|
"valid_targets_mean": 15712.1,
|
|
"valid_targets_min": 12197
|
|
},
|
|
{
|
|
"epoch": 4.32409381663113,
|
|
"grad_norm": 0.08196991753288417,
|
|
"learning_rate": 2.219165765916769e-06,
|
|
"loss": 1.026976466178894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26024115085601807,
|
|
"step": 1016,
|
|
"valid_targets_mean": 14208.7,
|
|
"valid_targets_min": 5468
|
|
},
|
|
{
|
|
"epoch": 4.3283582089552235,
|
|
"grad_norm": 0.07318657487755899,
|
|
"learning_rate": 2.192029518553798e-06,
|
|
"loss": 0.9968132972717285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568413317203522,
|
|
"step": 1017,
|
|
"valid_targets_mean": 14789.1,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 4.332622601279318,
|
|
"grad_norm": 0.06858060825507008,
|
|
"learning_rate": 2.165050583924566e-06,
|
|
"loss": 1.0049998760223389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25639671087265015,
|
|
"step": 1018,
|
|
"valid_targets_mean": 14663.2,
|
|
"valid_targets_min": 1783
|
|
},
|
|
{
|
|
"epoch": 4.336886993603412,
|
|
"grad_norm": 0.07411521045428578,
|
|
"learning_rate": 2.1382292003566163e-06,
|
|
"loss": 1.0314619541168213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556789517402649,
|
|
"step": 1019,
|
|
"valid_targets_mean": 14624.2,
|
|
"valid_targets_min": 3025
|
|
},
|
|
{
|
|
"epoch": 4.341151385927505,
|
|
"grad_norm": 0.06511150425437823,
|
|
"learning_rate": 2.1115656047857213e-06,
|
|
"loss": 0.9586358070373535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23440968990325928,
|
|
"step": 1020,
|
|
"valid_targets_mean": 14541.2,
|
|
"valid_targets_min": 6528
|
|
},
|
|
{
|
|
"epoch": 4.345415778251599,
|
|
"grad_norm": 0.06719230934513776,
|
|
"learning_rate": 2.0850600327537806e-06,
|
|
"loss": 1.0058658123016357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24526192247867584,
|
|
"step": 1021,
|
|
"valid_targets_mean": 13832.8,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 4.349680170575693,
|
|
"grad_norm": 0.07113438156818028,
|
|
"learning_rate": 2.058712718406719e-06,
|
|
"loss": 1.0557451248168945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25755804777145386,
|
|
"step": 1022,
|
|
"valid_targets_mean": 14744.3,
|
|
"valid_targets_min": 3343
|
|
},
|
|
{
|
|
"epoch": 4.353944562899787,
|
|
"grad_norm": 0.06669766102073219,
|
|
"learning_rate": 2.032523894492471e-06,
|
|
"loss": 1.0497808456420898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627623975276947,
|
|
"step": 1023,
|
|
"valid_targets_mean": 15213.4,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 4.358208955223881,
|
|
"grad_norm": 0.07607320541794989,
|
|
"learning_rate": 2.0064937923588634e-06,
|
|
"loss": 1.0534238815307617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619283199310303,
|
|
"step": 1024,
|
|
"valid_targets_mean": 14656.3,
|
|
"valid_targets_min": 5183
|
|
},
|
|
{
|
|
"epoch": 4.362473347547974,
|
|
"grad_norm": 0.08119606687645467,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 1.0498461723327637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26316332817077637,
|
|
"step": 1025,
|
|
"valid_targets_mean": 14722.5,
|
|
"valid_targets_min": 5331
|
|
},
|
|
{
|
|
"epoch": 4.366737739872068,
|
|
"grad_norm": 0.06862596308820929,
|
|
"learning_rate": 1.954910671812298e-06,
|
|
"loss": 1.0143101215362549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2361828088760376,
|
|
"step": 1026,
|
|
"valid_targets_mean": 14906.2,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.371002132196162,
|
|
"grad_norm": 0.07326739032031071,
|
|
"learning_rate": 1.9293581090762894e-06,
|
|
"loss": 1.0275071859359741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28308725357055664,
|
|
"step": 1027,
|
|
"valid_targets_mean": 14888.9,
|
|
"valid_targets_min": 6743
|
|
},
|
|
{
|
|
"epoch": 4.3752665245202556,
|
|
"grad_norm": 0.06863868612680125,
|
|
"learning_rate": 1.9039651794708058e-06,
|
|
"loss": 0.9960910081863403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24030712246894836,
|
|
"step": 1028,
|
|
"valid_targets_mean": 14711.3,
|
|
"valid_targets_min": 4340
|
|
},
|
|
{
|
|
"epoch": 4.37953091684435,
|
|
"grad_norm": 0.07549350149667627,
|
|
"learning_rate": 1.8787321073128817e-06,
|
|
"loss": 1.0455996990203857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824949026107788,
|
|
"step": 1029,
|
|
"valid_targets_mean": 15550.6,
|
|
"valid_targets_min": 12301
|
|
},
|
|
{
|
|
"epoch": 4.383795309168444,
|
|
"grad_norm": 0.07038637724183405,
|
|
"learning_rate": 1.8536591155073958e-06,
|
|
"loss": 1.0913045406341553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597290277481079,
|
|
"step": 1030,
|
|
"valid_targets_mean": 14563.6,
|
|
"valid_targets_min": 2697
|
|
},
|
|
{
|
|
"epoch": 4.388059701492537,
|
|
"grad_norm": 0.07162871574818422,
|
|
"learning_rate": 1.8287464255451181e-06,
|
|
"loss": 1.0274858474731445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24804572761058807,
|
|
"step": 1031,
|
|
"valid_targets_mean": 14384.5,
|
|
"valid_targets_min": 5063
|
|
},
|
|
{
|
|
"epoch": 4.392324093816631,
|
|
"grad_norm": 0.07930045599964562,
|
|
"learning_rate": 1.803994257500714e-06,
|
|
"loss": 1.024867057800293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25626641511917114,
|
|
"step": 1032,
|
|
"valid_targets_mean": 14941.4,
|
|
"valid_targets_min": 2972
|
|
},
|
|
{
|
|
"epoch": 4.396588486140725,
|
|
"grad_norm": 0.07200737738660623,
|
|
"learning_rate": 1.7794028300308474e-06,
|
|
"loss": 1.0426933765411377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2666964530944824,
|
|
"step": 1033,
|
|
"valid_targets_mean": 14783.8,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 4.400852878464819,
|
|
"grad_norm": 0.06776972771605225,
|
|
"learning_rate": 1.7549723603722003e-06,
|
|
"loss": 1.026928424835205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570304870605469,
|
|
"step": 1034,
|
|
"valid_targets_mean": 15220.8,
|
|
"valid_targets_min": 5119
|
|
},
|
|
{
|
|
"epoch": 4.405117270788913,
|
|
"grad_norm": 0.06979431930494794,
|
|
"learning_rate": 1.730703064339605e-06,
|
|
"loss": 1.0423942804336548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26676464080810547,
|
|
"step": 1035,
|
|
"valid_targets_mean": 15308.4,
|
|
"valid_targets_min": 7131
|
|
},
|
|
{
|
|
"epoch": 4.409381663113006,
|
|
"grad_norm": 0.07193218377728709,
|
|
"learning_rate": 1.7065951563241022e-06,
|
|
"loss": 0.9990702271461487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2415003776550293,
|
|
"step": 1036,
|
|
"valid_targets_mean": 14650.9,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 4.4136460554371,
|
|
"grad_norm": 0.06619207075683764,
|
|
"learning_rate": 1.682648849291051e-06,
|
|
"loss": 1.0610606670379639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25422343611717224,
|
|
"step": 1037,
|
|
"valid_targets_mean": 15126.1,
|
|
"valid_targets_min": 5196
|
|
},
|
|
{
|
|
"epoch": 4.417910447761194,
|
|
"grad_norm": 0.07334265144582895,
|
|
"learning_rate": 1.6588643547782579e-06,
|
|
"loss": 1.0670053958892822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618800401687622,
|
|
"step": 1038,
|
|
"valid_targets_mean": 15004.0,
|
|
"valid_targets_min": 6704
|
|
},
|
|
{
|
|
"epoch": 4.422174840085288,
|
|
"grad_norm": 0.07771088694803348,
|
|
"learning_rate": 1.6352418828941052e-06,
|
|
"loss": 1.0343689918518066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24691098928451538,
|
|
"step": 1039,
|
|
"valid_targets_mean": 14356.8,
|
|
"valid_targets_min": 5911
|
|
},
|
|
{
|
|
"epoch": 4.426439232409382,
|
|
"grad_norm": 0.08304686069812293,
|
|
"learning_rate": 1.6117816423156952e-06,
|
|
"loss": 1.0338213443756104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660481631755829,
|
|
"step": 1040,
|
|
"valid_targets_mean": 15205.1,
|
|
"valid_targets_min": 9283
|
|
},
|
|
{
|
|
"epoch": 4.430703624733475,
|
|
"grad_norm": 0.06920176032446822,
|
|
"learning_rate": 1.5884838402870029e-06,
|
|
"loss": 1.0237390995025635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25461459159851074,
|
|
"step": 1041,
|
|
"valid_targets_mean": 14998.1,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 4.434968017057569,
|
|
"grad_norm": 0.0714054983112944,
|
|
"learning_rate": 1.5653486826170384e-06,
|
|
"loss": 1.0401220321655273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660042643547058,
|
|
"step": 1042,
|
|
"valid_targets_mean": 14499.9,
|
|
"valid_targets_min": 2371
|
|
},
|
|
{
|
|
"epoch": 4.439232409381663,
|
|
"grad_norm": 0.07396556116131836,
|
|
"learning_rate": 1.5423763736780583e-06,
|
|
"loss": 1.0034527778625488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2357511669397354,
|
|
"step": 1043,
|
|
"valid_targets_mean": 14302.2,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 4.443496801705757,
|
|
"grad_norm": 0.07134451067312644,
|
|
"learning_rate": 1.5195671164037173e-06,
|
|
"loss": 1.0172343254089355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27009013295173645,
|
|
"step": 1044,
|
|
"valid_targets_mean": 14220.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 4.447761194029851,
|
|
"grad_norm": 0.06601604046921343,
|
|
"learning_rate": 1.496921112287315e-06,
|
|
"loss": 1.0245767831802368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25725841522216797,
|
|
"step": 1045,
|
|
"valid_targets_mean": 15405.5,
|
|
"valid_targets_min": 7009
|
|
},
|
|
{
|
|
"epoch": 4.452025586353945,
|
|
"grad_norm": 0.068600962040597,
|
|
"learning_rate": 1.4744385613799894e-06,
|
|
"loss": 1.038365125656128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2502679228782654,
|
|
"step": 1046,
|
|
"valid_targets_mean": 14626.3,
|
|
"valid_targets_min": 6602
|
|
},
|
|
{
|
|
"epoch": 4.456289978678038,
|
|
"grad_norm": 0.07591179379461509,
|
|
"learning_rate": 1.4521196622889644e-06,
|
|
"loss": 0.9995392560958862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24727779626846313,
|
|
"step": 1047,
|
|
"valid_targets_mean": 13940.6,
|
|
"valid_targets_min": 2208
|
|
},
|
|
{
|
|
"epoch": 4.460554371002132,
|
|
"grad_norm": 0.06940371444223539,
|
|
"learning_rate": 1.4299646121757892e-06,
|
|
"loss": 1.057096242904663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25691652297973633,
|
|
"step": 1048,
|
|
"valid_targets_mean": 15446.3,
|
|
"valid_targets_min": 7365
|
|
},
|
|
{
|
|
"epoch": 4.464818763326226,
|
|
"grad_norm": 0.06763819548862086,
|
|
"learning_rate": 1.4079736067545912e-06,
|
|
"loss": 1.0104975700378418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25632327795028687,
|
|
"step": 1049,
|
|
"valid_targets_mean": 15491.4,
|
|
"valid_targets_min": 6949
|
|
},
|
|
{
|
|
"epoch": 4.46908315565032,
|
|
"grad_norm": 0.06514807434099039,
|
|
"learning_rate": 1.3861468402903634e-06,
|
|
"loss": 1.0043401718139648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25664448738098145,
|
|
"step": 1050,
|
|
"valid_targets_mean": 14708.8,
|
|
"valid_targets_min": 2469
|
|
},
|
|
{
|
|
"epoch": 4.473347547974414,
|
|
"grad_norm": 0.0731922706826196,
|
|
"learning_rate": 1.3644845055972322e-06,
|
|
"loss": 0.9790605306625366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.230549156665802,
|
|
"step": 1051,
|
|
"valid_targets_mean": 14711.8,
|
|
"valid_targets_min": 5728
|
|
},
|
|
{
|
|
"epoch": 4.477611940298507,
|
|
"grad_norm": 0.06594645842384818,
|
|
"learning_rate": 1.3429867940367626e-06,
|
|
"loss": 1.0149694681167603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28294098377227783,
|
|
"step": 1052,
|
|
"valid_targets_mean": 15629.1,
|
|
"valid_targets_min": 9281
|
|
},
|
|
{
|
|
"epoch": 4.481876332622601,
|
|
"grad_norm": 0.0686014278034404,
|
|
"learning_rate": 1.321653895516264e-06,
|
|
"loss": 1.0112483501434326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24371042847633362,
|
|
"step": 1053,
|
|
"valid_targets_mean": 14560.8,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 4.486140724946695,
|
|
"grad_norm": 0.0731223461819718,
|
|
"learning_rate": 1.3004859984871199e-06,
|
|
"loss": 1.0422847270965576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515747547149658,
|
|
"step": 1054,
|
|
"valid_targets_mean": 14970.4,
|
|
"valid_targets_min": 3323
|
|
},
|
|
{
|
|
"epoch": 4.490405117270789,
|
|
"grad_norm": 0.06687333199186574,
|
|
"learning_rate": 1.279483289943102e-06,
|
|
"loss": 1.011574387550354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24524162709712982,
|
|
"step": 1055,
|
|
"valid_targets_mean": 15123.1,
|
|
"valid_targets_min": 5425
|
|
},
|
|
{
|
|
"epoch": 4.494669509594883,
|
|
"grad_norm": 0.07073431773921562,
|
|
"learning_rate": 1.2586459554187558e-06,
|
|
"loss": 1.0168395042419434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.236406609416008,
|
|
"step": 1056,
|
|
"valid_targets_mean": 13915.4,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 4.498933901918977,
|
|
"grad_norm": 0.0634225450722247,
|
|
"learning_rate": 1.2379741789877175e-06,
|
|
"loss": 0.9951160550117493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24048984050750732,
|
|
"step": 1057,
|
|
"valid_targets_mean": 14242.8,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 4.50319829424307,
|
|
"grad_norm": 0.07766800622585368,
|
|
"learning_rate": 1.2174681432611245e-06,
|
|
"loss": 1.0221624374389648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26837795972824097,
|
|
"step": 1058,
|
|
"valid_targets_mean": 15247.6,
|
|
"valid_targets_min": 5344
|
|
},
|
|
{
|
|
"epoch": 4.507462686567164,
|
|
"grad_norm": 0.07060976347461718,
|
|
"learning_rate": 1.1971280293859811e-06,
|
|
"loss": 0.9929161071777344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27398234605789185,
|
|
"step": 1059,
|
|
"valid_targets_mean": 15723.8,
|
|
"valid_targets_min": 9482
|
|
},
|
|
{
|
|
"epoch": 4.5117270788912585,
|
|
"grad_norm": 0.0753410465734406,
|
|
"learning_rate": 1.17695401704357e-06,
|
|
"loss": 1.0152283906936646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540476322174072,
|
|
"step": 1060,
|
|
"valid_targets_mean": 14697.4,
|
|
"valid_targets_min": 4926
|
|
},
|
|
{
|
|
"epoch": 4.515991471215352,
|
|
"grad_norm": 0.07061174409259466,
|
|
"learning_rate": 1.1569462844478552e-06,
|
|
"loss": 1.0479135513305664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26879221200942993,
|
|
"step": 1061,
|
|
"valid_targets_mean": 14628.4,
|
|
"valid_targets_min": 7780
|
|
},
|
|
{
|
|
"epoch": 4.520255863539446,
|
|
"grad_norm": 0.07137569128746588,
|
|
"learning_rate": 1.1371050083439107e-06,
|
|
"loss": 1.0195355415344238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26304522156715393,
|
|
"step": 1062,
|
|
"valid_targets_mean": 14773.9,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 4.524520255863539,
|
|
"grad_norm": 0.0680445565318658,
|
|
"learning_rate": 1.1174303640063622e-06,
|
|
"loss": 1.0261856317520142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262905091047287,
|
|
"step": 1063,
|
|
"valid_targets_mean": 15105.0,
|
|
"valid_targets_min": 9075
|
|
},
|
|
{
|
|
"epoch": 4.528784648187633,
|
|
"grad_norm": 0.0702614080770827,
|
|
"learning_rate": 1.097922525237849e-06,
|
|
"loss": 1.0566270351409912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552175521850586,
|
|
"step": 1064,
|
|
"valid_targets_mean": 15474.4,
|
|
"valid_targets_min": 6595
|
|
},
|
|
{
|
|
"epoch": 4.533049040511727,
|
|
"grad_norm": 0.06862292834965017,
|
|
"learning_rate": 1.078581664367455e-06,
|
|
"loss": 1.0485453605651855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27617308497428894,
|
|
"step": 1065,
|
|
"valid_targets_mean": 14793.9,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 4.537313432835821,
|
|
"grad_norm": 0.06681411071007434,
|
|
"learning_rate": 1.0594079522492274e-06,
|
|
"loss": 1.0239062309265137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609993517398834,
|
|
"step": 1066,
|
|
"valid_targets_mean": 15103.1,
|
|
"valid_targets_min": 7909
|
|
},
|
|
{
|
|
"epoch": 4.541577825159915,
|
|
"grad_norm": 0.06528963957554342,
|
|
"learning_rate": 1.040401558260633e-06,
|
|
"loss": 0.9920482039451599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24381262063980103,
|
|
"step": 1067,
|
|
"valid_targets_mean": 15355.5,
|
|
"valid_targets_min": 4640
|
|
},
|
|
{
|
|
"epoch": 4.545842217484008,
|
|
"grad_norm": 0.07676207346706825,
|
|
"learning_rate": 1.0215626503010911e-06,
|
|
"loss": 1.0099127292633057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594386339187622,
|
|
"step": 1068,
|
|
"valid_targets_mean": 15372.2,
|
|
"valid_targets_min": 6069
|
|
},
|
|
{
|
|
"epoch": 4.550106609808102,
|
|
"grad_norm": 0.06516871142633743,
|
|
"learning_rate": 1.002891394790475e-06,
|
|
"loss": 1.0267794132232666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2759925127029419,
|
|
"step": 1069,
|
|
"valid_targets_mean": 15337.8,
|
|
"valid_targets_min": 11067
|
|
},
|
|
{
|
|
"epoch": 4.554371002132196,
|
|
"grad_norm": 0.06530391631580551,
|
|
"learning_rate": 9.843879566676273e-07,
|
|
"loss": 0.992304265499115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23234936594963074,
|
|
"step": 1070,
|
|
"valid_targets_mean": 14723.0,
|
|
"valid_targets_min": 7371
|
|
},
|
|
{
|
|
"epoch": 4.55863539445629,
|
|
"grad_norm": 0.07210719157423057,
|
|
"learning_rate": 9.660524993889386e-07,
|
|
"loss": 1.0287081003189087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639193832874298,
|
|
"step": 1071,
|
|
"valid_targets_mean": 14991.1,
|
|
"valid_targets_min": 2865
|
|
},
|
|
{
|
|
"epoch": 4.562899786780384,
|
|
"grad_norm": 0.07594692212651825,
|
|
"learning_rate": 9.478851849268733e-07,
|
|
"loss": 1.064483404159546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25527459383010864,
|
|
"step": 1072,
|
|
"valid_targets_mean": 15471.6,
|
|
"valid_targets_min": 4632
|
|
},
|
|
{
|
|
"epoch": 4.567164179104478,
|
|
"grad_norm": 0.07518135770790788,
|
|
"learning_rate": 9.298861737685527e-07,
|
|
"loss": 1.040311336517334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26637256145477295,
|
|
"step": 1073,
|
|
"valid_targets_mean": 15332.1,
|
|
"valid_targets_min": 4525
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 0.06628130113815535,
|
|
"learning_rate": 9.120556249143341e-07,
|
|
"loss": 1.0161117315292358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659457325935364,
|
|
"step": 1074,
|
|
"valid_targets_mean": 15334.8,
|
|
"valid_targets_min": 6423
|
|
},
|
|
{
|
|
"epoch": 4.575692963752665,
|
|
"grad_norm": 0.06477883899758395,
|
|
"learning_rate": 8.943936958763988e-07,
|
|
"loss": 1.0359610319137573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25776997208595276,
|
|
"step": 1075,
|
|
"valid_targets_mean": 15012.3,
|
|
"valid_targets_min": 7695
|
|
},
|
|
{
|
|
"epoch": 4.5799573560767595,
|
|
"grad_norm": 0.07148636728641779,
|
|
"learning_rate": 8.769005426773836e-07,
|
|
"loss": 1.0297731161117554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26610493659973145,
|
|
"step": 1076,
|
|
"valid_targets_mean": 14544.8,
|
|
"valid_targets_min": 3244
|
|
},
|
|
{
|
|
"epoch": 4.584221748400853,
|
|
"grad_norm": 0.06839633103821331,
|
|
"learning_rate": 8.595763198489714e-07,
|
|
"loss": 1.0279765129089355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28054559230804443,
|
|
"step": 1077,
|
|
"valid_targets_mean": 15303.6,
|
|
"valid_targets_min": 2857
|
|
},
|
|
{
|
|
"epoch": 4.588486140724947,
|
|
"grad_norm": 0.06731727414613335,
|
|
"learning_rate": 8.42421180430546e-07,
|
|
"loss": 1.0172414779663086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2570076584815979,
|
|
"step": 1078,
|
|
"valid_targets_mean": 13805.4,
|
|
"valid_targets_min": 3144
|
|
},
|
|
{
|
|
"epoch": 4.59275053304904,
|
|
"grad_norm": 0.06393586449266486,
|
|
"learning_rate": 8.254352759678386e-07,
|
|
"loss": 1.027620792388916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25848716497421265,
|
|
"step": 1079,
|
|
"valid_targets_mean": 14849.2,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 4.597014925373134,
|
|
"grad_norm": 0.0686772807194171,
|
|
"learning_rate": 8.086187565115877e-07,
|
|
"loss": 1.0338844060897827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26648616790771484,
|
|
"step": 1080,
|
|
"valid_targets_mean": 15024.0,
|
|
"valid_targets_min": 6453
|
|
},
|
|
{
|
|
"epoch": 4.601279317697228,
|
|
"grad_norm": 0.07337205384563839,
|
|
"learning_rate": 7.919717706162067e-07,
|
|
"loss": 0.9947189688682556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2360054850578308,
|
|
"step": 1081,
|
|
"valid_targets_mean": 15262.1,
|
|
"valid_targets_min": 4427
|
|
},
|
|
{
|
|
"epoch": 4.605543710021322,
|
|
"grad_norm": 0.06594479533951486,
|
|
"learning_rate": 7.754944653384777e-07,
|
|
"loss": 1.0052752494812012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24188333749771118,
|
|
"step": 1082,
|
|
"valid_targets_mean": 14649.6,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 4.609808102345416,
|
|
"grad_norm": 0.07064212719654153,
|
|
"learning_rate": 7.591869862362534e-07,
|
|
"loss": 1.0418689250946045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26256895065307617,
|
|
"step": 1083,
|
|
"valid_targets_mean": 14202.5,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 4.61407249466951,
|
|
"grad_norm": 0.07087438339567025,
|
|
"learning_rate": 7.430494773671682e-07,
|
|
"loss": 1.0515038967132568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23694118857383728,
|
|
"step": 1084,
|
|
"valid_targets_mean": 13859.9,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 4.618336886993603,
|
|
"grad_norm": 0.07274082838805951,
|
|
"learning_rate": 7.270820812873714e-07,
|
|
"loss": 0.9710164666175842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2399815320968628,
|
|
"step": 1085,
|
|
"valid_targets_mean": 14702.4,
|
|
"valid_targets_min": 3313
|
|
},
|
|
{
|
|
"epoch": 4.622601279317697,
|
|
"grad_norm": 0.08432480970803712,
|
|
"learning_rate": 7.112849390502563e-07,
|
|
"loss": 1.022752285003662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25002214312553406,
|
|
"step": 1086,
|
|
"valid_targets_mean": 14602.7,
|
|
"valid_targets_min": 5116
|
|
},
|
|
{
|
|
"epoch": 4.6268656716417915,
|
|
"grad_norm": 0.0696628864265594,
|
|
"learning_rate": 6.956581902052306e-07,
|
|
"loss": 0.9715498685836792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2668806314468384,
|
|
"step": 1087,
|
|
"valid_targets_mean": 15010.3,
|
|
"valid_targets_min": 4978
|
|
},
|
|
{
|
|
"epoch": 4.631130063965885,
|
|
"grad_norm": 0.07163952955323827,
|
|
"learning_rate": 6.802019727964593e-07,
|
|
"loss": 1.032862663269043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25951439142227173,
|
|
"step": 1088,
|
|
"valid_targets_mean": 15371.7,
|
|
"valid_targets_min": 8077
|
|
},
|
|
{
|
|
"epoch": 4.635394456289979,
|
|
"grad_norm": 0.06306104281711407,
|
|
"learning_rate": 6.64916423361679e-07,
|
|
"loss": 1.0174851417541504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721509337425232,
|
|
"step": 1089,
|
|
"valid_targets_mean": 15294.1,
|
|
"valid_targets_min": 2618
|
|
},
|
|
{
|
|
"epoch": 4.639658848614072,
|
|
"grad_norm": 0.06659952941914901,
|
|
"learning_rate": 6.498016769309567e-07,
|
|
"loss": 1.0645792484283447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26195454597473145,
|
|
"step": 1090,
|
|
"valid_targets_mean": 14959.5,
|
|
"valid_targets_min": 7016
|
|
},
|
|
{
|
|
"epoch": 4.643923240938166,
|
|
"grad_norm": 0.06450710582873058,
|
|
"learning_rate": 6.348578670255224e-07,
|
|
"loss": 1.0212966203689575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23479299247264862,
|
|
"step": 1091,
|
|
"valid_targets_mean": 14402.1,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 4.6481876332622605,
|
|
"grad_norm": 0.0725568214493117,
|
|
"learning_rate": 6.200851256565799e-07,
|
|
"loss": 1.0405863523483276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2567938268184662,
|
|
"step": 1092,
|
|
"valid_targets_mean": 15573.6,
|
|
"valid_targets_min": 6038
|
|
},
|
|
{
|
|
"epoch": 4.652452025586354,
|
|
"grad_norm": 0.07133862694492665,
|
|
"learning_rate": 6.054835833241357e-07,
|
|
"loss": 1.095170497894287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838001549243927,
|
|
"step": 1093,
|
|
"valid_targets_mean": 15277.7,
|
|
"valid_targets_min": 4293
|
|
},
|
|
{
|
|
"epoch": 4.656716417910448,
|
|
"grad_norm": 0.07002347703730735,
|
|
"learning_rate": 5.910533690158593e-07,
|
|
"loss": 0.9981926679611206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2459121197462082,
|
|
"step": 1094,
|
|
"valid_targets_mean": 14975.6,
|
|
"valid_targets_min": 3615
|
|
},
|
|
{
|
|
"epoch": 4.660980810234541,
|
|
"grad_norm": 0.06953398777655154,
|
|
"learning_rate": 5.767946102059307e-07,
|
|
"loss": 0.9948866367340088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2484731674194336,
|
|
"step": 1095,
|
|
"valid_targets_mean": 15209.9,
|
|
"valid_targets_min": 9230
|
|
},
|
|
{
|
|
"epoch": 4.665245202558635,
|
|
"grad_norm": 0.07088669909338102,
|
|
"learning_rate": 5.627074328539173e-07,
|
|
"loss": 1.0146452188491821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2458096444606781,
|
|
"step": 1096,
|
|
"valid_targets_mean": 14502.3,
|
|
"valid_targets_min": 2478
|
|
},
|
|
{
|
|
"epoch": 4.669509594882729,
|
|
"grad_norm": 0.06364041390975456,
|
|
"learning_rate": 5.487919614036741e-07,
|
|
"loss": 1.041628360748291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578613758087158,
|
|
"step": 1097,
|
|
"valid_targets_mean": 14867.4,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 4.673773987206823,
|
|
"grad_norm": 0.06729007917374852,
|
|
"learning_rate": 5.350483187822231e-07,
|
|
"loss": 1.0418592691421509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23668396472930908,
|
|
"step": 1098,
|
|
"valid_targets_mean": 14719.7,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 4.678038379530917,
|
|
"grad_norm": 0.07078240243018288,
|
|
"learning_rate": 5.214766263986848e-07,
|
|
"loss": 1.0214943885803223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24630528688430786,
|
|
"step": 1099,
|
|
"valid_targets_mean": 14434.8,
|
|
"valid_targets_min": 4838
|
|
},
|
|
{
|
|
"epoch": 4.682302771855011,
|
|
"grad_norm": 0.0732667090432839,
|
|
"learning_rate": 5.080770041431926e-07,
|
|
"loss": 1.0413594245910645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26989224553108215,
|
|
"step": 1100,
|
|
"valid_targets_mean": 15117.5,
|
|
"valid_targets_min": 7453
|
|
},
|
|
{
|
|
"epoch": 4.686567164179104,
|
|
"grad_norm": 0.07371846187994414,
|
|
"learning_rate": 4.948495703858492e-07,
|
|
"loss": 1.0573935508728027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2608649432659149,
|
|
"step": 1101,
|
|
"valid_targets_mean": 15384.5,
|
|
"valid_targets_min": 8024
|
|
},
|
|
{
|
|
"epoch": 4.690831556503198,
|
|
"grad_norm": 0.0693879158573615,
|
|
"learning_rate": 4.81794441975667e-07,
|
|
"loss": 1.036195993423462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616111934185028,
|
|
"step": 1102,
|
|
"valid_targets_mean": 14345.1,
|
|
"valid_targets_min": 4227
|
|
},
|
|
{
|
|
"epoch": 4.6950959488272925,
|
|
"grad_norm": 0.06407155646007538,
|
|
"learning_rate": 4.689117342395388e-07,
|
|
"loss": 1.004786491394043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25563374161720276,
|
|
"step": 1103,
|
|
"valid_targets_mean": 15178.0,
|
|
"valid_targets_min": 7904
|
|
},
|
|
{
|
|
"epoch": 4.699360341151386,
|
|
"grad_norm": 0.07298714830099647,
|
|
"learning_rate": 4.5620156098122204e-07,
|
|
"loss": 1.011723279953003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569216191768646,
|
|
"step": 1104,
|
|
"valid_targets_mean": 15558.1,
|
|
"valid_targets_min": 5908
|
|
},
|
|
{
|
|
"epoch": 4.70362473347548,
|
|
"grad_norm": 0.0685237557864966,
|
|
"learning_rate": 4.4366403448033334e-07,
|
|
"loss": 1.0312525033950806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25594452023506165,
|
|
"step": 1105,
|
|
"valid_targets_mean": 14322.6,
|
|
"valid_targets_min": 3879
|
|
},
|
|
{
|
|
"epoch": 4.707889125799573,
|
|
"grad_norm": 0.0703474360253264,
|
|
"learning_rate": 4.3129926549136057e-07,
|
|
"loss": 1.0201470851898193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22442176938056946,
|
|
"step": 1106,
|
|
"valid_targets_mean": 13867.3,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 4.712153518123667,
|
|
"grad_norm": 0.07336197186961974,
|
|
"learning_rate": 4.191073632426701e-07,
|
|
"loss": 1.0148887634277344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25482404232025146,
|
|
"step": 1107,
|
|
"valid_targets_mean": 15000.6,
|
|
"valid_targets_min": 3103
|
|
},
|
|
{
|
|
"epoch": 4.7164179104477615,
|
|
"grad_norm": 0.07056139284812354,
|
|
"learning_rate": 4.0708843543555643e-07,
|
|
"loss": 1.0253814458847046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721610963344574,
|
|
"step": 1108,
|
|
"valid_targets_mean": 15198.7,
|
|
"valid_targets_min": 7499
|
|
},
|
|
{
|
|
"epoch": 4.720682302771855,
|
|
"grad_norm": 0.06803380374321673,
|
|
"learning_rate": 3.95242588243292e-07,
|
|
"loss": 1.0191898345947266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26221030950546265,
|
|
"step": 1109,
|
|
"valid_targets_mean": 15062.7,
|
|
"valid_targets_min": 5829
|
|
},
|
|
{
|
|
"epoch": 4.724946695095949,
|
|
"grad_norm": 0.0725987925080408,
|
|
"learning_rate": 3.8356992631017e-07,
|
|
"loss": 1.02607262134552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717529535293579,
|
|
"step": 1110,
|
|
"valid_targets_mean": 14682.3,
|
|
"valid_targets_min": 5934
|
|
},
|
|
{
|
|
"epoch": 4.729211087420042,
|
|
"grad_norm": 0.0658803796851549,
|
|
"learning_rate": 3.720705527506008e-07,
|
|
"loss": 1.0193414688110352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2442600578069687,
|
|
"step": 1111,
|
|
"valid_targets_mean": 14764.0,
|
|
"valid_targets_min": 2716
|
|
},
|
|
{
|
|
"epoch": 4.733475479744136,
|
|
"grad_norm": 0.06990139750491582,
|
|
"learning_rate": 3.60744569148197e-07,
|
|
"loss": 1.0192015171051025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26712778210639954,
|
|
"step": 1112,
|
|
"valid_targets_mean": 15267.0,
|
|
"valid_targets_min": 8504
|
|
},
|
|
{
|
|
"epoch": 4.73773987206823,
|
|
"grad_norm": 0.06506833736536219,
|
|
"learning_rate": 3.4959207555485873e-07,
|
|
"loss": 1.0410380363464355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2598136067390442,
|
|
"step": 1113,
|
|
"valid_targets_mean": 14534.2,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 4.742004264392325,
|
|
"grad_norm": 0.0709803119615405,
|
|
"learning_rate": 3.3861317048992317e-07,
|
|
"loss": 1.0387706756591797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25980570912361145,
|
|
"step": 1114,
|
|
"valid_targets_mean": 14683.8,
|
|
"valid_targets_min": 4848
|
|
},
|
|
{
|
|
"epoch": 4.746268656716418,
|
|
"grad_norm": 0.0795429644147577,
|
|
"learning_rate": 3.278079509392562e-07,
|
|
"loss": 1.0260200500488281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27000313997268677,
|
|
"step": 1115,
|
|
"valid_targets_mean": 15214.4,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 4.750533049040512,
|
|
"grad_norm": 0.07080126753595156,
|
|
"learning_rate": 3.171765123544224e-07,
|
|
"loss": 1.0400683879852295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27333077788352966,
|
|
"step": 1116,
|
|
"valid_targets_mean": 14569.4,
|
|
"valid_targets_min": 1579
|
|
},
|
|
{
|
|
"epoch": 4.754797441364605,
|
|
"grad_norm": 0.06601794056438176,
|
|
"learning_rate": 3.06718948651834e-07,
|
|
"loss": 0.9994338154792786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26492956280708313,
|
|
"step": 1117,
|
|
"valid_targets_mean": 15930.9,
|
|
"valid_targets_min": 10189
|
|
},
|
|
{
|
|
"epoch": 4.759061833688699,
|
|
"grad_norm": 0.07388133571360742,
|
|
"learning_rate": 2.964353522119168e-07,
|
|
"loss": 1.0069022178649902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25079345703125,
|
|
"step": 1118,
|
|
"valid_targets_mean": 14631.0,
|
|
"valid_targets_min": 3805
|
|
},
|
|
{
|
|
"epoch": 4.7633262260127935,
|
|
"grad_norm": 0.07064063240173123,
|
|
"learning_rate": 2.863258138783032e-07,
|
|
"loss": 0.9954191446304321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24160648882389069,
|
|
"step": 1119,
|
|
"valid_targets_mean": 15717.5,
|
|
"valid_targets_min": 8591
|
|
},
|
|
{
|
|
"epoch": 4.767590618336887,
|
|
"grad_norm": 0.06882403332283994,
|
|
"learning_rate": 2.7639042295702245e-07,
|
|
"loss": 1.0442975759506226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26812103390693665,
|
|
"step": 1120,
|
|
"valid_targets_mean": 14855.8,
|
|
"valid_targets_min": 2705
|
|
},
|
|
{
|
|
"epoch": 4.771855010660981,
|
|
"grad_norm": 0.06332263057368195,
|
|
"learning_rate": 2.666292672157056e-07,
|
|
"loss": 1.0528706312179565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26132845878601074,
|
|
"step": 1121,
|
|
"valid_targets_mean": 15290.7,
|
|
"valid_targets_min": 4054
|
|
},
|
|
{
|
|
"epoch": 4.776119402985074,
|
|
"grad_norm": 0.07675127563438205,
|
|
"learning_rate": 2.570424328828325e-07,
|
|
"loss": 1.0675115585327148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756962478160858,
|
|
"step": 1122,
|
|
"valid_targets_mean": 15214.0,
|
|
"valid_targets_min": 7186
|
|
},
|
|
{
|
|
"epoch": 4.780383795309168,
|
|
"grad_norm": 0.07197151758312617,
|
|
"learning_rate": 2.4763000464694377e-07,
|
|
"loss": 1.0107743740081787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568907141685486,
|
|
"step": 1123,
|
|
"valid_targets_mean": 15773.6,
|
|
"valid_targets_min": 8408
|
|
},
|
|
{
|
|
"epoch": 4.7846481876332625,
|
|
"grad_norm": 0.06500712771537732,
|
|
"learning_rate": 2.383920656559102e-07,
|
|
"loss": 1.0048414468765259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23881423473358154,
|
|
"step": 1124,
|
|
"valid_targets_mean": 14540.1,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 4.788912579957356,
|
|
"grad_norm": 0.06381821089228037,
|
|
"learning_rate": 2.2932869751619568e-07,
|
|
"loss": 0.9843084812164307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623206377029419,
|
|
"step": 1125,
|
|
"valid_targets_mean": 15185.5,
|
|
"valid_targets_min": 4701
|
|
},
|
|
{
|
|
"epoch": 4.79317697228145,
|
|
"grad_norm": 0.06671285990992103,
|
|
"learning_rate": 2.2043998029212643e-07,
|
|
"loss": 1.0114637613296509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629712224006653,
|
|
"step": 1126,
|
|
"valid_targets_mean": 14384.1,
|
|
"valid_targets_min": 3439
|
|
},
|
|
{
|
|
"epoch": 4.797441364605544,
|
|
"grad_norm": 0.06733736034184398,
|
|
"learning_rate": 2.1172599250519398e-07,
|
|
"loss": 1.0296120643615723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24320898950099945,
|
|
"step": 1127,
|
|
"valid_targets_mean": 14321.7,
|
|
"valid_targets_min": 5222
|
|
},
|
|
{
|
|
"epoch": 4.801705756929637,
|
|
"grad_norm": 0.06714757058555061,
|
|
"learning_rate": 2.0318681113336013e-07,
|
|
"loss": 1.0549672842025757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2984343469142914,
|
|
"step": 1128,
|
|
"valid_targets_mean": 15225.4,
|
|
"valid_targets_min": 8459
|
|
},
|
|
{
|
|
"epoch": 4.8059701492537314,
|
|
"grad_norm": 0.06960677138396562,
|
|
"learning_rate": 1.9482251161037302e-07,
|
|
"loss": 1.021659016609192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24553701281547546,
|
|
"step": 1129,
|
|
"valid_targets_mean": 15276.0,
|
|
"valid_targets_min": 4215
|
|
},
|
|
{
|
|
"epoch": 4.810234541577826,
|
|
"grad_norm": 0.06841403613884264,
|
|
"learning_rate": 1.866331678251032e-07,
|
|
"loss": 1.044973373413086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23122069239616394,
|
|
"step": 1130,
|
|
"valid_targets_mean": 14052.2,
|
|
"valid_targets_min": 2103
|
|
},
|
|
{
|
|
"epoch": 4.814498933901919,
|
|
"grad_norm": 0.06404617257701016,
|
|
"learning_rate": 1.7861885212088869e-07,
|
|
"loss": 1.0095136165618896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2586168646812439,
|
|
"step": 1131,
|
|
"valid_targets_mean": 15317.8,
|
|
"valid_targets_min": 8293
|
|
},
|
|
{
|
|
"epoch": 4.818763326226013,
|
|
"grad_norm": 0.07514066868771424,
|
|
"learning_rate": 1.7077963529490204e-07,
|
|
"loss": 1.0391095876693726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26711028814315796,
|
|
"step": 1132,
|
|
"valid_targets_mean": 14660.0,
|
|
"valid_targets_min": 5829
|
|
},
|
|
{
|
|
"epoch": 4.823027718550106,
|
|
"grad_norm": 0.06940507091598883,
|
|
"learning_rate": 1.6311558659751535e-07,
|
|
"loss": 1.0170023441314697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24466711282730103,
|
|
"step": 1133,
|
|
"valid_targets_mean": 14753.7,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 4.8272921108742,
|
|
"grad_norm": 0.06982357949012676,
|
|
"learning_rate": 1.5562677373169855e-07,
|
|
"loss": 1.0421884059906006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834610939025879,
|
|
"step": 1134,
|
|
"valid_targets_mean": 14628.7,
|
|
"valid_targets_min": 2398
|
|
},
|
|
{
|
|
"epoch": 4.8315565031982945,
|
|
"grad_norm": 0.07543983717415494,
|
|
"learning_rate": 1.483132628524131e-07,
|
|
"loss": 1.0707218647003174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25955963134765625,
|
|
"step": 1135,
|
|
"valid_targets_mean": 13616.1,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 4.835820895522388,
|
|
"grad_norm": 0.07005877911193274,
|
|
"learning_rate": 1.4117511856603262e-07,
|
|
"loss": 1.0253279209136963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25752508640289307,
|
|
"step": 1136,
|
|
"valid_targets_mean": 14949.2,
|
|
"valid_targets_min": 7892
|
|
},
|
|
{
|
|
"epoch": 4.840085287846482,
|
|
"grad_norm": 0.07538440424078686,
|
|
"learning_rate": 1.342124039297721e-07,
|
|
"loss": 1.05033278465271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27345603704452515,
|
|
"step": 1137,
|
|
"valid_targets_mean": 15026.5,
|
|
"valid_targets_min": 3711
|
|
},
|
|
{
|
|
"epoch": 4.844349680170575,
|
|
"grad_norm": 0.07216648298727922,
|
|
"learning_rate": 1.2742518045112396e-07,
|
|
"loss": 1.0316473245620728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26999539136886597,
|
|
"step": 1138,
|
|
"valid_targets_mean": 14801.5,
|
|
"valid_targets_min": 5212
|
|
},
|
|
{
|
|
"epoch": 4.848614072494669,
|
|
"grad_norm": 0.06808362405102462,
|
|
"learning_rate": 1.2081350808732518e-07,
|
|
"loss": 0.9786754846572876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707619369029999,
|
|
"step": 1139,
|
|
"valid_targets_mean": 14802.5,
|
|
"valid_targets_min": 5878
|
|
},
|
|
{
|
|
"epoch": 4.8528784648187635,
|
|
"grad_norm": 0.07003955658995022,
|
|
"learning_rate": 1.143774452448243e-07,
|
|
"loss": 1.1124558448791504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772119641304016,
|
|
"step": 1140,
|
|
"valid_targets_mean": 14827.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 0.06466853838334986,
|
|
"learning_rate": 1.0811704877875528e-07,
|
|
"loss": 1.015834093093872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23194001615047455,
|
|
"step": 1141,
|
|
"valid_targets_mean": 15103.3,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 4.861407249466951,
|
|
"grad_norm": 0.07089414025408837,
|
|
"learning_rate": 1.0203237399245336e-07,
|
|
"loss": 1.05367910861969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2548222839832306,
|
|
"step": 1142,
|
|
"valid_targets_mean": 14965.0,
|
|
"valid_targets_min": 3366
|
|
},
|
|
{
|
|
"epoch": 4.865671641791045,
|
|
"grad_norm": 0.06378486026277501,
|
|
"learning_rate": 9.612347463694882e-08,
|
|
"loss": 1.0144537687301636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25325194001197815,
|
|
"step": 1143,
|
|
"valid_targets_mean": 15282.1,
|
|
"valid_targets_min": 3329
|
|
},
|
|
{
|
|
"epoch": 4.869936034115138,
|
|
"grad_norm": 0.06362389374547285,
|
|
"learning_rate": 9.039040291050738e-08,
|
|
"loss": 1.017199993133545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267633855342865,
|
|
"step": 1144,
|
|
"valid_targets_mean": 15420.7,
|
|
"valid_targets_min": 4095
|
|
},
|
|
{
|
|
"epoch": 4.8742004264392325,
|
|
"grad_norm": 0.06777612886576416,
|
|
"learning_rate": 8.483320945815499e-08,
|
|
"loss": 1.0206801891326904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25900164246559143,
|
|
"step": 1145,
|
|
"valid_targets_mean": 15594.0,
|
|
"valid_targets_min": 2624
|
|
},
|
|
{
|
|
"epoch": 4.878464818763327,
|
|
"grad_norm": 0.06845353018871973,
|
|
"learning_rate": 7.945194337124262e-08,
|
|
"loss": 1.012258529663086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568066120147705,
|
|
"step": 1146,
|
|
"valid_targets_mean": 15442.4,
|
|
"valid_targets_min": 4506
|
|
},
|
|
{
|
|
"epoch": 4.88272921108742,
|
|
"grad_norm": 0.06926892166019896,
|
|
"learning_rate": 7.424665218700444e-08,
|
|
"loss": 1.1038507223129272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29018303751945496,
|
|
"step": 1147,
|
|
"valid_targets_mean": 15503.6,
|
|
"valid_targets_min": 9628
|
|
},
|
|
{
|
|
"epoch": 4.886993603411514,
|
|
"grad_norm": 0.06315295442237721,
|
|
"learning_rate": 6.921738188814254e-08,
|
|
"loss": 1.0237013101577759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25076520442962646,
|
|
"step": 1148,
|
|
"valid_targets_mean": 15241.7,
|
|
"valid_targets_min": 7900
|
|
},
|
|
{
|
|
"epoch": 4.891257995735607,
|
|
"grad_norm": 0.06599875528952254,
|
|
"learning_rate": 6.436417690241614e-08,
|
|
"loss": 1.0230720043182373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2280958890914917,
|
|
"step": 1149,
|
|
"valid_targets_mean": 13944.6,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 4.895522388059701,
|
|
"grad_norm": 0.06474129841007441,
|
|
"learning_rate": 5.968708010225532e-08,
|
|
"loss": 1.0266101360321045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811443507671356,
|
|
"step": 1150,
|
|
"valid_targets_mean": 15421.0,
|
|
"valid_targets_min": 10460
|
|
},
|
|
{
|
|
"epoch": 4.899786780383796,
|
|
"grad_norm": 0.06290320997335248,
|
|
"learning_rate": 5.518613280437901e-08,
|
|
"loss": 1.0010175704956055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23834742605686188,
|
|
"step": 1151,
|
|
"valid_targets_mean": 14993.0,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 4.904051172707889,
|
|
"grad_norm": 0.06706259129263993,
|
|
"learning_rate": 5.0861374769426433e-08,
|
|
"loss": 1.023618459701538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2484329491853714,
|
|
"step": 1152,
|
|
"valid_targets_mean": 14297.7,
|
|
"valid_targets_min": 4970
|
|
},
|
|
{
|
|
"epoch": 4.908315565031983,
|
|
"grad_norm": 0.06522941130543658,
|
|
"learning_rate": 4.671284420161071e-08,
|
|
"loss": 1.0196456909179688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24993275105953217,
|
|
"step": 1153,
|
|
"valid_targets_mean": 14473.3,
|
|
"valid_targets_min": 5515
|
|
},
|
|
{
|
|
"epoch": 4.912579957356077,
|
|
"grad_norm": 0.06823336910362307,
|
|
"learning_rate": 4.274057774838136e-08,
|
|
"loss": 1.0153276920318604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24666878581047058,
|
|
"step": 1154,
|
|
"valid_targets_mean": 14443.9,
|
|
"valid_targets_min": 1942
|
|
},
|
|
{
|
|
"epoch": 4.91684434968017,
|
|
"grad_norm": 0.06610513270076279,
|
|
"learning_rate": 3.894461050010012e-08,
|
|
"loss": 0.9828450679779053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21773172914981842,
|
|
"step": 1155,
|
|
"valid_targets_mean": 14033.9,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 4.9211087420042645,
|
|
"grad_norm": 0.06996793504950291,
|
|
"learning_rate": 3.5324975989725615e-08,
|
|
"loss": 1.0309505462646484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2472284585237503,
|
|
"step": 1156,
|
|
"valid_targets_mean": 14439.8,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 4.925373134328359,
|
|
"grad_norm": 0.06388795519447886,
|
|
"learning_rate": 3.188170619252473e-08,
|
|
"loss": 1.011868953704834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571587562561035,
|
|
"step": 1157,
|
|
"valid_targets_mean": 15696.2,
|
|
"valid_targets_min": 11166
|
|
},
|
|
{
|
|
"epoch": 4.929637526652452,
|
|
"grad_norm": 0.07129646520897767,
|
|
"learning_rate": 2.8614831525786147e-08,
|
|
"loss": 1.0241844654083252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2564241290092468,
|
|
"step": 1158,
|
|
"valid_targets_mean": 15321.8,
|
|
"valid_targets_min": 9103
|
|
},
|
|
{
|
|
"epoch": 4.933901918976546,
|
|
"grad_norm": 0.0684903319385578,
|
|
"learning_rate": 2.552438084855613e-08,
|
|
"loss": 1.0295789241790771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541089355945587,
|
|
"step": 1159,
|
|
"valid_targets_mean": 15070.4,
|
|
"valid_targets_min": 3439
|
|
},
|
|
{
|
|
"epoch": 4.938166311300639,
|
|
"grad_norm": 0.07131368949668086,
|
|
"learning_rate": 2.2610381461372068e-08,
|
|
"loss": 1.066670298576355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28374648094177246,
|
|
"step": 1160,
|
|
"valid_targets_mean": 15213.1,
|
|
"valid_targets_min": 7497
|
|
},
|
|
{
|
|
"epoch": 4.9424307036247335,
|
|
"grad_norm": 0.06726554839796847,
|
|
"learning_rate": 1.987285910603598e-08,
|
|
"loss": 1.0335086584091187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25023555755615234,
|
|
"step": 1161,
|
|
"valid_targets_mean": 14933.2,
|
|
"valid_targets_min": 4176
|
|
},
|
|
{
|
|
"epoch": 4.946695095948828,
|
|
"grad_norm": 0.07738661305412707,
|
|
"learning_rate": 1.7311837965379164e-08,
|
|
"loss": 1.0337916612625122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26403045654296875,
|
|
"step": 1162,
|
|
"valid_targets_mean": 15545.3,
|
|
"valid_targets_min": 7887
|
|
},
|
|
{
|
|
"epoch": 4.950959488272921,
|
|
"grad_norm": 0.06573063147229223,
|
|
"learning_rate": 1.4927340663046798e-08,
|
|
"loss": 1.0332417488098145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633013129234314,
|
|
"step": 1163,
|
|
"valid_targets_mean": 14736.1,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 4.955223880597015,
|
|
"grad_norm": 0.0682202091918646,
|
|
"learning_rate": 1.2719388263300325e-08,
|
|
"loss": 1.0377440452575684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24617770314216614,
|
|
"step": 1164,
|
|
"valid_targets_mean": 14002.5,
|
|
"valid_targets_min": 2427
|
|
},
|
|
{
|
|
"epoch": 4.959488272921108,
|
|
"grad_norm": 0.06994186375652835,
|
|
"learning_rate": 1.0688000270839827e-08,
|
|
"loss": 1.0595359802246094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26381662487983704,
|
|
"step": 1165,
|
|
"valid_targets_mean": 14263.2,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 4.963752665245202,
|
|
"grad_norm": 0.0760962519951349,
|
|
"learning_rate": 8.833194630615271e-09,
|
|
"loss": 1.0510656833648682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26617372035980225,
|
|
"step": 1166,
|
|
"valid_targets_mean": 15018.2,
|
|
"valid_targets_min": 8113
|
|
},
|
|
{
|
|
"epoch": 4.968017057569297,
|
|
"grad_norm": 0.07119874327563927,
|
|
"learning_rate": 7.154987727682194e-09,
|
|
"loss": 1.029045581817627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25893861055374146,
|
|
"step": 1167,
|
|
"valid_targets_mean": 15168.5,
|
|
"valid_targets_min": 4208
|
|
},
|
|
{
|
|
"epoch": 4.97228144989339,
|
|
"grad_norm": 0.07054456507076376,
|
|
"learning_rate": 5.6533943870462625e-09,
|
|
"loss": 1.040916919708252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25821077823638916,
|
|
"step": 1168,
|
|
"valid_targets_mean": 15334.2,
|
|
"valid_targets_min": 3305
|
|
},
|
|
{
|
|
"epoch": 4.976545842217484,
|
|
"grad_norm": 0.07218125077908605,
|
|
"learning_rate": 4.328427873541152e-09,
|
|
"loss": 1.0736911296844482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677837014198303,
|
|
"step": 1169,
|
|
"valid_targets_mean": 15248.6,
|
|
"valid_targets_min": 9453
|
|
},
|
|
{
|
|
"epoch": 4.980810234541578,
|
|
"grad_norm": 0.06630067150008963,
|
|
"learning_rate": 3.1800998917086432e-09,
|
|
"loss": 1.0300840139389038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26069217920303345,
|
|
"step": 1170,
|
|
"valid_targets_mean": 15299.4,
|
|
"valid_targets_min": 6430
|
|
},
|
|
{
|
|
"epoch": 4.985074626865671,
|
|
"grad_norm": 0.06887200852111346,
|
|
"learning_rate": 2.2084205856920393e-09,
|
|
"loss": 1.0282962322235107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24474342167377472,
|
|
"step": 1171,
|
|
"valid_targets_mean": 14832.7,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 4.9893390191897655,
|
|
"grad_norm": 0.06800509684188603,
|
|
"learning_rate": 1.4133985391473482e-09,
|
|
"loss": 1.0762146711349487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27158939838409424,
|
|
"step": 1172,
|
|
"valid_targets_mean": 14859.8,
|
|
"valid_targets_min": 4076
|
|
},
|
|
{
|
|
"epoch": 4.99360341151386,
|
|
"grad_norm": 0.06471716304235586,
|
|
"learning_rate": 7.950407751722288e-10,
|
|
"loss": 0.9734071493148804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2380818873643875,
|
|
"step": 1173,
|
|
"valid_targets_mean": 14930.9,
|
|
"valid_targets_min": 6932
|
|
},
|
|
{
|
|
"epoch": 4.997867803837953,
|
|
"grad_norm": 0.07038236636255867,
|
|
"learning_rate": 3.5335275624159835e-10,
|
|
"loss": 1.0113223791122437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581731975078583,
|
|
"step": 1174,
|
|
"valid_targets_mean": 15478.7,
|
|
"valid_targets_min": 8933
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.0946850373732479,
|
|
"learning_rate": 8.833838415212014e-11,
|
|
"loss": 1.1067698001861572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49867311120033264,
|
|
"step": 1175,
|
|
"valid_targets_mean": 13716.8,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49867311120033264,
|
|
"step": 1175,
|
|
"total_flos": 1367557351931904.0,
|
|
"train_loss": 1.0911384893985505,
|
|
"train_runtime": 4706.648,
|
|
"train_samples_per_second": 31.87,
|
|
"train_steps_per_second": 0.25,
|
|
"valid_targets_mean": 13716.8,
|
|
"valid_targets_min": 3353
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 1175,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1367557351931904.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|