Files
sft__ot30k_Qwen3-1.7B-Base-…/trainer_state.json
ModelHub XC f3e1426944 初始化项目,由ModelHub XC社区提供模型
Model: open-sci/sft__ot30k_Qwen3-1.7B-Base-SFT-Tulu3-decontaminated
Source: Original Platform
2026-05-10 17:38:27 +08:00

12973 lines
375 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1175,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0042643923240938165,
"grad_norm": 3.6095034261999874,
"learning_rate": 0.0,
"loss": 1.3665432929992676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35155701637268066,
"step": 1,
"valid_targets_mean": 14037.0,
"valid_targets_min": 2354
},
{
"epoch": 0.008528784648187633,
"grad_norm": 3.559692570865644,
"learning_rate": 3.3898305084745766e-07,
"loss": 1.3736059665679932,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3430137634277344,
"step": 2,
"valid_targets_mean": 14289.4,
"valid_targets_min": 3217
},
{
"epoch": 0.01279317697228145,
"grad_norm": 3.61021997519416,
"learning_rate": 6.779661016949153e-07,
"loss": 1.3179807662963867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32869216799736023,
"step": 3,
"valid_targets_mean": 14767.9,
"valid_targets_min": 2376
},
{
"epoch": 0.017057569296375266,
"grad_norm": 3.624420248034226,
"learning_rate": 1.016949152542373e-06,
"loss": 1.37325918674469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3541400134563446,
"step": 4,
"valid_targets_mean": 13822.7,
"valid_targets_min": 1341
},
{
"epoch": 0.021321961620469083,
"grad_norm": 3.530294809260869,
"learning_rate": 1.3559322033898307e-06,
"loss": 1.3412843942642212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33716025948524475,
"step": 5,
"valid_targets_mean": 14168.3,
"valid_targets_min": 4549
},
{
"epoch": 0.0255863539445629,
"grad_norm": 3.4703468458999343,
"learning_rate": 1.6949152542372882e-06,
"loss": 1.32222318649292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33370861411094666,
"step": 6,
"valid_targets_mean": 14643.4,
"valid_targets_min": 2062
},
{
"epoch": 0.029850746268656716,
"grad_norm": 3.2896475452822505,
"learning_rate": 2.033898305084746e-06,
"loss": 1.3199026584625244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33475369215011597,
"step": 7,
"valid_targets_mean": 14722.1,
"valid_targets_min": 5706
},
{
"epoch": 0.03411513859275053,
"grad_norm": 3.2575036864876643,
"learning_rate": 2.372881355932204e-06,
"loss": 1.3366254568099976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35241830348968506,
"step": 8,
"valid_targets_mean": 15226.8,
"valid_targets_min": 4690
},
{
"epoch": 0.03837953091684435,
"grad_norm": 2.7465656112914267,
"learning_rate": 2.7118644067796613e-06,
"loss": 1.3531262874603271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3520984649658203,
"step": 9,
"valid_targets_mean": 15026.3,
"valid_targets_min": 11234
},
{
"epoch": 0.042643923240938165,
"grad_norm": 2.599760232807994,
"learning_rate": 3.0508474576271192e-06,
"loss": 1.3007540702819824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3108808696269989,
"step": 10,
"valid_targets_mean": 14829.6,
"valid_targets_min": 2873
},
{
"epoch": 0.046908315565031986,
"grad_norm": 2.3165871951448813,
"learning_rate": 3.3898305084745763e-06,
"loss": 1.324140191078186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33954882621765137,
"step": 11,
"valid_targets_mean": 14368.6,
"valid_targets_min": 1932
},
{
"epoch": 0.0511727078891258,
"grad_norm": 2.0999943066707485,
"learning_rate": 3.7288135593220342e-06,
"loss": 1.2988896369934082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34070098400115967,
"step": 12,
"valid_targets_mean": 14815.2,
"valid_targets_min": 1409
},
{
"epoch": 0.05543710021321962,
"grad_norm": 2.0452655458107496,
"learning_rate": 4.067796610169492e-06,
"loss": 1.3134877681732178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3563053607940674,
"step": 13,
"valid_targets_mean": 15171.1,
"valid_targets_min": 9739
},
{
"epoch": 0.05970149253731343,
"grad_norm": 1.8633852133385178,
"learning_rate": 4.40677966101695e-06,
"loss": 1.3092262744903564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3214074671268463,
"step": 14,
"valid_targets_mean": 14483.7,
"valid_targets_min": 9364
},
{
"epoch": 0.06396588486140725,
"grad_norm": 1.643565378324802,
"learning_rate": 4.745762711864408e-06,
"loss": 1.3089406490325928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3482273519039154,
"step": 15,
"valid_targets_mean": 15076.1,
"valid_targets_min": 3559
},
{
"epoch": 0.06823027718550106,
"grad_norm": 1.9389715014736066,
"learning_rate": 5.084745762711865e-06,
"loss": 1.2389326095581055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29310089349746704,
"step": 16,
"valid_targets_mean": 15001.3,
"valid_targets_min": 6070
},
{
"epoch": 0.07249466950959488,
"grad_norm": 1.935317154593614,
"learning_rate": 5.423728813559323e-06,
"loss": 1.2891302108764648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31626296043395996,
"step": 17,
"valid_targets_mean": 15154.1,
"valid_targets_min": 2936
},
{
"epoch": 0.0767590618336887,
"grad_norm": 1.6797105874067981,
"learning_rate": 5.7627118644067805e-06,
"loss": 1.3249022960662842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3653038442134857,
"step": 18,
"valid_targets_mean": 15572.8,
"valid_targets_min": 11400
},
{
"epoch": 0.08102345415778252,
"grad_norm": 1.536225534974527,
"learning_rate": 6.1016949152542385e-06,
"loss": 1.2798724174499512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3003271818161011,
"step": 19,
"valid_targets_mean": 13826.0,
"valid_targets_min": 1943
},
{
"epoch": 0.08528784648187633,
"grad_norm": 1.3559392185650754,
"learning_rate": 6.440677966101695e-06,
"loss": 1.3002123832702637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3291531801223755,
"step": 20,
"valid_targets_mean": 14183.2,
"valid_targets_min": 1730
},
{
"epoch": 0.08955223880597014,
"grad_norm": 1.5279645356369358,
"learning_rate": 6.779661016949153e-06,
"loss": 1.2929916381835938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3127654790878296,
"step": 21,
"valid_targets_mean": 14698.1,
"valid_targets_min": 2343
},
{
"epoch": 0.09381663113006397,
"grad_norm": 1.7988299073867975,
"learning_rate": 7.1186440677966106e-06,
"loss": 1.2641987800598145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3098265528678894,
"step": 22,
"valid_targets_mean": 14052.0,
"valid_targets_min": 5016
},
{
"epoch": 0.09808102345415778,
"grad_norm": 1.6343366766286063,
"learning_rate": 7.4576271186440685e-06,
"loss": 1.2987055778503418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3260132968425751,
"step": 23,
"valid_targets_mean": 15747.2,
"valid_targets_min": 7977
},
{
"epoch": 0.1023454157782516,
"grad_norm": 1.2620053993929847,
"learning_rate": 7.796610169491526e-06,
"loss": 1.2431552410125732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3168746531009674,
"step": 24,
"valid_targets_mean": 15501.2,
"valid_targets_min": 8014
},
{
"epoch": 0.10660980810234541,
"grad_norm": 1.0528680343805368,
"learning_rate": 8.135593220338983e-06,
"loss": 1.238523006439209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2770693302154541,
"step": 25,
"valid_targets_mean": 13349.6,
"valid_targets_min": 2049
},
{
"epoch": 0.11087420042643924,
"grad_norm": 1.0516962195407495,
"learning_rate": 8.47457627118644e-06,
"loss": 1.2010830640792847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2899758219718933,
"step": 26,
"valid_targets_mean": 13670.7,
"valid_targets_min": 2377
},
{
"epoch": 0.11513859275053305,
"grad_norm": 0.7940073699194692,
"learning_rate": 8.8135593220339e-06,
"loss": 1.1597228050231934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2890457510948181,
"step": 27,
"valid_targets_mean": 15432.1,
"valid_targets_min": 9230
},
{
"epoch": 0.11940298507462686,
"grad_norm": 0.8645868310726478,
"learning_rate": 9.152542372881356e-06,
"loss": 1.2551610469818115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2977900505065918,
"step": 28,
"valid_targets_mean": 14637.2,
"valid_targets_min": 5532
},
{
"epoch": 0.12366737739872068,
"grad_norm": 0.8359560136170403,
"learning_rate": 9.491525423728815e-06,
"loss": 1.1766114234924316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.303621768951416,
"step": 29,
"valid_targets_mean": 14625.5,
"valid_targets_min": 3306
},
{
"epoch": 0.1279317697228145,
"grad_norm": 0.7943946669901569,
"learning_rate": 9.830508474576272e-06,
"loss": 1.2415480613708496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3172162175178528,
"step": 30,
"valid_targets_mean": 15120.5,
"valid_targets_min": 7362
},
{
"epoch": 0.13219616204690832,
"grad_norm": 0.7207217500352064,
"learning_rate": 1.016949152542373e-05,
"loss": 1.1535303592681885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28830111026763916,
"step": 31,
"valid_targets_mean": 14608.3,
"valid_targets_min": 1802
},
{
"epoch": 0.13646055437100213,
"grad_norm": 0.759995540842057,
"learning_rate": 1.0508474576271188e-05,
"loss": 1.1601849794387817,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28582918643951416,
"step": 32,
"valid_targets_mean": 14868.9,
"valid_targets_min": 2381
},
{
"epoch": 0.14072494669509594,
"grad_norm": 0.6830737276669886,
"learning_rate": 1.0847457627118645e-05,
"loss": 1.2404439449310303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31198614835739136,
"step": 33,
"valid_targets_mean": 15542.3,
"valid_targets_min": 9239
},
{
"epoch": 0.14498933901918976,
"grad_norm": 0.5929314585486694,
"learning_rate": 1.1186440677966102e-05,
"loss": 1.1997580528259277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.314759224653244,
"step": 34,
"valid_targets_mean": 15298.9,
"valid_targets_min": 10829
},
{
"epoch": 0.14925373134328357,
"grad_norm": 0.6925944424841863,
"learning_rate": 1.1525423728813561e-05,
"loss": 1.1844682693481445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30469638109207153,
"step": 35,
"valid_targets_mean": 15101.9,
"valid_targets_min": 4685
},
{
"epoch": 0.1535181236673774,
"grad_norm": 0.5763489262005752,
"learning_rate": 1.1864406779661018e-05,
"loss": 1.1813849210739136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30967646837234497,
"step": 36,
"valid_targets_mean": 15434.2,
"valid_targets_min": 8304
},
{
"epoch": 0.15778251599147122,
"grad_norm": 0.5005241806924791,
"learning_rate": 1.2203389830508477e-05,
"loss": 1.1754765510559082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31492385268211365,
"step": 37,
"valid_targets_mean": 15374.5,
"valid_targets_min": 4818
},
{
"epoch": 0.16204690831556504,
"grad_norm": 0.5718581392632069,
"learning_rate": 1.2542372881355932e-05,
"loss": 1.1840746402740479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30204474925994873,
"step": 38,
"valid_targets_mean": 15148.5,
"valid_targets_min": 7056
},
{
"epoch": 0.16631130063965885,
"grad_norm": 0.5334830486510865,
"learning_rate": 1.288135593220339e-05,
"loss": 1.2005257606506348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29177170991897583,
"step": 39,
"valid_targets_mean": 14903.9,
"valid_targets_min": 4758
},
{
"epoch": 0.17057569296375266,
"grad_norm": 0.5072131399300707,
"learning_rate": 1.3220338983050848e-05,
"loss": 1.1401925086975098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30095767974853516,
"step": 40,
"valid_targets_mean": 14631.2,
"valid_targets_min": 4776
},
{
"epoch": 0.17484008528784648,
"grad_norm": 0.4722470199708251,
"learning_rate": 1.3559322033898305e-05,
"loss": 1.1670279502868652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29240870475769043,
"step": 41,
"valid_targets_mean": 14968.9,
"valid_targets_min": 6853
},
{
"epoch": 0.1791044776119403,
"grad_norm": 0.5434354066399931,
"learning_rate": 1.3898305084745764e-05,
"loss": 1.130545973777771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2652777433395386,
"step": 42,
"valid_targets_mean": 14805.2,
"valid_targets_min": 7412
},
{
"epoch": 0.18336886993603413,
"grad_norm": 0.5058616408220706,
"learning_rate": 1.4237288135593221e-05,
"loss": 1.1756665706634521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28690627217292786,
"step": 43,
"valid_targets_mean": 14401.4,
"valid_targets_min": 2709
},
{
"epoch": 0.18763326226012794,
"grad_norm": 0.43897750797538987,
"learning_rate": 1.4576271186440678e-05,
"loss": 1.1239181756973267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2766701579093933,
"step": 44,
"valid_targets_mean": 14983.8,
"valid_targets_min": 1264
},
{
"epoch": 0.19189765458422176,
"grad_norm": 0.43365275567782985,
"learning_rate": 1.4915254237288137e-05,
"loss": 1.1461116075515747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2780134081840515,
"step": 45,
"valid_targets_mean": 14252.4,
"valid_targets_min": 1572
},
{
"epoch": 0.19616204690831557,
"grad_norm": 0.4512989265238024,
"learning_rate": 1.5254237288135594e-05,
"loss": 1.1434454917907715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2802661955356598,
"step": 46,
"valid_targets_mean": 14348.7,
"valid_targets_min": 3473
},
{
"epoch": 0.20042643923240938,
"grad_norm": 0.4159187399960977,
"learning_rate": 1.5593220338983053e-05,
"loss": 1.1971436738967896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31074339151382446,
"step": 47,
"valid_targets_mean": 14754.3,
"valid_targets_min": 2628
},
{
"epoch": 0.2046908315565032,
"grad_norm": 0.4287002324444236,
"learning_rate": 1.593220338983051e-05,
"loss": 1.1085567474365234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.292905330657959,
"step": 48,
"valid_targets_mean": 15185.8,
"valid_targets_min": 5859
},
{
"epoch": 0.208955223880597,
"grad_norm": 0.40116357705924865,
"learning_rate": 1.6271186440677967e-05,
"loss": 1.1042619943618774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2574828863143921,
"step": 49,
"valid_targets_mean": 14226.5,
"valid_targets_min": 3280
},
{
"epoch": 0.21321961620469082,
"grad_norm": 0.43044245903683137,
"learning_rate": 1.6610169491525424e-05,
"loss": 1.1156997680664062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28450608253479004,
"step": 50,
"valid_targets_mean": 14587.2,
"valid_targets_min": 2470
},
{
"epoch": 0.21748400852878466,
"grad_norm": 0.40451054894986177,
"learning_rate": 1.694915254237288e-05,
"loss": 1.137367844581604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2972930073738098,
"step": 51,
"valid_targets_mean": 15135.2,
"valid_targets_min": 2341
},
{
"epoch": 0.22174840085287847,
"grad_norm": 0.40087462915441974,
"learning_rate": 1.728813559322034e-05,
"loss": 1.105331301689148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29150766134262085,
"step": 52,
"valid_targets_mean": 14628.2,
"valid_targets_min": 3150
},
{
"epoch": 0.2260127931769723,
"grad_norm": 0.3661850552460705,
"learning_rate": 1.76271186440678e-05,
"loss": 1.0930171012878418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3009355068206787,
"step": 53,
"valid_targets_mean": 14552.2,
"valid_targets_min": 6412
},
{
"epoch": 0.2302771855010661,
"grad_norm": 0.3979272702937718,
"learning_rate": 1.7966101694915256e-05,
"loss": 1.1302450895309448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2833348512649536,
"step": 54,
"valid_targets_mean": 15306.2,
"valid_targets_min": 9939
},
{
"epoch": 0.2345415778251599,
"grad_norm": 0.3884881406972628,
"learning_rate": 1.8305084745762713e-05,
"loss": 1.152329921722412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29095304012298584,
"step": 55,
"valid_targets_mean": 14843.6,
"valid_targets_min": 6321
},
{
"epoch": 0.23880597014925373,
"grad_norm": 0.37487295806975274,
"learning_rate": 1.864406779661017e-05,
"loss": 1.1638445854187012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2842888832092285,
"step": 56,
"valid_targets_mean": 13096.1,
"valid_targets_min": 1559
},
{
"epoch": 0.24307036247334754,
"grad_norm": 0.3423858598949549,
"learning_rate": 1.898305084745763e-05,
"loss": 1.1405789852142334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28892263770103455,
"step": 57,
"valid_targets_mean": 13918.6,
"valid_targets_min": 4167
},
{
"epoch": 0.24733475479744135,
"grad_norm": 0.4222702129525128,
"learning_rate": 1.9322033898305087e-05,
"loss": 1.1632418632507324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3402993977069855,
"step": 58,
"valid_targets_mean": 14827.4,
"valid_targets_min": 5551
},
{
"epoch": 0.2515991471215352,
"grad_norm": 0.3657817117369871,
"learning_rate": 1.9661016949152545e-05,
"loss": 1.1034752130508423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28796225786209106,
"step": 59,
"valid_targets_mean": 15212.9,
"valid_targets_min": 6383
},
{
"epoch": 0.255863539445629,
"grad_norm": 0.4055278504822321,
"learning_rate": 2e-05,
"loss": 1.1177839040756226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26372405886650085,
"step": 60,
"valid_targets_mean": 13981.9,
"valid_targets_min": 2976
},
{
"epoch": 0.2601279317697228,
"grad_norm": 0.40228392304363,
"learning_rate": 2.033898305084746e-05,
"loss": 1.1403781175613403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29810065031051636,
"step": 61,
"valid_targets_mean": 14926.7,
"valid_targets_min": 1673
},
{
"epoch": 0.26439232409381663,
"grad_norm": 0.3679776772252244,
"learning_rate": 2.0677966101694916e-05,
"loss": 1.1196714639663696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2828063666820526,
"step": 62,
"valid_targets_mean": 14801.3,
"valid_targets_min": 2559
},
{
"epoch": 0.26865671641791045,
"grad_norm": 0.40777945519207404,
"learning_rate": 2.1016949152542376e-05,
"loss": 1.0709521770477295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25932246446609497,
"step": 63,
"valid_targets_mean": 14049.6,
"valid_targets_min": 3022
},
{
"epoch": 0.27292110874200426,
"grad_norm": 0.3727146822040382,
"learning_rate": 2.1355932203389833e-05,
"loss": 1.1754413843154907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28779110312461853,
"step": 64,
"valid_targets_mean": 14548.8,
"valid_targets_min": 3780
},
{
"epoch": 0.2771855010660981,
"grad_norm": 0.4366871373370923,
"learning_rate": 2.169491525423729e-05,
"loss": 1.1435012817382812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30721503496170044,
"step": 65,
"valid_targets_mean": 14906.1,
"valid_targets_min": 3792
},
{
"epoch": 0.2814498933901919,
"grad_norm": 0.4279774688028308,
"learning_rate": 2.2033898305084748e-05,
"loss": 1.1329200267791748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29479724168777466,
"step": 66,
"valid_targets_mean": 14316.4,
"valid_targets_min": 4259
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.400678735476192,
"learning_rate": 2.2372881355932205e-05,
"loss": 1.1038789749145508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2855043411254883,
"step": 67,
"valid_targets_mean": 14869.5,
"valid_targets_min": 1498
},
{
"epoch": 0.2899786780383795,
"grad_norm": 0.39497795610151304,
"learning_rate": 2.2711864406779665e-05,
"loss": 1.1426966190338135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30167877674102783,
"step": 68,
"valid_targets_mean": 14551.2,
"valid_targets_min": 4821
},
{
"epoch": 0.2942430703624733,
"grad_norm": 0.43715196243542226,
"learning_rate": 2.3050847457627122e-05,
"loss": 1.1619558334350586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2610081434249878,
"step": 69,
"valid_targets_mean": 15006.9,
"valid_targets_min": 4126
},
{
"epoch": 0.29850746268656714,
"grad_norm": 0.42497754165786217,
"learning_rate": 2.338983050847458e-05,
"loss": 1.1545188426971436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3084549307823181,
"step": 70,
"valid_targets_mean": 14557.5,
"valid_targets_min": 2311
},
{
"epoch": 0.302771855010661,
"grad_norm": 0.39300490131269866,
"learning_rate": 2.3728813559322036e-05,
"loss": 1.0535039901733398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.256248414516449,
"step": 71,
"valid_targets_mean": 14938.7,
"valid_targets_min": 5689
},
{
"epoch": 0.3070362473347548,
"grad_norm": 0.4460473113733746,
"learning_rate": 2.406779661016949e-05,
"loss": 1.1028974056243896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27757105231285095,
"step": 72,
"valid_targets_mean": 14522.3,
"valid_targets_min": 2727
},
{
"epoch": 0.31130063965884863,
"grad_norm": 0.37242828127501076,
"learning_rate": 2.4406779661016954e-05,
"loss": 1.112041711807251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2917976677417755,
"step": 73,
"valid_targets_mean": 15216.2,
"valid_targets_min": 6987
},
{
"epoch": 0.31556503198294245,
"grad_norm": 0.4481446274782485,
"learning_rate": 2.474576271186441e-05,
"loss": 1.1347367763519287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2750678062438965,
"step": 74,
"valid_targets_mean": 15336.4,
"valid_targets_min": 7965
},
{
"epoch": 0.31982942430703626,
"grad_norm": 0.43733581410184924,
"learning_rate": 2.5084745762711865e-05,
"loss": 1.1315557956695557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28009670972824097,
"step": 75,
"valid_targets_mean": 14431.6,
"valid_targets_min": 3089
},
{
"epoch": 0.32409381663113007,
"grad_norm": 0.3954285378329976,
"learning_rate": 2.5423728813559322e-05,
"loss": 1.1246960163116455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804466784000397,
"step": 76,
"valid_targets_mean": 14309.1,
"valid_targets_min": 3138
},
{
"epoch": 0.3283582089552239,
"grad_norm": 0.4779571096323128,
"learning_rate": 2.576271186440678e-05,
"loss": 1.0751593112945557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2525976300239563,
"step": 77,
"valid_targets_mean": 13998.6,
"valid_targets_min": 2602
},
{
"epoch": 0.3326226012793177,
"grad_norm": 0.44585135554412114,
"learning_rate": 2.610169491525424e-05,
"loss": 1.1375093460083008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28445789217948914,
"step": 78,
"valid_targets_mean": 14369.1,
"valid_targets_min": 4167
},
{
"epoch": 0.3368869936034115,
"grad_norm": 0.470417374465623,
"learning_rate": 2.6440677966101696e-05,
"loss": 1.0897612571716309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26500076055526733,
"step": 79,
"valid_targets_mean": 14101.2,
"valid_targets_min": 4432
},
{
"epoch": 0.3411513859275053,
"grad_norm": 0.6068780185494126,
"learning_rate": 2.6779661016949153e-05,
"loss": 1.0993152856826782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24612003564834595,
"step": 80,
"valid_targets_mean": 13424.2,
"valid_targets_min": 1895
},
{
"epoch": 0.34541577825159914,
"grad_norm": 0.45799024316399983,
"learning_rate": 2.711864406779661e-05,
"loss": 1.1197320222854614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2934653162956238,
"step": 81,
"valid_targets_mean": 14789.6,
"valid_targets_min": 4851
},
{
"epoch": 0.34968017057569295,
"grad_norm": 0.5782558776257897,
"learning_rate": 2.7457627118644068e-05,
"loss": 1.0771827697753906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27633094787597656,
"step": 82,
"valid_targets_mean": 14809.2,
"valid_targets_min": 7600
},
{
"epoch": 0.35394456289978676,
"grad_norm": 0.4390423118562453,
"learning_rate": 2.7796610169491528e-05,
"loss": 1.0495096445083618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25998854637145996,
"step": 83,
"valid_targets_mean": 14823.2,
"valid_targets_min": 3855
},
{
"epoch": 0.3582089552238806,
"grad_norm": 0.4477868865873094,
"learning_rate": 2.8135593220338985e-05,
"loss": 1.125932216644287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2996492385864258,
"step": 84,
"valid_targets_mean": 14658.2,
"valid_targets_min": 2691
},
{
"epoch": 0.3624733475479744,
"grad_norm": 0.5435108771538101,
"learning_rate": 2.8474576271186442e-05,
"loss": 1.099273681640625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2577955722808838,
"step": 85,
"valid_targets_mean": 13998.5,
"valid_targets_min": 714
},
{
"epoch": 0.36673773987206826,
"grad_norm": 0.501630159169242,
"learning_rate": 2.88135593220339e-05,
"loss": 1.0962635278701782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26727327704429626,
"step": 86,
"valid_targets_mean": 14302.5,
"valid_targets_min": 2859
},
{
"epoch": 0.37100213219616207,
"grad_norm": 0.711049212898963,
"learning_rate": 2.9152542372881356e-05,
"loss": 1.089374303817749,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26695096492767334,
"step": 87,
"valid_targets_mean": 14668.7,
"valid_targets_min": 4710
},
{
"epoch": 0.3752665245202559,
"grad_norm": 0.7892843671973578,
"learning_rate": 2.9491525423728817e-05,
"loss": 1.1430811882019043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3085484504699707,
"step": 88,
"valid_targets_mean": 14349.1,
"valid_targets_min": 4164
},
{
"epoch": 0.3795309168443497,
"grad_norm": 0.5183689462744986,
"learning_rate": 2.9830508474576274e-05,
"loss": 1.0902841091156006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2737078070640564,
"step": 89,
"valid_targets_mean": 13790.9,
"valid_targets_min": 1776
},
{
"epoch": 0.3837953091684435,
"grad_norm": 0.7646312445763587,
"learning_rate": 3.016949152542373e-05,
"loss": 1.1340059041976929,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2919940650463104,
"step": 90,
"valid_targets_mean": 14075.8,
"valid_targets_min": 4288
},
{
"epoch": 0.3880597014925373,
"grad_norm": 0.64752048623271,
"learning_rate": 3.0508474576271188e-05,
"loss": 1.1458442211151123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2805406153202057,
"step": 91,
"valid_targets_mean": 14182.6,
"valid_targets_min": 1376
},
{
"epoch": 0.39232409381663114,
"grad_norm": 0.5454429003352456,
"learning_rate": 3.084745762711865e-05,
"loss": 1.048313856124878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26023590564727783,
"step": 92,
"valid_targets_mean": 14727.0,
"valid_targets_min": 8468
},
{
"epoch": 0.39658848614072495,
"grad_norm": 0.5637206883804008,
"learning_rate": 3.1186440677966106e-05,
"loss": 1.1587541103363037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.287053644657135,
"step": 93,
"valid_targets_mean": 14595.0,
"valid_targets_min": 5134
},
{
"epoch": 0.40085287846481876,
"grad_norm": 0.5811785866053178,
"learning_rate": 3.152542372881356e-05,
"loss": 1.0834836959838867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2870972752571106,
"step": 94,
"valid_targets_mean": 14092.9,
"valid_targets_min": 1414
},
{
"epoch": 0.4051172707889126,
"grad_norm": 0.553967312244566,
"learning_rate": 3.186440677966102e-05,
"loss": 1.0956907272338867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2961192727088928,
"step": 95,
"valid_targets_mean": 14336.4,
"valid_targets_min": 7342
},
{
"epoch": 0.4093816631130064,
"grad_norm": 0.5781669241689168,
"learning_rate": 3.2203389830508473e-05,
"loss": 1.0783826112747192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2741866707801819,
"step": 96,
"valid_targets_mean": 14126.5,
"valid_targets_min": 7041
},
{
"epoch": 0.4136460554371002,
"grad_norm": 0.6498875686597626,
"learning_rate": 3.2542372881355934e-05,
"loss": 1.0799309015274048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28361836075782776,
"step": 97,
"valid_targets_mean": 15288.1,
"valid_targets_min": 3032
},
{
"epoch": 0.417910447761194,
"grad_norm": 0.652805962867391,
"learning_rate": 3.2881355932203394e-05,
"loss": 1.0566236972808838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2684245705604553,
"step": 98,
"valid_targets_mean": 15102.7,
"valid_targets_min": 7890
},
{
"epoch": 0.42217484008528783,
"grad_norm": 0.7484133821403403,
"learning_rate": 3.322033898305085e-05,
"loss": 1.0985007286071777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.292073130607605,
"step": 99,
"valid_targets_mean": 15020.4,
"valid_targets_min": 2603
},
{
"epoch": 0.42643923240938164,
"grad_norm": 0.8870600868691184,
"learning_rate": 3.355932203389831e-05,
"loss": 1.083620548248291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26947855949401855,
"step": 100,
"valid_targets_mean": 14664.2,
"valid_targets_min": 6535
},
{
"epoch": 0.43070362473347545,
"grad_norm": 0.9948262878402319,
"learning_rate": 3.389830508474576e-05,
"loss": 1.104163646697998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27336013317108154,
"step": 101,
"valid_targets_mean": 15139.3,
"valid_targets_min": 7516
},
{
"epoch": 0.4349680170575693,
"grad_norm": 1.2094358996328345,
"learning_rate": 3.423728813559322e-05,
"loss": 1.1375088691711426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28912967443466187,
"step": 102,
"valid_targets_mean": 15199.2,
"valid_targets_min": 5997
},
{
"epoch": 0.43923240938166314,
"grad_norm": 0.5917111610802872,
"learning_rate": 3.457627118644068e-05,
"loss": 1.106834888458252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28450143337249756,
"step": 103,
"valid_targets_mean": 15446.5,
"valid_targets_min": 7746
},
{
"epoch": 0.44349680170575695,
"grad_norm": 0.8378893547218664,
"learning_rate": 3.491525423728814e-05,
"loss": 1.0580928325653076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2566010653972626,
"step": 104,
"valid_targets_mean": 13467.9,
"valid_targets_min": 1542
},
{
"epoch": 0.44776119402985076,
"grad_norm": 0.9584890988278837,
"learning_rate": 3.52542372881356e-05,
"loss": 1.0532739162445068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2541162073612213,
"step": 105,
"valid_targets_mean": 15373.5,
"valid_targets_min": 7168
},
{
"epoch": 0.4520255863539446,
"grad_norm": 0.76420718622364,
"learning_rate": 3.559322033898305e-05,
"loss": 1.0518145561218262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2692555785179138,
"step": 106,
"valid_targets_mean": 15254.9,
"valid_targets_min": 7796
},
{
"epoch": 0.4562899786780384,
"grad_norm": 0.7166791138340219,
"learning_rate": 3.593220338983051e-05,
"loss": 1.0239077806472778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24565693736076355,
"step": 107,
"valid_targets_mean": 15089.0,
"valid_targets_min": 2795
},
{
"epoch": 0.4605543710021322,
"grad_norm": 0.848653700045334,
"learning_rate": 3.627118644067797e-05,
"loss": 1.121671199798584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2981266379356384,
"step": 108,
"valid_targets_mean": 14925.8,
"valid_targets_min": 2210
},
{
"epoch": 0.464818763326226,
"grad_norm": 0.8008103618988017,
"learning_rate": 3.6610169491525426e-05,
"loss": 1.1195881366729736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27601322531700134,
"step": 109,
"valid_targets_mean": 14718.9,
"valid_targets_min": 3165
},
{
"epoch": 0.4690831556503198,
"grad_norm": 0.5038896313627277,
"learning_rate": 3.6949152542372886e-05,
"loss": 1.0606300830841064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2645370066165924,
"step": 110,
"valid_targets_mean": 13860.9,
"valid_targets_min": 1750
},
{
"epoch": 0.47334754797441364,
"grad_norm": 0.6981474544248948,
"learning_rate": 3.728813559322034e-05,
"loss": 1.077075481414795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2845897078514099,
"step": 111,
"valid_targets_mean": 15522.3,
"valid_targets_min": 8644
},
{
"epoch": 0.47761194029850745,
"grad_norm": 0.7881255811353257,
"learning_rate": 3.76271186440678e-05,
"loss": 1.1480183601379395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28835219144821167,
"step": 112,
"valid_targets_mean": 15039.8,
"valid_targets_min": 3059
},
{
"epoch": 0.48187633262260127,
"grad_norm": 0.8593954262553742,
"learning_rate": 3.796610169491526e-05,
"loss": 1.1095085144042969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3078654408454895,
"step": 113,
"valid_targets_mean": 14650.3,
"valid_targets_min": 4726
},
{
"epoch": 0.4861407249466951,
"grad_norm": 0.6675081661635298,
"learning_rate": 3.8305084745762714e-05,
"loss": 1.121692419052124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26774919033050537,
"step": 114,
"valid_targets_mean": 14586.5,
"valid_targets_min": 3354
},
{
"epoch": 0.4904051172707889,
"grad_norm": 0.4926421652997165,
"learning_rate": 3.8644067796610175e-05,
"loss": 1.062641978263855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2730582356452942,
"step": 115,
"valid_targets_mean": 14931.8,
"valid_targets_min": 2234
},
{
"epoch": 0.4946695095948827,
"grad_norm": 0.739902860395688,
"learning_rate": 3.898305084745763e-05,
"loss": 1.091822624206543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2674185037612915,
"step": 116,
"valid_targets_mean": 14651.6,
"valid_targets_min": 4922
},
{
"epoch": 0.4989339019189765,
"grad_norm": 0.8457422864663297,
"learning_rate": 3.932203389830509e-05,
"loss": 1.042148470878601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2632845938205719,
"step": 117,
"valid_targets_mean": 15044.4,
"valid_targets_min": 1486
},
{
"epoch": 0.5031982942430704,
"grad_norm": 0.8923134861757134,
"learning_rate": 3.966101694915255e-05,
"loss": 1.0997896194458008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25651633739471436,
"step": 118,
"valid_targets_mean": 13800.0,
"valid_targets_min": 1422
},
{
"epoch": 0.5074626865671642,
"grad_norm": 0.7633195427196144,
"learning_rate": 4e-05,
"loss": 1.087050199508667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26247042417526245,
"step": 119,
"valid_targets_mean": 14306.9,
"valid_targets_min": 3745
},
{
"epoch": 0.511727078891258,
"grad_norm": 0.5565450965383134,
"learning_rate": 3.999991166161585e-05,
"loss": 1.1498842239379883,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2769172191619873,
"step": 120,
"valid_targets_mean": 14492.4,
"valid_targets_min": 1617
},
{
"epoch": 0.5159914712153518,
"grad_norm": 0.7677755911311013,
"learning_rate": 3.999964664724376e-05,
"loss": 1.090078592300415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25574517250061035,
"step": 121,
"valid_targets_mean": 14547.6,
"valid_targets_min": 2173
},
{
"epoch": 0.5202558635394456,
"grad_norm": 0.8690168799669488,
"learning_rate": 3.999920495922483e-05,
"loss": 1.0715370178222656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2652612030506134,
"step": 122,
"valid_targets_mean": 14360.5,
"valid_targets_min": 5181
},
{
"epoch": 0.5245202558635395,
"grad_norm": 0.8105117911085009,
"learning_rate": 3.999858660146085e-05,
"loss": 1.084350824356079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26827168464660645,
"step": 123,
"valid_targets_mean": 15934.4,
"valid_targets_min": 11306
},
{
"epoch": 0.5287846481876333,
"grad_norm": 0.5485361451611932,
"learning_rate": 3.999779157941431e-05,
"loss": 1.070378303527832,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2750994861125946,
"step": 124,
"valid_targets_mean": 15529.0,
"valid_targets_min": 7890
},
{
"epoch": 0.5330490405117271,
"grad_norm": 0.7358670167267896,
"learning_rate": 3.99968199001083e-05,
"loss": 1.0922883749008179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2585696578025818,
"step": 125,
"valid_targets_mean": 14589.0,
"valid_targets_min": 3231
},
{
"epoch": 0.5373134328358209,
"grad_norm": 0.797597736750579,
"learning_rate": 3.999567157212646e-05,
"loss": 1.043330192565918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27747562527656555,
"step": 126,
"valid_targets_mean": 15039.9,
"valid_targets_min": 2574
},
{
"epoch": 0.5415778251599147,
"grad_norm": 0.8052083330157771,
"learning_rate": 3.9994346605612955e-05,
"loss": 1.0537865161895752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2740834355354309,
"step": 127,
"valid_targets_mean": 15032.1,
"valid_targets_min": 3330
},
{
"epoch": 0.5458422174840085,
"grad_norm": 0.7059667190927159,
"learning_rate": 3.999284501227232e-05,
"loss": 1.0584338903427124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25231048464775085,
"step": 128,
"valid_targets_mean": 14215.7,
"valid_targets_min": 2195
},
{
"epoch": 0.5501066098081023,
"grad_norm": 0.5789841548740489,
"learning_rate": 3.9991166805369393e-05,
"loss": 1.1101102828979492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28956037759780884,
"step": 129,
"valid_targets_mean": 14395.0,
"valid_targets_min": 2490
},
{
"epoch": 0.5543710021321961,
"grad_norm": 0.6010665822792227,
"learning_rate": 3.9989311999729166e-05,
"loss": 1.1048550605773926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2960030138492584,
"step": 130,
"valid_targets_mean": 14569.9,
"valid_targets_min": 5811
},
{
"epoch": 0.55863539445629,
"grad_norm": 0.6569701409702448,
"learning_rate": 3.99872806117367e-05,
"loss": 1.0684092044830322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2546845078468323,
"step": 131,
"valid_targets_mean": 13998.8,
"valid_targets_min": 2805
},
{
"epoch": 0.5628997867803838,
"grad_norm": 0.5314946187145101,
"learning_rate": 3.998507265933696e-05,
"loss": 1.06695556640625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2615054249763489,
"step": 132,
"valid_targets_mean": 14621.0,
"valid_targets_min": 3960
},
{
"epoch": 0.5671641791044776,
"grad_norm": 0.4863285223090466,
"learning_rate": 3.9982688162034624e-05,
"loss": 1.1031931638717651,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27331802248954773,
"step": 133,
"valid_targets_mean": 14888.8,
"valid_targets_min": 2856
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.6707730371726137,
"learning_rate": 3.998012714089397e-05,
"loss": 1.1016449928283691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27774661779403687,
"step": 134,
"valid_targets_mean": 14446.8,
"valid_targets_min": 4971
},
{
"epoch": 0.5756929637526652,
"grad_norm": 0.5788621605217005,
"learning_rate": 3.997738961853863e-05,
"loss": 1.0966145992279053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28476226329803467,
"step": 135,
"valid_targets_mean": 14908.9,
"valid_targets_min": 2543
},
{
"epoch": 0.579957356076759,
"grad_norm": 0.5263939820561895,
"learning_rate": 3.9974475619151445e-05,
"loss": 1.055633783340454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25982269644737244,
"step": 136,
"valid_targets_mean": 15042.8,
"valid_targets_min": 5253
},
{
"epoch": 0.5842217484008528,
"grad_norm": 0.6148184853109188,
"learning_rate": 3.997138516847422e-05,
"loss": 1.036048412322998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27854201197624207,
"step": 137,
"valid_targets_mean": 15314.4,
"valid_targets_min": 6815
},
{
"epoch": 0.5884861407249466,
"grad_norm": 0.5491786862115935,
"learning_rate": 3.9968118293807476e-05,
"loss": 1.1014585494995117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2563304305076599,
"step": 138,
"valid_targets_mean": 14211.9,
"valid_targets_min": 1760
},
{
"epoch": 0.5927505330490405,
"grad_norm": 0.5855027604643234,
"learning_rate": 3.996467502401028e-05,
"loss": 1.0730267763137817,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26775574684143066,
"step": 139,
"valid_targets_mean": 15098.0,
"valid_targets_min": 2956
},
{
"epoch": 0.5970149253731343,
"grad_norm": 0.5642037245742461,
"learning_rate": 3.9961055389499904e-05,
"loss": 1.0382061004638672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2667868137359619,
"step": 140,
"valid_targets_mean": 14864.3,
"valid_targets_min": 6004
},
{
"epoch": 0.6012793176972282,
"grad_norm": 0.5472681549512222,
"learning_rate": 3.995725942225162e-05,
"loss": 1.0722460746765137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2508922219276428,
"step": 141,
"valid_targets_mean": 13533.8,
"valid_targets_min": 2100
},
{
"epoch": 0.605543710021322,
"grad_norm": 0.4851680900294786,
"learning_rate": 3.995328715579839e-05,
"loss": 1.0785164833068848,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27565503120422363,
"step": 142,
"valid_targets_mean": 15253.0,
"valid_targets_min": 7270
},
{
"epoch": 0.6098081023454158,
"grad_norm": 0.5680145964228549,
"learning_rate": 3.994913862523058e-05,
"loss": 1.0484199523925781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2863396108150482,
"step": 143,
"valid_targets_mean": 15135.0,
"valid_targets_min": 2961
},
{
"epoch": 0.6140724946695096,
"grad_norm": 0.4764345561789664,
"learning_rate": 3.9944813867195624e-05,
"loss": 1.0812712907791138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22366584837436676,
"step": 144,
"valid_targets_mean": 12900.4,
"valid_targets_min": 976
},
{
"epoch": 0.6183368869936035,
"grad_norm": 0.5015035827929697,
"learning_rate": 3.9940312919897744e-05,
"loss": 1.1256424188613892,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30476629734039307,
"step": 145,
"valid_targets_mean": 15631.7,
"valid_targets_min": 9492
},
{
"epoch": 0.6226012793176973,
"grad_norm": 0.47020999167538946,
"learning_rate": 3.993563582309759e-05,
"loss": 1.050999402999878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2459922432899475,
"step": 146,
"valid_targets_mean": 15147.3,
"valid_targets_min": 4251
},
{
"epoch": 0.6268656716417911,
"grad_norm": 0.5699061540685109,
"learning_rate": 3.993078261811186e-05,
"loss": 1.0804365873336792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27163904905319214,
"step": 147,
"valid_targets_mean": 15246.7,
"valid_targets_min": 1331
},
{
"epoch": 0.6311300639658849,
"grad_norm": 0.5584228247861476,
"learning_rate": 3.9925753347813e-05,
"loss": 1.130464792251587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2737225890159607,
"step": 148,
"valid_targets_mean": 14379.8,
"valid_targets_min": 2708
},
{
"epoch": 0.6353944562899787,
"grad_norm": 0.47830315667039336,
"learning_rate": 3.992054805662876e-05,
"loss": 1.1157536506652832,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27968811988830566,
"step": 149,
"valid_targets_mean": 14061.5,
"valid_targets_min": 2210
},
{
"epoch": 0.6396588486140725,
"grad_norm": 0.5499285487136879,
"learning_rate": 3.991516679054185e-05,
"loss": 1.0605140924453735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2635878324508667,
"step": 150,
"valid_targets_mean": 14323.0,
"valid_targets_min": 5017
},
{
"epoch": 0.6439232409381663,
"grad_norm": 0.6044012708282325,
"learning_rate": 3.9909609597089496e-05,
"loss": 1.0675933361053467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2633005976676941,
"step": 151,
"valid_targets_mean": 14257.8,
"valid_targets_min": 3979
},
{
"epoch": 0.6481876332622601,
"grad_norm": 0.6358081635077131,
"learning_rate": 3.9903876525363055e-05,
"loss": 1.0996378660202026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26810404658317566,
"step": 152,
"valid_targets_mean": 14785.4,
"valid_targets_min": 7633
},
{
"epoch": 0.652452025586354,
"grad_norm": 0.6458024943907579,
"learning_rate": 3.989796762600755e-05,
"loss": 1.072128415107727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2497999668121338,
"step": 153,
"valid_targets_mean": 14481.2,
"valid_targets_min": 4915
},
{
"epoch": 0.6567164179104478,
"grad_norm": 0.5745683876644413,
"learning_rate": 3.9891882951221246e-05,
"loss": 1.1608052253723145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25260549783706665,
"step": 154,
"valid_targets_mean": 13851.4,
"valid_targets_min": 2001
},
{
"epoch": 0.6609808102345416,
"grad_norm": 0.4803649247442655,
"learning_rate": 3.988562255475518e-05,
"loss": 1.043982982635498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26936691999435425,
"step": 155,
"valid_targets_mean": 15546.7,
"valid_targets_min": 6669
},
{
"epoch": 0.6652452025586354,
"grad_norm": 0.6486434078035109,
"learning_rate": 3.987918649191268e-05,
"loss": 1.0851833820343018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2768741846084595,
"step": 156,
"valid_targets_mean": 14214.1,
"valid_targets_min": 5957
},
{
"epoch": 0.6695095948827292,
"grad_norm": 0.6615015585222541,
"learning_rate": 3.987257481954888e-05,
"loss": 1.0836174488067627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29727786779403687,
"step": 157,
"valid_targets_mean": 14841.3,
"valid_targets_min": 5718
},
{
"epoch": 0.673773987206823,
"grad_norm": 0.6947073895856398,
"learning_rate": 3.9865787596070236e-05,
"loss": 1.0783438682556152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2678397297859192,
"step": 158,
"valid_targets_mean": 14575.0,
"valid_targets_min": 5142
},
{
"epoch": 0.6780383795309168,
"grad_norm": 0.5751686277845317,
"learning_rate": 3.9858824881433975e-05,
"loss": 1.0921587944030762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25064414739608765,
"step": 159,
"valid_targets_mean": 14356.6,
"valid_targets_min": 4878
},
{
"epoch": 0.6823027718550106,
"grad_norm": 0.5503535429377369,
"learning_rate": 3.9851686737147585e-05,
"loss": 1.1289031505584717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2460019886493683,
"step": 160,
"valid_targets_mean": 13718.2,
"valid_targets_min": 805
},
{
"epoch": 0.6865671641791045,
"grad_norm": 0.5708076518560402,
"learning_rate": 3.9844373226268305e-05,
"loss": 1.0423595905303955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2565052807331085,
"step": 161,
"valid_targets_mean": 14228.8,
"valid_targets_min": 2382
},
{
"epoch": 0.6908315565031983,
"grad_norm": 0.5174361675534761,
"learning_rate": 3.983688441340249e-05,
"loss": 1.109586477279663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29132208228111267,
"step": 162,
"valid_targets_mean": 14159.3,
"valid_targets_min": 2792
},
{
"epoch": 0.6950959488272921,
"grad_norm": 0.5350993430608216,
"learning_rate": 3.98292203647051e-05,
"loss": 1.0937280654907227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2752472162246704,
"step": 163,
"valid_targets_mean": 14302.1,
"valid_targets_min": 3711
},
{
"epoch": 0.6993603411513859,
"grad_norm": 0.4756314468049185,
"learning_rate": 3.982138114787912e-05,
"loss": 1.0845508575439453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26880186796188354,
"step": 164,
"valid_targets_mean": 14744.2,
"valid_targets_min": 6098
},
{
"epoch": 0.7036247334754797,
"grad_norm": 0.5062212740230443,
"learning_rate": 3.98133668321749e-05,
"loss": 1.057763695716858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2630884349346161,
"step": 165,
"valid_targets_mean": 14565.0,
"valid_targets_min": 3680
},
{
"epoch": 0.7078891257995735,
"grad_norm": 0.6144712387209604,
"learning_rate": 3.980517748838963e-05,
"loss": 1.1555659770965576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27543964982032776,
"step": 166,
"valid_targets_mean": 13850.9,
"valid_targets_min": 1110
},
{
"epoch": 0.7121535181236673,
"grad_norm": 0.5547824607569634,
"learning_rate": 3.979681318886664e-05,
"loss": 1.0604078769683838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2662920653820038,
"step": 167,
"valid_targets_mean": 14264.0,
"valid_targets_min": 2853
},
{
"epoch": 0.7164179104477612,
"grad_norm": 0.4947741759818403,
"learning_rate": 3.978827400749481e-05,
"loss": 1.0976730585098267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25428158044815063,
"step": 168,
"valid_targets_mean": 14187.2,
"valid_targets_min": 1974
},
{
"epoch": 0.720682302771855,
"grad_norm": 0.4156843059384637,
"learning_rate": 3.977956001970788e-05,
"loss": 1.1516985893249512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2969016134738922,
"step": 169,
"valid_targets_mean": 15474.6,
"valid_targets_min": 8501
},
{
"epoch": 0.7249466950959488,
"grad_norm": 0.5895819516860525,
"learning_rate": 3.977067130248381e-05,
"loss": 1.090247392654419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2736845016479492,
"step": 170,
"valid_targets_mean": 15222.5,
"valid_targets_min": 5620
},
{
"epoch": 0.7292110874200426,
"grad_norm": 0.5849578776142048,
"learning_rate": 3.9761607934344095e-05,
"loss": 1.0230085849761963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24778780341148376,
"step": 171,
"valid_targets_mean": 13855.9,
"valid_targets_min": 3112
},
{
"epoch": 0.7334754797441365,
"grad_norm": 0.4548579226147538,
"learning_rate": 3.975236999535306e-05,
"loss": 1.0465095043182373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.252557635307312,
"step": 172,
"valid_targets_mean": 14650.7,
"valid_targets_min": 3113
},
{
"epoch": 0.7377398720682303,
"grad_norm": 0.5269196435658914,
"learning_rate": 3.974295756711717e-05,
"loss": 1.0935044288635254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26230406761169434,
"step": 173,
"valid_targets_mean": 14595.8,
"valid_targets_min": 2880
},
{
"epoch": 0.7420042643923241,
"grad_norm": 0.5082604397593701,
"learning_rate": 3.9733370732784296e-05,
"loss": 1.115492820739746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2864971160888672,
"step": 174,
"valid_targets_mean": 14760.0,
"valid_targets_min": 2181
},
{
"epoch": 0.746268656716418,
"grad_norm": 0.42559255705028526,
"learning_rate": 3.972360957704298e-05,
"loss": 1.1452744007110596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24954742193222046,
"step": 175,
"valid_targets_mean": 14682.2,
"valid_targets_min": 1778
},
{
"epoch": 0.7505330490405118,
"grad_norm": 0.5072042814146205,
"learning_rate": 3.97136741861217e-05,
"loss": 1.0543792247772217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2778635621070862,
"step": 176,
"valid_targets_mean": 15725.2,
"valid_targets_min": 10505
},
{
"epoch": 0.7547974413646056,
"grad_norm": 0.5132699398104911,
"learning_rate": 3.970356464778808e-05,
"loss": 1.089555025100708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2771134376525879,
"step": 177,
"valid_targets_mean": 14786.6,
"valid_targets_min": 2882
},
{
"epoch": 0.7590618336886994,
"grad_norm": 0.4165975680037749,
"learning_rate": 3.969328105134817e-05,
"loss": 1.077789068222046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2723352909088135,
"step": 178,
"valid_targets_mean": 15287.1,
"valid_targets_min": 1532
},
{
"epoch": 0.7633262260127932,
"grad_norm": 0.5047990050415179,
"learning_rate": 3.9682823487645584e-05,
"loss": 1.0952332019805908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27827781438827515,
"step": 179,
"valid_targets_mean": 14615.7,
"valid_targets_min": 5225
},
{
"epoch": 0.767590618336887,
"grad_norm": 0.539807553746683,
"learning_rate": 3.9672192049060745e-05,
"loss": 1.05403733253479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24824458360671997,
"step": 180,
"valid_targets_mean": 14464.0,
"valid_targets_min": 3973
},
{
"epoch": 0.7718550106609808,
"grad_norm": 0.4146757724736937,
"learning_rate": 3.966138682951008e-05,
"loss": 1.091308355331421,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2893642783164978,
"step": 181,
"valid_targets_mean": 15169.6,
"valid_targets_min": 3780
},
{
"epoch": 0.7761194029850746,
"grad_norm": 0.40834987432982356,
"learning_rate": 3.9650407924445147e-05,
"loss": 1.1261098384857178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2868396043777466,
"step": 182,
"valid_targets_mean": 14721.4,
"valid_targets_min": 1440
},
{
"epoch": 0.7803837953091685,
"grad_norm": 0.5175874864584123,
"learning_rate": 3.963925543085181e-05,
"loss": 1.0834410190582275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2594006061553955,
"step": 183,
"valid_targets_mean": 14252.9,
"valid_targets_min": 4037
},
{
"epoch": 0.7846481876332623,
"grad_norm": 0.4684557852543165,
"learning_rate": 3.96279294472494e-05,
"loss": 1.0013039112091064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2477436661720276,
"step": 184,
"valid_targets_mean": 14142.4,
"valid_targets_min": 1337
},
{
"epoch": 0.7889125799573561,
"grad_norm": 0.44505492822146303,
"learning_rate": 3.961643007368984e-05,
"loss": 1.0587292909622192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2760450839996338,
"step": 185,
"valid_targets_mean": 15403.3,
"valid_targets_min": 10688
},
{
"epoch": 0.7931769722814499,
"grad_norm": 0.4344064548320857,
"learning_rate": 3.960475741175671e-05,
"loss": 1.1106066703796387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28772372007369995,
"step": 186,
"valid_targets_mean": 14154.4,
"valid_targets_min": 1176
},
{
"epoch": 0.7974413646055437,
"grad_norm": 0.43159567556898226,
"learning_rate": 3.959291156456444e-05,
"loss": 1.0540430545806885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2685752511024475,
"step": 187,
"valid_targets_mean": 14308.6,
"valid_targets_min": 1306
},
{
"epoch": 0.8017057569296375,
"grad_norm": 0.5410700638072535,
"learning_rate": 3.9580892636757334e-05,
"loss": 1.039066195487976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26641255617141724,
"step": 188,
"valid_targets_mean": 14659.4,
"valid_targets_min": 1687
},
{
"epoch": 0.8059701492537313,
"grad_norm": 0.6059334352321268,
"learning_rate": 3.9568700734508645e-05,
"loss": 1.0647523403167725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26160457730293274,
"step": 189,
"valid_targets_mean": 14306.6,
"valid_targets_min": 1896
},
{
"epoch": 0.8102345415778252,
"grad_norm": 0.4489821078015655,
"learning_rate": 3.955633596551967e-05,
"loss": 1.0983606576919556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2925269305706024,
"step": 190,
"valid_targets_mean": 14534.1,
"valid_targets_min": 2343
},
{
"epoch": 0.814498933901919,
"grad_norm": 0.45895194414412344,
"learning_rate": 3.9543798439018776e-05,
"loss": 1.0771918296813965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2698771357536316,
"step": 191,
"valid_targets_mean": 14733.8,
"valid_targets_min": 2367
},
{
"epoch": 0.8187633262260128,
"grad_norm": 0.5218764358198349,
"learning_rate": 3.953108826576046e-05,
"loss": 1.0556185245513916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29757291078567505,
"step": 192,
"valid_targets_mean": 15014.3,
"valid_targets_min": 3991
},
{
"epoch": 0.8230277185501066,
"grad_norm": 0.4414775718360485,
"learning_rate": 3.9518205558024334e-05,
"loss": 1.0616166591644287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24648046493530273,
"step": 193,
"valid_targets_mean": 14510.7,
"valid_targets_min": 2499
},
{
"epoch": 0.8272921108742004,
"grad_norm": 0.48302874637309473,
"learning_rate": 3.9505150429614154e-05,
"loss": 1.057494878768921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2708985209465027,
"step": 194,
"valid_targets_mean": 14285.1,
"valid_targets_min": 4255
},
{
"epoch": 0.8315565031982942,
"grad_norm": 0.47971106129306745,
"learning_rate": 3.949192299585681e-05,
"loss": 1.0607072114944458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23464509844779968,
"step": 195,
"valid_targets_mean": 13950.8,
"valid_targets_min": 3588
},
{
"epoch": 0.835820895522388,
"grad_norm": 0.5123313865384012,
"learning_rate": 3.9478523373601325e-05,
"loss": 1.062612533569336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2731626629829407,
"step": 196,
"valid_targets_mean": 14909.9,
"valid_targets_min": 6385
},
{
"epoch": 0.8400852878464818,
"grad_norm": 0.47835288821616884,
"learning_rate": 3.946495168121778e-05,
"loss": 1.0612168312072754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28909754753112793,
"step": 197,
"valid_targets_mean": 14388.2,
"valid_targets_min": 4776
},
{
"epoch": 0.8443496801705757,
"grad_norm": 0.4652131056026324,
"learning_rate": 3.9451208038596325e-05,
"loss": 1.047271728515625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2545679211616516,
"step": 198,
"valid_targets_mean": 14635.1,
"valid_targets_min": 4017
},
{
"epoch": 0.8486140724946695,
"grad_norm": 0.45502944460531947,
"learning_rate": 3.943729256714608e-05,
"loss": 1.062045931816101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27325451374053955,
"step": 199,
"valid_targets_mean": 15327.0,
"valid_targets_min": 9625
},
{
"epoch": 0.8528784648187633,
"grad_norm": 0.5633842836653237,
"learning_rate": 3.942320538979408e-05,
"loss": 1.0524030923843384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26734107732772827,
"step": 200,
"valid_targets_mean": 15212.5,
"valid_targets_min": 6862
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.4506850114722311,
"learning_rate": 3.9408946630984144e-05,
"loss": 1.0022788047790527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2606180012226105,
"step": 201,
"valid_targets_mean": 15162.9,
"valid_targets_min": 6042
},
{
"epoch": 0.8614072494669509,
"grad_norm": 0.4766784153086289,
"learning_rate": 3.939451641667587e-05,
"loss": 1.0410001277923584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26700738072395325,
"step": 202,
"valid_targets_mean": 15394.2,
"valid_targets_min": 5438
},
{
"epoch": 0.8656716417910447,
"grad_norm": 0.4928195041878012,
"learning_rate": 3.937991487434342e-05,
"loss": 1.0641515254974365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26238715648651123,
"step": 203,
"valid_targets_mean": 14678.1,
"valid_targets_min": 6220
},
{
"epoch": 0.8699360341151386,
"grad_norm": 0.4383260035803565,
"learning_rate": 3.9365142132974484e-05,
"loss": 1.13057541847229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31222638487815857,
"step": 204,
"valid_targets_mean": 14939.2,
"valid_targets_min": 3887
},
{
"epoch": 0.8742004264392325,
"grad_norm": 0.5422248497475302,
"learning_rate": 3.935019832306905e-05,
"loss": 1.0646021366119385,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27387237548828125,
"step": 205,
"valid_targets_mean": 14681.6,
"valid_targets_min": 5499
},
{
"epoch": 0.8784648187633263,
"grad_norm": 0.6087441640111857,
"learning_rate": 3.933508357663832e-05,
"loss": 1.0732862949371338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2741767168045044,
"step": 206,
"valid_targets_mean": 14105.4,
"valid_targets_min": 4262
},
{
"epoch": 0.8827292110874201,
"grad_norm": 0.47249365924433767,
"learning_rate": 3.9319798027203544e-05,
"loss": 1.0405564308166504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.270063579082489,
"step": 207,
"valid_targets_mean": 14148.8,
"valid_targets_min": 575
},
{
"epoch": 0.8869936034115139,
"grad_norm": 0.5119659019318072,
"learning_rate": 3.930434180979478e-05,
"loss": 1.0693408250808716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2622353434562683,
"step": 208,
"valid_targets_mean": 13855.8,
"valid_targets_min": 1919
},
{
"epoch": 0.8912579957356077,
"grad_norm": 0.5689354263807636,
"learning_rate": 3.928871506094975e-05,
"loss": 1.0629595518112183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27715960144996643,
"step": 209,
"valid_targets_mean": 15001.1,
"valid_targets_min": 11504
},
{
"epoch": 0.8955223880597015,
"grad_norm": 0.5404150402667937,
"learning_rate": 3.927291791871264e-05,
"loss": 1.0810761451721191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2562071681022644,
"step": 210,
"valid_targets_mean": 14553.4,
"valid_targets_min": 3171
},
{
"epoch": 0.8997867803837953,
"grad_norm": 0.6095076613029825,
"learning_rate": 3.925695052263284e-05,
"loss": 1.069692611694336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25801941752433777,
"step": 211,
"valid_targets_mean": 14692.5,
"valid_targets_min": 2295
},
{
"epoch": 0.9040511727078892,
"grad_norm": 0.43981803829640587,
"learning_rate": 3.924081301376375e-05,
"loss": 1.043962836265564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2519513964653015,
"step": 212,
"valid_targets_mean": 14521.2,
"valid_targets_min": 1843
},
{
"epoch": 0.908315565031983,
"grad_norm": 0.5055732416079766,
"learning_rate": 3.9224505534661525e-05,
"loss": 1.0576932430267334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23972484469413757,
"step": 213,
"valid_targets_mean": 14508.8,
"valid_targets_min": 4589
},
{
"epoch": 0.9125799573560768,
"grad_norm": 0.5624113760542802,
"learning_rate": 3.92080282293838e-05,
"loss": 1.07790207862854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28238582611083984,
"step": 214,
"valid_targets_mean": 14945.4,
"valid_targets_min": 1789
},
{
"epoch": 0.9168443496801706,
"grad_norm": 0.4579607500075873,
"learning_rate": 3.9191381243488417e-05,
"loss": 1.0570908784866333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2734872102737427,
"step": 215,
"valid_targets_mean": 14692.0,
"valid_targets_min": 3666
},
{
"epoch": 0.9211087420042644,
"grad_norm": 0.5891814686766607,
"learning_rate": 3.9174564724032167e-05,
"loss": 1.0729179382324219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23688025772571564,
"step": 216,
"valid_targets_mean": 13916.4,
"valid_targets_min": 934
},
{
"epoch": 0.9253731343283582,
"grad_norm": 0.4276149009317814,
"learning_rate": 3.9157578819569455e-05,
"loss": 1.0518217086791992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26122087240219116,
"step": 217,
"valid_targets_mean": 14793.8,
"valid_targets_min": 5449
},
{
"epoch": 0.929637526652452,
"grad_norm": 0.49226769754512945,
"learning_rate": 3.9140423680151036e-05,
"loss": 1.046657919883728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2635388970375061,
"step": 218,
"valid_targets_mean": 14376.8,
"valid_targets_min": 2255
},
{
"epoch": 0.9339019189765458,
"grad_norm": 0.49153221979541967,
"learning_rate": 3.9123099457322625e-05,
"loss": 1.1028754711151123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2881706655025482,
"step": 219,
"valid_targets_mean": 14753.6,
"valid_targets_min": 2483
},
{
"epoch": 0.9381663113006397,
"grad_norm": 0.4206683982581794,
"learning_rate": 3.9105606304123605e-05,
"loss": 1.0750335454940796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2556455433368683,
"step": 220,
"valid_targets_mean": 15313.4,
"valid_targets_min": 1380
},
{
"epoch": 0.9424307036247335,
"grad_norm": 0.518948199595769,
"learning_rate": 3.908794437508567e-05,
"loss": 1.0630940198898315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26047244668006897,
"step": 221,
"valid_targets_mean": 14588.6,
"valid_targets_min": 7319
},
{
"epoch": 0.9466950959488273,
"grad_norm": 0.4833013373073034,
"learning_rate": 3.907011382623145e-05,
"loss": 1.0762577056884766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26928189396858215,
"step": 222,
"valid_targets_mean": 14791.3,
"valid_targets_min": 2399
},
{
"epoch": 0.9509594882729211,
"grad_norm": 0.3960814041459837,
"learning_rate": 3.905211481507313e-05,
"loss": 1.065406322479248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28226155042648315,
"step": 223,
"valid_targets_mean": 14754.5,
"valid_targets_min": 5518
},
{
"epoch": 0.9552238805970149,
"grad_norm": 0.48261177892907264,
"learning_rate": 3.903394750061106e-05,
"loss": 1.0659347772598267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2690155506134033,
"step": 224,
"valid_targets_mean": 14576.5,
"valid_targets_min": 3004
},
{
"epoch": 0.9594882729211087,
"grad_norm": 0.44808324622894463,
"learning_rate": 3.9015612043332375e-05,
"loss": 1.0389450788497925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2707400321960449,
"step": 225,
"valid_targets_mean": 14913.6,
"valid_targets_min": 2041
},
{
"epoch": 0.9637526652452025,
"grad_norm": 0.47198420641783995,
"learning_rate": 3.8997108605209535e-05,
"loss": 1.03799307346344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2540205717086792,
"step": 226,
"valid_targets_mean": 14266.9,
"valid_targets_min": 5364
},
{
"epoch": 0.9680170575692963,
"grad_norm": 0.4381815788424525,
"learning_rate": 3.897843734969891e-05,
"loss": 1.0135846138000488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24278923869132996,
"step": 227,
"valid_targets_mean": 15244.7,
"valid_targets_min": 8682
},
{
"epoch": 0.9722814498933902,
"grad_norm": 0.4447673018725514,
"learning_rate": 3.895959844173937e-05,
"loss": 1.0680896043777466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2827821969985962,
"step": 228,
"valid_targets_mean": 14613.2,
"valid_targets_min": 5184
},
{
"epoch": 0.976545842217484,
"grad_norm": 0.4951715137155527,
"learning_rate": 3.8940592047750774e-05,
"loss": 1.0651593208312988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23733918368816376,
"step": 229,
"valid_targets_mean": 13796.5,
"valid_targets_min": 4055
},
{
"epoch": 0.9808102345415778,
"grad_norm": 0.5067672333856692,
"learning_rate": 3.892141833563255e-05,
"loss": 1.0773837566375732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27972668409347534,
"step": 230,
"valid_targets_mean": 15119.4,
"valid_targets_min": 6852
},
{
"epoch": 0.9850746268656716,
"grad_norm": 0.46407214737248004,
"learning_rate": 3.8902077474762155e-05,
"loss": 1.0360264778137207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26833784580230713,
"step": 231,
"valid_targets_mean": 14657.4,
"valid_targets_min": 2488
},
{
"epoch": 0.9893390191897654,
"grad_norm": 0.4153102558348407,
"learning_rate": 3.888256963599364e-05,
"loss": 1.0562363862991333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2683772146701813,
"step": 232,
"valid_targets_mean": 13907.6,
"valid_targets_min": 2050
},
{
"epoch": 0.9936034115138592,
"grad_norm": 0.4123808103332072,
"learning_rate": 3.886289499165609e-05,
"loss": 1.0481302738189697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29726642370224,
"step": 233,
"valid_targets_mean": 15261.6,
"valid_targets_min": 6414
},
{
"epoch": 0.997867803837953,
"grad_norm": 0.43109991180516594,
"learning_rate": 3.884305371555215e-05,
"loss": 1.079208493232727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27206656336784363,
"step": 234,
"valid_targets_mean": 14794.5,
"valid_targets_min": 7270
},
{
"epoch": 1.0,
"grad_norm": 0.5291174543372417,
"learning_rate": 3.882304598295643e-05,
"loss": 1.1089693307876587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.540338397026062,
"step": 235,
"valid_targets_mean": 14646.9,
"valid_targets_min": 4497
},
{
"epoch": 1.004264392324094,
"grad_norm": 0.46209142075677184,
"learning_rate": 3.880287197061402e-05,
"loss": 1.0655412673950195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2537649869918823,
"step": 236,
"valid_targets_mean": 14099.9,
"valid_targets_min": 1532
},
{
"epoch": 1.0085287846481876,
"grad_norm": 0.5435573851854639,
"learning_rate": 3.878253185673888e-05,
"loss": 1.045609951019287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26505547761917114,
"step": 237,
"valid_targets_mean": 14961.6,
"valid_targets_min": 4982
},
{
"epoch": 1.0127931769722816,
"grad_norm": 0.5413226401381751,
"learning_rate": 3.876202582101229e-05,
"loss": 1.04897141456604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2555493116378784,
"step": 238,
"valid_targets_mean": 14361.9,
"valid_targets_min": 3011
},
{
"epoch": 1.0170575692963753,
"grad_norm": 0.4153700682870838,
"learning_rate": 3.874135404458125e-05,
"loss": 1.0530734062194824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2591307759284973,
"step": 239,
"valid_targets_mean": 13943.9,
"valid_targets_min": 2264
},
{
"epoch": 1.0213219616204692,
"grad_norm": 0.3891461925005388,
"learning_rate": 3.8720516710056905e-05,
"loss": 1.0717518329620361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26199546456336975,
"step": 240,
"valid_targets_mean": 14627.2,
"valid_targets_min": 2267
},
{
"epoch": 1.0255863539445629,
"grad_norm": 0.7135766939621001,
"learning_rate": 3.8699514001512885e-05,
"loss": 1.0576286315917969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2684980630874634,
"step": 241,
"valid_targets_mean": 15091.4,
"valid_targets_min": 2326
},
{
"epoch": 1.0298507462686568,
"grad_norm": 0.4620797583276411,
"learning_rate": 3.867834610448374e-05,
"loss": 1.026517629623413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2597351372241974,
"step": 242,
"valid_targets_mean": 14510.1,
"valid_targets_min": 1886
},
{
"epoch": 1.0341151385927505,
"grad_norm": 0.49028085372128044,
"learning_rate": 3.865701320596324e-05,
"loss": 1.0431249141693115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22768917679786682,
"step": 243,
"valid_targets_mean": 13234.3,
"valid_targets_min": 2697
},
{
"epoch": 1.0383795309168444,
"grad_norm": 0.40014372336732035,
"learning_rate": 3.863551549440277e-05,
"loss": 1.0454719066619873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2336433231830597,
"step": 244,
"valid_targets_mean": 13984.1,
"valid_targets_min": 2184
},
{
"epoch": 1.0426439232409381,
"grad_norm": 0.4286915819783026,
"learning_rate": 3.861385315970964e-05,
"loss": 1.014958143234253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24774692952632904,
"step": 245,
"valid_targets_mean": 13774.0,
"valid_targets_min": 2392
},
{
"epoch": 1.046908315565032,
"grad_norm": 0.4891269148797478,
"learning_rate": 3.859202639324542e-05,
"loss": 1.0368402004241943,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24918904900550842,
"step": 246,
"valid_targets_mean": 13648.0,
"valid_targets_min": 2810
},
{
"epoch": 1.0511727078891258,
"grad_norm": 0.5785437585611787,
"learning_rate": 3.8570035387824214e-05,
"loss": 1.029822826385498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24193452298641205,
"step": 247,
"valid_targets_mean": 14391.8,
"valid_targets_min": 1380
},
{
"epoch": 1.0554371002132197,
"grad_norm": 0.49027906008514305,
"learning_rate": 3.8547880337711036e-05,
"loss": 1.0420994758605957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.240075021982193,
"step": 248,
"valid_targets_mean": 13562.6,
"valid_targets_min": 1765
},
{
"epoch": 1.0597014925373134,
"grad_norm": 0.4531599901069784,
"learning_rate": 3.8525561438620016e-05,
"loss": 1.092591404914856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31094545125961304,
"step": 249,
"valid_targets_mean": 15091.4,
"valid_targets_min": 1968
},
{
"epoch": 1.0639658848614073,
"grad_norm": 0.4789861520712335,
"learning_rate": 3.850307888771269e-05,
"loss": 1.0229907035827637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25899046659469604,
"step": 250,
"valid_targets_mean": 14780.2,
"valid_targets_min": 8635
},
{
"epoch": 1.068230277185501,
"grad_norm": 0.5003673261621284,
"learning_rate": 3.848043288359629e-05,
"loss": 1.036152958869934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2403426468372345,
"step": 251,
"valid_targets_mean": 15008.4,
"valid_targets_min": 927
},
{
"epoch": 1.072494669509595,
"grad_norm": 0.35225675218456176,
"learning_rate": 3.8457623626321944e-05,
"loss": 1.0302397012710571,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2747737765312195,
"step": 252,
"valid_targets_mean": 15544.8,
"valid_targets_min": 9842
},
{
"epoch": 1.0767590618336886,
"grad_norm": 0.4592550905412805,
"learning_rate": 3.843465131738296e-05,
"loss": 1.0454399585723877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27861565351486206,
"step": 253,
"valid_targets_mean": 15859.3,
"valid_targets_min": 11105
},
{
"epoch": 1.0810234541577826,
"grad_norm": 0.5429737259358417,
"learning_rate": 3.8411516159713e-05,
"loss": 1.025251865386963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2655147314071655,
"step": 254,
"valid_targets_mean": 14621.3,
"valid_targets_min": 1414
},
{
"epoch": 1.0852878464818763,
"grad_norm": 0.5125998906864528,
"learning_rate": 3.838821835768431e-05,
"loss": 0.9942444562911987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24379202723503113,
"step": 255,
"valid_targets_mean": 14590.2,
"valid_targets_min": 2797
},
{
"epoch": 1.0895522388059702,
"grad_norm": 0.42817212203890226,
"learning_rate": 3.83647581171059e-05,
"loss": 1.0361202955245972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2260512411594391,
"step": 256,
"valid_targets_mean": 14038.0,
"valid_targets_min": 1677
},
{
"epoch": 1.0938166311300639,
"grad_norm": 0.44374680049258125,
"learning_rate": 3.8341135645221744e-05,
"loss": 1.018730878829956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2549141049385071,
"step": 257,
"valid_targets_mean": 14797.3,
"valid_targets_min": 4346
},
{
"epoch": 1.0980810234541578,
"grad_norm": 0.4205594672534289,
"learning_rate": 3.831735115070895e-05,
"loss": 1.0082337856292725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2516046464443207,
"step": 258,
"valid_targets_mean": 14662.9,
"valid_targets_min": 1517
},
{
"epoch": 1.1023454157782515,
"grad_norm": 0.4454694983574608,
"learning_rate": 3.8293404843675904e-05,
"loss": 1.0284898281097412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2286127209663391,
"step": 259,
"valid_targets_mean": 14888.3,
"valid_targets_min": 6449
},
{
"epoch": 1.1066098081023454,
"grad_norm": 0.5045885678028411,
"learning_rate": 3.8269296935660395e-05,
"loss": 1.0335543155670166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23934660851955414,
"step": 260,
"valid_targets_mean": 14052.8,
"valid_targets_min": 4177
},
{
"epoch": 1.1108742004264391,
"grad_norm": 0.48150807409703467,
"learning_rate": 3.82450276396278e-05,
"loss": 1.0389349460601807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2629929482936859,
"step": 261,
"valid_targets_mean": 14497.7,
"valid_targets_min": 5676
},
{
"epoch": 1.115138592750533,
"grad_norm": 0.37723589464290347,
"learning_rate": 3.822059716996916e-05,
"loss": 1.0075374841690063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25467902421951294,
"step": 262,
"valid_targets_mean": 14696.9,
"valid_targets_min": 3243
},
{
"epoch": 1.1194029850746268,
"grad_norm": 0.4127260559764244,
"learning_rate": 3.819600574249929e-05,
"loss": 0.9890443086624146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25443321466445923,
"step": 263,
"valid_targets_mean": 15884.4,
"valid_targets_min": 11415
},
{
"epoch": 1.1236673773987207,
"grad_norm": 0.5260964350191876,
"learning_rate": 3.817125357445489e-05,
"loss": 1.054425597190857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2661350965499878,
"step": 264,
"valid_targets_mean": 14409.9,
"valid_targets_min": 2985
},
{
"epoch": 1.1279317697228146,
"grad_norm": 0.46323425077370667,
"learning_rate": 3.814634088449261e-05,
"loss": 1.0141037702560425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2333107590675354,
"step": 265,
"valid_targets_mean": 14820.0,
"valid_targets_min": 5142
},
{
"epoch": 1.1321961620469083,
"grad_norm": 0.422437846529809,
"learning_rate": 3.812126789268712e-05,
"loss": 1.0493249893188477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26373741030693054,
"step": 266,
"valid_targets_mean": 15534.1,
"valid_targets_min": 10907
},
{
"epoch": 1.136460554371002,
"grad_norm": 0.46961962629963355,
"learning_rate": 3.80960348205292e-05,
"loss": 1.0849034786224365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24709264934062958,
"step": 267,
"valid_targets_mean": 13681.9,
"valid_targets_min": 2050
},
{
"epoch": 1.140724946695096,
"grad_norm": 0.47819964623494876,
"learning_rate": 3.807064189092372e-05,
"loss": 1.0932810306549072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27637046575546265,
"step": 268,
"valid_targets_mean": 14011.6,
"valid_targets_min": 3608
},
{
"epoch": 1.1449893390191899,
"grad_norm": 0.4868072859995262,
"learning_rate": 3.804508932818771e-05,
"loss": 0.9904080629348755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24220585823059082,
"step": 269,
"valid_targets_mean": 15122.8,
"valid_targets_min": 8015
},
{
"epoch": 1.1492537313432836,
"grad_norm": 0.4307347346802511,
"learning_rate": 3.801937735804838e-05,
"loss": 1.034711241722107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2537466883659363,
"step": 270,
"valid_targets_mean": 14167.3,
"valid_targets_min": 3780
},
{
"epoch": 1.1535181236673775,
"grad_norm": 0.41557607247834316,
"learning_rate": 3.799350620764114e-05,
"loss": 1.0160858631134033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.249742329120636,
"step": 271,
"valid_targets_mean": 14951.7,
"valid_targets_min": 4915
},
{
"epoch": 1.1577825159914712,
"grad_norm": 0.5268253932391055,
"learning_rate": 3.7967476105507535e-05,
"loss": 1.003743052482605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2506830394268036,
"step": 272,
"valid_targets_mean": 14177.5,
"valid_targets_min": 3884
},
{
"epoch": 1.1620469083155651,
"grad_norm": 0.442145519622731,
"learning_rate": 3.7941287281593284e-05,
"loss": 1.0129845142364502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2885182201862335,
"step": 273,
"valid_targets_mean": 15371.1,
"valid_targets_min": 2393
},
{
"epoch": 1.1663113006396588,
"grad_norm": 0.3908456257968783,
"learning_rate": 3.7914939967246227e-05,
"loss": 1.0502171516418457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2536545395851135,
"step": 274,
"valid_targets_mean": 14664.4,
"valid_targets_min": 5128
},
{
"epoch": 1.1705756929637527,
"grad_norm": 0.3862780244923522,
"learning_rate": 3.7888434395214285e-05,
"loss": 1.0400927066802979,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.237798273563385,
"step": 275,
"valid_targets_mean": 13460.7,
"valid_targets_min": 3188
},
{
"epoch": 1.1748400852878464,
"grad_norm": 0.4404520569169047,
"learning_rate": 3.786177079964339e-05,
"loss": 1.0391854047775269,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2632058262825012,
"step": 276,
"valid_targets_mean": 14753.7,
"valid_targets_min": 6232
},
{
"epoch": 1.1791044776119404,
"grad_norm": 0.3756219096684917,
"learning_rate": 3.783494941607544e-05,
"loss": 1.0601963996887207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25917044281959534,
"step": 277,
"valid_targets_mean": 13945.5,
"valid_targets_min": 1337
},
{
"epoch": 1.183368869936034,
"grad_norm": 0.451547633414741,
"learning_rate": 3.780797048144621e-05,
"loss": 1.0429885387420654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2507634162902832,
"step": 278,
"valid_targets_mean": 14387.9,
"valid_targets_min": 7158
},
{
"epoch": 1.187633262260128,
"grad_norm": 0.4712111097628715,
"learning_rate": 3.7780834234083236e-05,
"loss": 1.050649881362915,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2704152464866638,
"step": 279,
"valid_targets_mean": 14935.3,
"valid_targets_min": 3303
},
{
"epoch": 1.1918976545842217,
"grad_norm": 0.3669185185782326,
"learning_rate": 3.775354091370376e-05,
"loss": 1.016859769821167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26059484481811523,
"step": 280,
"valid_targets_mean": 15610.1,
"valid_targets_min": 9687
},
{
"epoch": 1.1961620469083156,
"grad_norm": 0.4144137396489066,
"learning_rate": 3.772609076141255e-05,
"loss": 0.9994684457778931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2601706385612488,
"step": 281,
"valid_targets_mean": 14643.5,
"valid_targets_min": 6660
},
{
"epoch": 1.2004264392324093,
"grad_norm": 0.45567675891650883,
"learning_rate": 3.769848401969982e-05,
"loss": 1.0300400257110596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2580612301826477,
"step": 282,
"valid_targets_mean": 15233.8,
"valid_targets_min": 9758
},
{
"epoch": 1.2046908315565032,
"grad_norm": 0.3555022390818503,
"learning_rate": 3.767072093243907e-05,
"loss": 1.0938390493392944,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.266682505607605,
"step": 283,
"valid_targets_mean": 14910.6,
"valid_targets_min": 4441
},
{
"epoch": 1.208955223880597,
"grad_norm": 0.42569732647686503,
"learning_rate": 3.7642801744884915e-05,
"loss": 1.1198451519012451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2663855254650116,
"step": 284,
"valid_targets_mean": 14378.9,
"valid_targets_min": 1902
},
{
"epoch": 1.2132196162046909,
"grad_norm": 0.4127343265468672,
"learning_rate": 3.761472670367096e-05,
"loss": 0.9990887641906738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23396140336990356,
"step": 285,
"valid_targets_mean": 14860.0,
"valid_targets_min": 3398
},
{
"epoch": 1.2174840085287846,
"grad_norm": 0.4232592265214671,
"learning_rate": 3.758649605680758e-05,
"loss": 1.0035858154296875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24486322700977325,
"step": 286,
"valid_targets_mean": 14400.8,
"valid_targets_min": 7174
},
{
"epoch": 1.2217484008528785,
"grad_norm": 0.47083507604672054,
"learning_rate": 3.755811005367974e-05,
"loss": 1.0343601703643799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26513391733169556,
"step": 287,
"valid_targets_mean": 15103.7,
"valid_targets_min": 8717
},
{
"epoch": 1.2260127931769722,
"grad_norm": 0.4170006342101319,
"learning_rate": 3.752956894504481e-05,
"loss": 1.036698818206787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2382010668516159,
"step": 288,
"valid_targets_mean": 13118.1,
"valid_targets_min": 4521
},
{
"epoch": 1.2302771855010661,
"grad_norm": 0.45922410144773373,
"learning_rate": 3.750087298303033e-05,
"loss": 1.0224305391311646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2617640495300293,
"step": 289,
"valid_targets_mean": 14181.2,
"valid_targets_min": 1237
},
{
"epoch": 1.2345415778251598,
"grad_norm": 0.4639650600037873,
"learning_rate": 3.7472022421131795e-05,
"loss": 1.040165662765503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.279234915971756,
"step": 290,
"valid_targets_mean": 15071.4,
"valid_targets_min": 7071
},
{
"epoch": 1.2388059701492538,
"grad_norm": 0.5052614475399082,
"learning_rate": 3.7443017514210406e-05,
"loss": 1.0578022003173828,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24848178029060364,
"step": 291,
"valid_targets_mean": 14662.9,
"valid_targets_min": 3892
},
{
"epoch": 1.2430703624733475,
"grad_norm": 0.5246082126633732,
"learning_rate": 3.7413858518490825e-05,
"loss": 1.0782644748687744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.277226984500885,
"step": 292,
"valid_targets_mean": 13855.1,
"valid_targets_min": 3518
},
{
"epoch": 1.2473347547974414,
"grad_norm": 0.4064959781816505,
"learning_rate": 3.7384545691558895e-05,
"loss": 1.0355021953582764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25371286273002625,
"step": 293,
"valid_targets_mean": 14714.8,
"valid_targets_min": 6099
},
{
"epoch": 1.251599147121535,
"grad_norm": 0.5042943693981129,
"learning_rate": 3.735507929235941e-05,
"loss": 1.0233511924743652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2694402039051056,
"step": 294,
"valid_targets_mean": 14589.5,
"valid_targets_min": 2832
},
{
"epoch": 1.255863539445629,
"grad_norm": 0.49380678655207805,
"learning_rate": 3.732545958119378e-05,
"loss": 1.0197874307632446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2528603672981262,
"step": 295,
"valid_targets_mean": 14854.3,
"valid_targets_min": 6387
},
{
"epoch": 1.260127931769723,
"grad_norm": 0.45062570460712725,
"learning_rate": 3.729568681971774e-05,
"loss": 1.052213191986084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24603819847106934,
"step": 296,
"valid_targets_mean": 13976.9,
"valid_targets_min": 4314
},
{
"epoch": 1.2643923240938166,
"grad_norm": 0.3746896617567477,
"learning_rate": 3.726576127093905e-05,
"loss": 1.075683355331421,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.261888712644577,
"step": 297,
"valid_targets_mean": 15158.6,
"valid_targets_min": 4461
},
{
"epoch": 1.2686567164179103,
"grad_norm": 0.41056261687110635,
"learning_rate": 3.7235683199215177e-05,
"loss": 1.021393060684204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2325923889875412,
"step": 298,
"valid_targets_mean": 13554.0,
"valid_targets_min": 3668
},
{
"epoch": 1.2729211087420043,
"grad_norm": 0.4935296888861505,
"learning_rate": 3.7205452870250944e-05,
"loss": 1.0408051013946533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24672801792621613,
"step": 299,
"valid_targets_mean": 15081.8,
"valid_targets_min": 5372
},
{
"epoch": 1.2771855010660982,
"grad_norm": 0.43061196618725706,
"learning_rate": 3.7175070551096204e-05,
"loss": 0.9903295040130615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24351432919502258,
"step": 300,
"valid_targets_mean": 14829.8,
"valid_targets_min": 5064
},
{
"epoch": 1.2814498933901919,
"grad_norm": 0.4823871541432052,
"learning_rate": 3.7144536510143436e-05,
"loss": 1.0472469329833984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2732764482498169,
"step": 301,
"valid_targets_mean": 14287.4,
"valid_targets_min": 1719
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.451775820650649,
"learning_rate": 3.711385101712544e-05,
"loss": 1.0415022373199463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2545892298221588,
"step": 302,
"valid_targets_mean": 14111.8,
"valid_targets_min": 714
},
{
"epoch": 1.2899786780383795,
"grad_norm": 0.5371461398762536,
"learning_rate": 3.708301434311289e-05,
"loss": 1.0422717332839966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21339184045791626,
"step": 303,
"valid_targets_mean": 13497.2,
"valid_targets_min": 1108
},
{
"epoch": 1.2942430703624734,
"grad_norm": 0.5271872774214361,
"learning_rate": 3.7052026760511996e-05,
"loss": 1.00404691696167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2703721523284912,
"step": 304,
"valid_targets_mean": 14671.9,
"valid_targets_min": 4066
},
{
"epoch": 1.2985074626865671,
"grad_norm": 0.46773495382617897,
"learning_rate": 3.7020888543062046e-05,
"loss": 1.0046180486679077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2816005051136017,
"step": 305,
"valid_targets_mean": 15683.1,
"valid_targets_min": 6707
},
{
"epoch": 1.302771855010661,
"grad_norm": 0.47056842659745385,
"learning_rate": 3.6989599965833024e-05,
"loss": 1.0792807340621948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3000812828540802,
"step": 306,
"valid_targets_mean": 15442.5,
"valid_targets_min": 7315
},
{
"epoch": 1.3070362473347548,
"grad_norm": 0.4364095538608938,
"learning_rate": 3.695816130522317e-05,
"loss": 1.0605202913284302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570754289627075,
"step": 307,
"valid_targets_mean": 13909.1,
"valid_targets_min": 3271
},
{
"epoch": 1.3113006396588487,
"grad_norm": 0.48886491062143717,
"learning_rate": 3.692657283895651e-05,
"loss": 1.0294058322906494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2760119140148163,
"step": 308,
"valid_targets_mean": 14565.0,
"valid_targets_min": 2334
},
{
"epoch": 1.3155650319829424,
"grad_norm": 0.5029760678480856,
"learning_rate": 3.689483484608048e-05,
"loss": 1.0614323616027832,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2552529573440552,
"step": 309,
"valid_targets_mean": 14474.2,
"valid_targets_min": 1756
},
{
"epoch": 1.3198294243070363,
"grad_norm": 0.423583818084647,
"learning_rate": 3.6862947606963364e-05,
"loss": 1.096575140953064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2549227476119995,
"step": 310,
"valid_targets_mean": 14506.5,
"valid_targets_min": 2082
},
{
"epoch": 1.32409381663113,
"grad_norm": 0.42315859890827595,
"learning_rate": 3.6830911403291885e-05,
"loss": 1.038635015487671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25086838006973267,
"step": 311,
"valid_targets_mean": 13291.1,
"valid_targets_min": 3734
},
{
"epoch": 1.328358208955224,
"grad_norm": 0.4449274886943997,
"learning_rate": 3.679872651806869e-05,
"loss": 1.0621452331542969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2692762613296509,
"step": 312,
"valid_targets_mean": 14425.4,
"valid_targets_min": 3203
},
{
"epoch": 1.3326226012793176,
"grad_norm": 0.40353806956953464,
"learning_rate": 3.676639323560986e-05,
"loss": 1.00935697555542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25382930040359497,
"step": 313,
"valid_targets_mean": 13922.9,
"valid_targets_min": 2388
},
{
"epoch": 1.3368869936034116,
"grad_norm": 0.38240160643844234,
"learning_rate": 3.6733911841542365e-05,
"loss": 1.0217959880828857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2370169460773468,
"step": 314,
"valid_targets_mean": 14275.7,
"valid_targets_min": 4836
},
{
"epoch": 1.3411513859275053,
"grad_norm": 0.44912430742395426,
"learning_rate": 3.6701282622801626e-05,
"loss": 1.0224769115447998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2658455967903137,
"step": 315,
"valid_targets_mean": 14691.5,
"valid_targets_min": 6289
},
{
"epoch": 1.3454157782515992,
"grad_norm": 0.4380862194803825,
"learning_rate": 3.666850586762886e-05,
"loss": 1.0295928716659546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23124775290489197,
"step": 316,
"valid_targets_mean": 14788.2,
"valid_targets_min": 1572
},
{
"epoch": 1.349680170575693,
"grad_norm": 0.3780351915048266,
"learning_rate": 3.663558186556863e-05,
"loss": 1.0240471363067627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26825106143951416,
"step": 317,
"valid_targets_mean": 14430.4,
"valid_targets_min": 4225
},
{
"epoch": 1.3539445628997868,
"grad_norm": 0.38969522738546963,
"learning_rate": 3.660251090746627e-05,
"loss": 0.9949407577514648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2697961926460266,
"step": 318,
"valid_targets_mean": 14902.6,
"valid_targets_min": 3428
},
{
"epoch": 1.3582089552238805,
"grad_norm": 0.4384210627180082,
"learning_rate": 3.656929328546526e-05,
"loss": 1.0744171142578125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2820214629173279,
"step": 319,
"valid_targets_mean": 15383.8,
"valid_targets_min": 7527
},
{
"epoch": 1.3624733475479744,
"grad_norm": 0.3984289616259255,
"learning_rate": 3.653592929300471e-05,
"loss": 1.0498393774032593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27668696641921997,
"step": 320,
"valid_targets_mean": 15038.8,
"valid_targets_min": 3785
},
{
"epoch": 1.3667377398720681,
"grad_norm": 0.432696068635002,
"learning_rate": 3.650241922481675e-05,
"loss": 1.0534286499023438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2388778030872345,
"step": 321,
"valid_targets_mean": 14543.2,
"valid_targets_min": 752
},
{
"epoch": 1.371002132196162,
"grad_norm": 0.48698829590428744,
"learning_rate": 3.6468763376923886e-05,
"loss": 1.0516881942749023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2882721722126007,
"step": 322,
"valid_targets_mean": 14838.8,
"valid_targets_min": 9079
},
{
"epoch": 1.375266524520256,
"grad_norm": 0.442918488237151,
"learning_rate": 3.6434962046636464e-05,
"loss": 1.030785083770752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.253054678440094,
"step": 323,
"valid_targets_mean": 14099.2,
"valid_targets_min": 5779
},
{
"epoch": 1.3795309168443497,
"grad_norm": 0.45324470211257595,
"learning_rate": 3.6401015532549957e-05,
"loss": 0.9938373565673828,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24963873624801636,
"step": 324,
"valid_targets_mean": 14335.0,
"valid_targets_min": 3001
},
{
"epoch": 1.3837953091684434,
"grad_norm": 0.49752146852740525,
"learning_rate": 3.6366924134542386e-05,
"loss": 1.1035189628601074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26730480790138245,
"step": 325,
"valid_targets_mean": 14326.0,
"valid_targets_min": 3428
},
{
"epoch": 1.3880597014925373,
"grad_norm": 0.4043390562619949,
"learning_rate": 3.633268815377166e-05,
"loss": 1.023439645767212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2766946256160736,
"step": 326,
"valid_targets_mean": 15623.8,
"valid_targets_min": 12359
},
{
"epoch": 1.3923240938166312,
"grad_norm": 0.4473813682718078,
"learning_rate": 3.6298307892672895e-05,
"loss": 1.060289978981018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24249878525733948,
"step": 327,
"valid_targets_mean": 13869.2,
"valid_targets_min": 1192
},
{
"epoch": 1.396588486140725,
"grad_norm": 0.39880333868369167,
"learning_rate": 3.626378365495577e-05,
"loss": 1.0092850923538208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2509322762489319,
"step": 328,
"valid_targets_mean": 14535.7,
"valid_targets_min": 3330
},
{
"epoch": 1.4008528784648187,
"grad_norm": 0.4223878979770423,
"learning_rate": 3.622911574560181e-05,
"loss": 1.021120309829712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28987523913383484,
"step": 329,
"valid_targets_mean": 15023.1,
"valid_targets_min": 2399
},
{
"epoch": 1.4051172707889126,
"grad_norm": 0.40875151541174765,
"learning_rate": 3.6194304470861744e-05,
"loss": 1.0362828969955444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24342569708824158,
"step": 330,
"valid_targets_mean": 14159.5,
"valid_targets_min": 1544
},
{
"epoch": 1.4093816631130065,
"grad_norm": 0.41445312343144597,
"learning_rate": 3.615935013825272e-05,
"loss": 1.0330214500427246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23608511686325073,
"step": 331,
"valid_targets_mean": 13884.3,
"valid_targets_min": 2210
},
{
"epoch": 1.4136460554371002,
"grad_norm": 0.40369141173086437,
"learning_rate": 3.612425305655569e-05,
"loss": 1.032184362411499,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22079363465309143,
"step": 332,
"valid_targets_mean": 13598.4,
"valid_targets_min": 4280
},
{
"epoch": 1.417910447761194,
"grad_norm": 0.41599210161287786,
"learning_rate": 3.6089013535812593e-05,
"loss": 1.016597867012024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26309871673583984,
"step": 333,
"valid_targets_mean": 14173.4,
"valid_targets_min": 2833
},
{
"epoch": 1.4221748400852878,
"grad_norm": 0.4153459164865125,
"learning_rate": 3.6053631887323656e-05,
"loss": 1.0433218479156494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24018901586532593,
"step": 334,
"valid_targets_mean": 13169.0,
"valid_targets_min": 1366
},
{
"epoch": 1.4264392324093818,
"grad_norm": 0.4228156331265008,
"learning_rate": 3.601810842364465e-05,
"loss": 1.0579197406768799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26747769117355347,
"step": 335,
"valid_targets_mean": 14832.1,
"valid_targets_min": 2603
},
{
"epoch": 1.4307036247334755,
"grad_norm": 0.37510951898211387,
"learning_rate": 3.598244345858412e-05,
"loss": 1.044649600982666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.266460657119751,
"step": 336,
"valid_targets_mean": 14194.0,
"valid_targets_min": 3392
},
{
"epoch": 1.4349680170575694,
"grad_norm": 0.3730794365026243,
"learning_rate": 3.594663730720059e-05,
"loss": 1.0548815727233887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2614656388759613,
"step": 337,
"valid_targets_mean": 14611.4,
"valid_targets_min": 4873
},
{
"epoch": 1.439232409381663,
"grad_norm": 0.46642538995969157,
"learning_rate": 3.591069028579982e-05,
"loss": 1.0309990644454956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25115907192230225,
"step": 338,
"valid_targets_mean": 14504.5,
"valid_targets_min": 2470
},
{
"epoch": 1.443496801705757,
"grad_norm": 0.40116665083162756,
"learning_rate": 3.5874602711931994e-05,
"loss": 1.019059658050537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23682564496994019,
"step": 339,
"valid_targets_mean": 14049.9,
"valid_targets_min": 1264
},
{
"epoch": 1.4477611940298507,
"grad_norm": 0.3623754460535039,
"learning_rate": 3.5838374904388904e-05,
"loss": 1.0602333545684814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26520347595214844,
"step": 340,
"valid_targets_mean": 14519.9,
"valid_targets_min": 2559
},
{
"epoch": 1.4520255863539446,
"grad_norm": 0.432940910513449,
"learning_rate": 3.580200718320115e-05,
"loss": 1.034498691558838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2453865110874176,
"step": 341,
"valid_targets_mean": 14551.1,
"valid_targets_min": 4057
},
{
"epoch": 1.4562899786780383,
"grad_norm": 0.41479612142784555,
"learning_rate": 3.576549986963531e-05,
"loss": 1.071781873703003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25374463200569153,
"step": 342,
"valid_targets_mean": 14550.2,
"valid_targets_min": 2094
},
{
"epoch": 1.4605543710021323,
"grad_norm": 0.4388319041148267,
"learning_rate": 3.5728853286191075e-05,
"loss": 1.0310769081115723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23260226845741272,
"step": 343,
"valid_targets_mean": 14847.7,
"valid_targets_min": 5643
},
{
"epoch": 1.464818763326226,
"grad_norm": 0.5097439382078064,
"learning_rate": 3.5692067756598465e-05,
"loss": 1.085401177406311,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26018932461738586,
"step": 344,
"valid_targets_mean": 14242.3,
"valid_targets_min": 1644
},
{
"epoch": 1.4690831556503199,
"grad_norm": 0.4906254933146025,
"learning_rate": 3.5655143605814885e-05,
"loss": 1.0883269309997559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29738789796829224,
"step": 345,
"valid_targets_mean": 15187.0,
"valid_targets_min": 5367
},
{
"epoch": 1.4733475479744136,
"grad_norm": 0.43429736392701807,
"learning_rate": 3.561808116002232e-05,
"loss": 1.0500903129577637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26342469453811646,
"step": 346,
"valid_targets_mean": 13944.7,
"valid_targets_min": 4459
},
{
"epoch": 1.4776119402985075,
"grad_norm": 0.5529170127999757,
"learning_rate": 3.5580880746624444e-05,
"loss": 1.1078016757965088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28638672828674316,
"step": 347,
"valid_targets_mean": 14733.8,
"valid_targets_min": 3275
},
{
"epoch": 1.4818763326226012,
"grad_norm": 0.5253935878945599,
"learning_rate": 3.5543542694243685e-05,
"loss": 1.0176830291748047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2500063180923462,
"step": 348,
"valid_targets_mean": 14831.7,
"valid_targets_min": 3693
},
{
"epoch": 1.4861407249466951,
"grad_norm": 0.43894141234318407,
"learning_rate": 3.5506067332718355e-05,
"loss": 1.0444616079330444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2473248541355133,
"step": 349,
"valid_targets_mean": 14922.2,
"valid_targets_min": 6830
},
{
"epoch": 1.4904051172707888,
"grad_norm": 0.558730332863415,
"learning_rate": 3.546845499309976e-05,
"loss": 1.0343124866485596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27866482734680176,
"step": 350,
"valid_targets_mean": 14642.0,
"valid_targets_min": 1155
},
{
"epoch": 1.4946695095948828,
"grad_norm": 0.4185560934360798,
"learning_rate": 3.5430706007649225e-05,
"loss": 1.0102611780166626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2702333331108093,
"step": 351,
"valid_targets_mean": 15455.7,
"valid_targets_min": 12148
},
{
"epoch": 1.4989339019189765,
"grad_norm": 0.46798829218110205,
"learning_rate": 3.539282070983518e-05,
"loss": 0.9864039421081543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2526516914367676,
"step": 352,
"valid_targets_mean": 14920.8,
"valid_targets_min": 7864
},
{
"epoch": 1.5031982942430704,
"grad_norm": 0.4076542842351871,
"learning_rate": 3.535479943433023e-05,
"loss": 1.0977790355682373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2841411828994751,
"step": 353,
"valid_targets_mean": 15169.3,
"valid_targets_min": 7521
},
{
"epoch": 1.5074626865671643,
"grad_norm": 0.3932038387603887,
"learning_rate": 3.5316642517008184e-05,
"loss": 1.0146563053131104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2530601918697357,
"step": 354,
"valid_targets_mean": 14533.0,
"valid_targets_min": 5862
},
{
"epoch": 1.511727078891258,
"grad_norm": 0.4004458039033539,
"learning_rate": 3.5278350294941074e-05,
"loss": 1.027766466140747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24737542867660522,
"step": 355,
"valid_targets_mean": 15259.2,
"valid_targets_min": 9655
},
{
"epoch": 1.5159914712153517,
"grad_norm": 0.4041518470303862,
"learning_rate": 3.523992310639622e-05,
"loss": 1.0604379177093506,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23790603876113892,
"step": 356,
"valid_targets_mean": 14440.0,
"valid_targets_min": 3563
},
{
"epoch": 1.5202558635394456,
"grad_norm": 0.3853938737065251,
"learning_rate": 3.5201361290833165e-05,
"loss": 1.0405174493789673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2695203125476837,
"step": 357,
"valid_targets_mean": 14853.6,
"valid_targets_min": 5910
},
{
"epoch": 1.5245202558635396,
"grad_norm": 0.3700881121843563,
"learning_rate": 3.516266518890079e-05,
"loss": 1.055159091949463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2846561670303345,
"step": 358,
"valid_targets_mean": 15617.7,
"valid_targets_min": 7355
},
{
"epoch": 1.5287846481876333,
"grad_norm": 0.41639254141432347,
"learning_rate": 3.512383514243419e-05,
"loss": 0.9890848994255066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25430652499198914,
"step": 359,
"valid_targets_mean": 14814.4,
"valid_targets_min": 5326
},
{
"epoch": 1.533049040511727,
"grad_norm": 0.4257510522388653,
"learning_rate": 3.5084871494451716e-05,
"loss": 1.0411036014556885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26373782753944397,
"step": 360,
"valid_targets_mean": 14399.5,
"valid_targets_min": 4828
},
{
"epoch": 1.537313432835821,
"grad_norm": 0.4332882415420207,
"learning_rate": 3.5045774589151955e-05,
"loss": 1.080575942993164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2707892656326294,
"step": 361,
"valid_targets_mean": 14051.8,
"valid_targets_min": 5008
},
{
"epoch": 1.5415778251599148,
"grad_norm": 0.4312474465754125,
"learning_rate": 3.500654477191064e-05,
"loss": 1.0592353343963623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2550530433654785,
"step": 362,
"valid_targets_mean": 15092.4,
"valid_targets_min": 8853
},
{
"epoch": 1.5458422174840085,
"grad_norm": 0.388247184125693,
"learning_rate": 3.496718238927764e-05,
"loss": 1.0313072204589844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804388403892517,
"step": 363,
"valid_targets_mean": 14726.6,
"valid_targets_min": 4844
},
{
"epoch": 1.5501066098081022,
"grad_norm": 0.3715356387359413,
"learning_rate": 3.492768778897388e-05,
"loss": 1.032320499420166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2508130967617035,
"step": 364,
"valid_targets_mean": 14377.2,
"valid_targets_min": 4826
},
{
"epoch": 1.5543710021321961,
"grad_norm": 0.3904825246518211,
"learning_rate": 3.4888061319888276e-05,
"loss": 1.0460598468780518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2676374912261963,
"step": 365,
"valid_targets_mean": 14739.1,
"valid_targets_min": 5957
},
{
"epoch": 1.55863539445629,
"grad_norm": 0.44926171747857924,
"learning_rate": 3.484830333207466e-05,
"loss": 1.003669023513794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23656132817268372,
"step": 366,
"valid_targets_mean": 14270.9,
"valid_targets_min": 1848
},
{
"epoch": 1.5628997867803838,
"grad_norm": 0.3559883543601324,
"learning_rate": 3.4808414176748666e-05,
"loss": 1.0113545656204224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25722262263298035,
"step": 367,
"valid_targets_mean": 14459.8,
"valid_targets_min": 3202
},
{
"epoch": 1.5671641791044775,
"grad_norm": 0.46220961836637714,
"learning_rate": 3.476839420628466e-05,
"loss": 1.0731767416000366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2682396173477173,
"step": 368,
"valid_targets_mean": 15118.6,
"valid_targets_min": 4725
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.37036023858488,
"learning_rate": 3.472824377421257e-05,
"loss": 1.0478543043136597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25996726751327515,
"step": 369,
"valid_targets_mean": 14105.8,
"valid_targets_min": 2477
},
{
"epoch": 1.5756929637526653,
"grad_norm": 0.3932506540896539,
"learning_rate": 3.4687963235214845e-05,
"loss": 1.048224687576294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2726746201515198,
"step": 370,
"valid_targets_mean": 14638.0,
"valid_targets_min": 5709
},
{
"epoch": 1.579957356076759,
"grad_norm": 0.3881717185807725,
"learning_rate": 3.464755294512325e-05,
"loss": 1.0166910886764526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2702951729297638,
"step": 371,
"valid_targets_mean": 14126.0,
"valid_targets_min": 2212
},
{
"epoch": 1.5842217484008527,
"grad_norm": 0.3773858725739773,
"learning_rate": 3.4607013260915765e-05,
"loss": 1.0563862323760986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2720091938972473,
"step": 372,
"valid_targets_mean": 15196.1,
"valid_targets_min": 3138
},
{
"epoch": 1.5884861407249466,
"grad_norm": 0.3516127575139888,
"learning_rate": 3.4566344540713404e-05,
"loss": 0.9865554571151733,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26179879903793335,
"step": 373,
"valid_targets_mean": 15523.9,
"valid_targets_min": 3813
},
{
"epoch": 1.5927505330490406,
"grad_norm": 0.34214633347445444,
"learning_rate": 3.452554714377706e-05,
"loss": 1.0230598449707031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2778155505657196,
"step": 374,
"valid_targets_mean": 15356.0,
"valid_targets_min": 7797
},
{
"epoch": 1.5970149253731343,
"grad_norm": 0.35945798893790865,
"learning_rate": 3.448462143050436e-05,
"loss": 1.0132288932800293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2472197711467743,
"step": 375,
"valid_targets_mean": 14254.1,
"valid_targets_min": 4122
},
{
"epoch": 1.6012793176972282,
"grad_norm": 0.3691893165416932,
"learning_rate": 3.4443567762426444e-05,
"loss": 1.0427764654159546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2452818751335144,
"step": 376,
"valid_targets_mean": 14382.5,
"valid_targets_min": 1979
},
{
"epoch": 1.6055437100213221,
"grad_norm": 0.32656514899943623,
"learning_rate": 3.440238650220477e-05,
"loss": 1.0427534580230713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24343901872634888,
"step": 377,
"valid_targets_mean": 14360.2,
"valid_targets_min": 2659
},
{
"epoch": 1.6098081023454158,
"grad_norm": 0.35145024748398823,
"learning_rate": 3.4361078013627945e-05,
"loss": 1.0215208530426025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24857380986213684,
"step": 378,
"valid_targets_mean": 14909.9,
"valid_targets_min": 5877
},
{
"epoch": 1.6140724946695095,
"grad_norm": 0.37012176740954145,
"learning_rate": 3.4319642661608474e-05,
"loss": 1.041208028793335,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2563413977622986,
"step": 379,
"valid_targets_mean": 14180.1,
"valid_targets_min": 2024
},
{
"epoch": 1.6183368869936035,
"grad_norm": 0.373177192589185,
"learning_rate": 3.427808081217957e-05,
"loss": 1.0833510160446167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2780710458755493,
"step": 380,
"valid_targets_mean": 14983.0,
"valid_targets_min": 2490
},
{
"epoch": 1.6226012793176974,
"grad_norm": 0.33354305565043396,
"learning_rate": 3.423639283249189e-05,
"loss": 0.9997053742408752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2615281939506531,
"step": 381,
"valid_targets_mean": 14317.3,
"valid_targets_min": 2195
},
{
"epoch": 1.626865671641791,
"grad_norm": 0.3471963364637768,
"learning_rate": 3.419457909081032e-05,
"loss": 1.0127842426300049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2638373374938965,
"step": 382,
"valid_targets_mean": 14404.6,
"valid_targets_min": 2852
},
{
"epoch": 1.6311300639658848,
"grad_norm": 0.33501867942244973,
"learning_rate": 3.415263995651069e-05,
"loss": 1.0155236721038818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24730534851551056,
"step": 383,
"valid_targets_mean": 14532.0,
"valid_targets_min": 3217
},
{
"epoch": 1.6353944562899787,
"grad_norm": 0.334467361139862,
"learning_rate": 3.411057580007653e-05,
"loss": 1.051874041557312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26716887950897217,
"step": 384,
"valid_targets_mean": 14626.3,
"valid_targets_min": 3330
},
{
"epoch": 1.6396588486140726,
"grad_norm": 0.3900211482958431,
"learning_rate": 3.4068386993095806e-05,
"loss": 1.0342919826507568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27061301469802856,
"step": 385,
"valid_targets_mean": 14689.8,
"valid_targets_min": 4055
},
{
"epoch": 1.6439232409381663,
"grad_norm": 0.3371460016615242,
"learning_rate": 3.402607390825762e-05,
"loss": 1.0449540615081787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2670671343803406,
"step": 386,
"valid_targets_mean": 14720.9,
"valid_targets_min": 5646
},
{
"epoch": 1.64818763326226,
"grad_norm": 0.32759083808119127,
"learning_rate": 3.398363691934894e-05,
"loss": 1.0454938411712646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23546123504638672,
"step": 387,
"valid_targets_mean": 13642.0,
"valid_targets_min": 2066
},
{
"epoch": 1.652452025586354,
"grad_norm": 0.3767978047948565,
"learning_rate": 3.3941076401251244e-05,
"loss": 1.0003044605255127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2335018813610077,
"step": 388,
"valid_targets_mean": 14856.9,
"valid_targets_min": 2551
},
{
"epoch": 1.6567164179104479,
"grad_norm": 0.33539311118940945,
"learning_rate": 3.3898392729937295e-05,
"loss": 1.0234074592590332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2576403021812439,
"step": 389,
"valid_targets_mean": 14729.4,
"valid_targets_min": 5174
},
{
"epoch": 1.6609808102345416,
"grad_norm": 0.38301412923847056,
"learning_rate": 3.385558628246774e-05,
"loss": 1.036074161529541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2639666199684143,
"step": 390,
"valid_targets_mean": 15467.5,
"valid_targets_min": 2531
},
{
"epoch": 1.6652452025586353,
"grad_norm": 0.4182031657844117,
"learning_rate": 3.381265743698781e-05,
"loss": 1.0574541091918945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2728482782840729,
"step": 391,
"valid_targets_mean": 14620.8,
"valid_targets_min": 5987
},
{
"epoch": 1.6695095948827292,
"grad_norm": 0.3408222839521064,
"learning_rate": 3.3769606572724e-05,
"loss": 1.0520439147949219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29153692722320557,
"step": 392,
"valid_targets_mean": 14701.7,
"valid_targets_min": 5351
},
{
"epoch": 1.6737739872068231,
"grad_norm": 0.33485626829486975,
"learning_rate": 3.3726434069980686e-05,
"loss": 1.0322532653808594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2689778804779053,
"step": 393,
"valid_targets_mean": 14448.6,
"valid_targets_min": 7360
},
{
"epoch": 1.6780383795309168,
"grad_norm": 0.30661591637001406,
"learning_rate": 3.368314031013678e-05,
"loss": 1.0209152698516846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23672515153884888,
"step": 394,
"valid_targets_mean": 14592.0,
"valid_targets_min": 2343
},
{
"epoch": 1.6823027718550105,
"grad_norm": 0.3594261214971763,
"learning_rate": 3.363972567564236e-05,
"loss": 0.9918817281723022,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25489190220832825,
"step": 395,
"valid_targets_mean": 15144.4,
"valid_targets_min": 3992
},
{
"epoch": 1.6865671641791045,
"grad_norm": 0.33742571789338416,
"learning_rate": 3.35961905500153e-05,
"loss": 1.0087916851043701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24313384294509888,
"step": 396,
"valid_targets_mean": 13942.8,
"valid_targets_min": 575
},
{
"epoch": 1.6908315565031984,
"grad_norm": 0.34745731043603895,
"learning_rate": 3.3552535317837855e-05,
"loss": 0.9876875877380371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25758689641952515,
"step": 397,
"valid_targets_mean": 14985.8,
"valid_targets_min": 7044
},
{
"epoch": 1.695095948827292,
"grad_norm": 0.3870431140851168,
"learning_rate": 3.35087603647533e-05,
"loss": 1.0088675022125244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26093339920043945,
"step": 398,
"valid_targets_mean": 14448.2,
"valid_targets_min": 4964
},
{
"epoch": 1.6993603411513858,
"grad_norm": 0.3167918925838153,
"learning_rate": 3.346486607746249e-05,
"loss": 1.0424166917800903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24083197116851807,
"step": 399,
"valid_targets_mean": 14033.4,
"valid_targets_min": 2426
},
{
"epoch": 1.7036247334754797,
"grad_norm": 0.3828292733495057,
"learning_rate": 3.342085284372047e-05,
"loss": 1.0191667079925537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2616402506828308,
"step": 400,
"valid_targets_mean": 14583.3,
"valid_targets_min": 7331
},
{
"epoch": 1.7078891257995736,
"grad_norm": 0.3649425592457212,
"learning_rate": 3.337672105233303e-05,
"loss": 1.0168663263320923,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2752700448036194,
"step": 401,
"valid_targets_mean": 14993.9,
"valid_targets_min": 7835
},
{
"epoch": 1.7121535181236673,
"grad_norm": 0.3643524380815211,
"learning_rate": 3.3332471093153296e-05,
"loss": 1.0180773735046387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2487078756093979,
"step": 402,
"valid_targets_mean": 13492.8,
"valid_targets_min": 1627
},
{
"epoch": 1.716417910447761,
"grad_norm": 0.3780037653881082,
"learning_rate": 3.3288103357078244e-05,
"loss": 1.033068299293518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26770901679992676,
"step": 403,
"valid_targets_mean": 14617.1,
"valid_targets_min": 2467
},
{
"epoch": 1.720682302771855,
"grad_norm": 0.34536872530694906,
"learning_rate": 3.324361823604529e-05,
"loss": 0.9663518667221069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23401600122451782,
"step": 404,
"valid_targets_mean": 14467.1,
"valid_targets_min": 5017
},
{
"epoch": 1.724946695095949,
"grad_norm": 0.3809124784742419,
"learning_rate": 3.319901612302881e-05,
"loss": 1.0394078493118286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2640204429626465,
"step": 405,
"valid_targets_mean": 14536.2,
"valid_targets_min": 1635
},
{
"epoch": 1.7292110874200426,
"grad_norm": 0.3378638020700729,
"learning_rate": 3.315429741203666e-05,
"loss": 1.053601861000061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2702086865901947,
"step": 406,
"valid_targets_mean": 14123.1,
"valid_targets_min": 4839
},
{
"epoch": 1.7334754797441365,
"grad_norm": 0.4175023493480647,
"learning_rate": 3.3109462498106705e-05,
"loss": 1.020158290863037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24757951498031616,
"step": 407,
"valid_targets_mean": 14131.3,
"valid_targets_min": 2620
},
{
"epoch": 1.7377398720682304,
"grad_norm": 0.420943686556577,
"learning_rate": 3.306451177730333e-05,
"loss": 1.0457191467285156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24467140436172485,
"step": 408,
"valid_targets_mean": 13883.8,
"valid_targets_min": 1684
},
{
"epoch": 1.7420042643923241,
"grad_norm": 0.4250661617912362,
"learning_rate": 3.301944564671394e-05,
"loss": 1.0097477436065674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2460739016532898,
"step": 409,
"valid_targets_mean": 14259.0,
"valid_targets_min": 6646
},
{
"epoch": 1.7462686567164178,
"grad_norm": 0.4058959096130296,
"learning_rate": 3.297426450444546e-05,
"loss": 1.0764334201812744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25958263874053955,
"step": 410,
"valid_targets_mean": 14020.4,
"valid_targets_min": 1460
},
{
"epoch": 1.7505330490405118,
"grad_norm": 0.41814093158530896,
"learning_rate": 3.292896874962078e-05,
"loss": 1.0006964206695557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24656634032726288,
"step": 411,
"valid_targets_mean": 15418.4,
"valid_targets_min": 2382
},
{
"epoch": 1.7547974413646057,
"grad_norm": 0.403135172590173,
"learning_rate": 3.2883558782375294e-05,
"loss": 1.0603384971618652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2844768166542053,
"step": 412,
"valid_targets_mean": 15048.8,
"valid_targets_min": 4661
},
{
"epoch": 1.7590618336886994,
"grad_norm": 0.40013299639055083,
"learning_rate": 3.283803500385332e-05,
"loss": 1.043904423713684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2568932771682739,
"step": 413,
"valid_targets_mean": 14366.2,
"valid_targets_min": 2376
},
{
"epoch": 1.763326226012793,
"grad_norm": 0.4405862304897585,
"learning_rate": 3.2792397816204546e-05,
"loss": 1.0399513244628906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2780348062515259,
"step": 414,
"valid_targets_mean": 14281.8,
"valid_targets_min": 1040
},
{
"epoch": 1.767590618336887,
"grad_norm": 0.3703678306152304,
"learning_rate": 3.2746647622580524e-05,
"loss": 0.9982384443283081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24133169651031494,
"step": 415,
"valid_targets_mean": 14948.2,
"valid_targets_min": 2248
},
{
"epoch": 1.771855010660981,
"grad_norm": 0.38058065355445136,
"learning_rate": 3.270078482713106e-05,
"loss": 1.008116364479065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2592852711677551,
"step": 416,
"valid_targets_mean": 14538.5,
"valid_targets_min": 6985
},
{
"epoch": 1.7761194029850746,
"grad_norm": 0.3807576126149718,
"learning_rate": 3.265480983500069e-05,
"loss": 1.034130573272705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25203263759613037,
"step": 417,
"valid_targets_mean": 13763.7,
"valid_targets_min": 1735
},
{
"epoch": 1.7803837953091683,
"grad_norm": 0.35469677958006324,
"learning_rate": 3.260872305232507e-05,
"loss": 1.0008351802825928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24766606092453003,
"step": 418,
"valid_targets_mean": 14314.2,
"valid_targets_min": 1831
},
{
"epoch": 1.7846481876332623,
"grad_norm": 0.34953838065509935,
"learning_rate": 3.256252488622738e-05,
"loss": 1.0125362873077393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24885194003582,
"step": 419,
"valid_targets_mean": 15054.8,
"valid_targets_min": 10156
},
{
"epoch": 1.7889125799573562,
"grad_norm": 0.3464933663297695,
"learning_rate": 3.251621574481475e-05,
"loss": 1.066127061843872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2821931540966034,
"step": 420,
"valid_targets_mean": 14622.4,
"valid_targets_min": 1382
},
{
"epoch": 1.79317697228145,
"grad_norm": 0.37117640473424757,
"learning_rate": 3.246979603717467e-05,
"loss": 0.9925398230552673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23529556393623352,
"step": 421,
"valid_targets_mean": 15146.0,
"valid_targets_min": 5987
},
{
"epoch": 1.7974413646055436,
"grad_norm": 0.3770563485294222,
"learning_rate": 3.242326617337133e-05,
"loss": 1.0189104080200195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2386004775762558,
"step": 422,
"valid_targets_mean": 13824.1,
"valid_targets_min": 2070
},
{
"epoch": 1.8017057569296375,
"grad_norm": 0.35822765744928253,
"learning_rate": 3.2376626564442016e-05,
"loss": 1.0439180135726929,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2584801912307739,
"step": 423,
"valid_targets_mean": 15041.2,
"valid_targets_min": 7090
},
{
"epoch": 1.8059701492537314,
"grad_norm": 0.3324831953290299,
"learning_rate": 3.2329877622393515e-05,
"loss": 1.054990291595459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2689490020275116,
"step": 424,
"valid_targets_mean": 14683.2,
"valid_targets_min": 7228
},
{
"epoch": 1.8102345415778252,
"grad_norm": 0.34915086832559983,
"learning_rate": 3.228301976019841e-05,
"loss": 0.9770750999450684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23467862606048584,
"step": 425,
"valid_targets_mean": 14624.9,
"valid_targets_min": 1875
},
{
"epoch": 1.8144989339019189,
"grad_norm": 0.32965841703308824,
"learning_rate": 3.22360533917915e-05,
"loss": 0.9906047582626343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24453550577163696,
"step": 426,
"valid_targets_mean": 14685.0,
"valid_targets_min": 1573
},
{
"epoch": 1.8187633262260128,
"grad_norm": 0.332438503069151,
"learning_rate": 3.218897893206608e-05,
"loss": 1.0660655498504639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804543375968933,
"step": 427,
"valid_targets_mean": 15406.8,
"valid_targets_min": 10790
},
{
"epoch": 1.8230277185501067,
"grad_norm": 0.3689774867731172,
"learning_rate": 3.2141796796870335e-05,
"loss": 0.9783341288566589,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26350972056388855,
"step": 428,
"valid_targets_mean": 15478.9,
"valid_targets_min": 5018
},
{
"epoch": 1.8272921108742004,
"grad_norm": 0.32744116016781977,
"learning_rate": 3.2094507403003614e-05,
"loss": 1.0029715299606323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24373041093349457,
"step": 429,
"valid_targets_mean": 14231.6,
"valid_targets_min": 5631
},
{
"epoch": 1.831556503198294,
"grad_norm": 0.33963983474338727,
"learning_rate": 3.2047111168212785e-05,
"loss": 0.987981915473938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23339219391345978,
"step": 430,
"valid_targets_mean": 14735.6,
"valid_targets_min": 4105
},
{
"epoch": 1.835820895522388,
"grad_norm": 0.3831323506936333,
"learning_rate": 3.1999608511188524e-05,
"loss": 1.0297985076904297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23571500182151794,
"step": 431,
"valid_targets_mean": 14458.9,
"valid_targets_min": 4385
},
{
"epoch": 1.840085287846482,
"grad_norm": 0.28973582326105785,
"learning_rate": 3.1951999851561625e-05,
"loss": 1.0437334775924683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.250507652759552,
"step": 432,
"valid_targets_mean": 13971.0,
"valid_targets_min": 3231
},
{
"epoch": 1.8443496801705757,
"grad_norm": 0.3608534729433716,
"learning_rate": 3.190428560989931e-05,
"loss": 1.0184440612792969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25230586528778076,
"step": 433,
"valid_targets_mean": 14063.9,
"valid_targets_min": 4039
},
{
"epoch": 1.8486140724946694,
"grad_norm": 0.3468183299999246,
"learning_rate": 3.185646620770146e-05,
"loss": 1.0451010465621948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2619550824165344,
"step": 434,
"valid_targets_mean": 14315.4,
"valid_targets_min": 5088
},
{
"epoch": 1.8528784648187633,
"grad_norm": 0.36334244080969874,
"learning_rate": 3.180854206739696e-05,
"loss": 1.0416852235794067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2673511207103729,
"step": 435,
"valid_targets_mean": 14091.8,
"valid_targets_min": 2033
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.39437556533817586,
"learning_rate": 3.176051361233991e-05,
"loss": 1.007869005203247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2646036148071289,
"step": 436,
"valid_targets_mean": 15415.9,
"valid_targets_min": 9952
},
{
"epoch": 1.861407249466951,
"grad_norm": 0.3855523767310849,
"learning_rate": 3.171238126680594e-05,
"loss": 1.0384873151779175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24490685760974884,
"step": 437,
"valid_targets_mean": 13899.1,
"valid_targets_min": 3160
},
{
"epoch": 1.8656716417910446,
"grad_norm": 0.3511120043115705,
"learning_rate": 3.166414545598839e-05,
"loss": 1.05495285987854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2525789141654968,
"step": 438,
"valid_targets_mean": 14838.1,
"valid_targets_min": 6803
},
{
"epoch": 1.8699360341151388,
"grad_norm": 0.3492038967686587,
"learning_rate": 3.161580660599464e-05,
"loss": 1.0517874956130981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26977279782295227,
"step": 439,
"valid_targets_mean": 15549.5,
"valid_targets_min": 9668
},
{
"epoch": 1.8742004264392325,
"grad_norm": 0.3211662787472829,
"learning_rate": 3.1567365143842264e-05,
"loss": 1.005902886390686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2564969062805176,
"step": 440,
"valid_targets_mean": 14757.1,
"valid_targets_min": 5741
},
{
"epoch": 1.8784648187633262,
"grad_norm": 0.343359757201269,
"learning_rate": 3.1518821497455326e-05,
"loss": 1.0198183059692383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2403055876493454,
"step": 441,
"valid_targets_mean": 13502.8,
"valid_targets_min": 1366
},
{
"epoch": 1.88272921108742,
"grad_norm": 0.33782769808390783,
"learning_rate": 3.147017609566054e-05,
"loss": 1.0181028842926025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2621736526489258,
"step": 442,
"valid_targets_mean": 15241.9,
"valid_targets_min": 8209
},
{
"epoch": 1.886993603411514,
"grad_norm": 0.3578357014545001,
"learning_rate": 3.142142936818353e-05,
"loss": 1.0328750610351562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2619187831878662,
"step": 443,
"valid_targets_mean": 14370.1,
"valid_targets_min": 1593
},
{
"epoch": 1.8912579957356077,
"grad_norm": 0.31801043564050785,
"learning_rate": 3.137258174564501e-05,
"loss": 0.999271035194397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23855340480804443,
"step": 444,
"valid_targets_mean": 14367.9,
"valid_targets_min": 2805
},
{
"epoch": 1.8955223880597014,
"grad_norm": 0.39506035505892084,
"learning_rate": 3.1323633659556986e-05,
"loss": 1.0545252561569214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.265545517206192,
"step": 445,
"valid_targets_mean": 15161.8,
"valid_targets_min": 2201
},
{
"epoch": 1.8997867803837953,
"grad_norm": 0.309023363110396,
"learning_rate": 3.127458554231894e-05,
"loss": 1.0253419876098633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22738057374954224,
"step": 446,
"valid_targets_mean": 14012.2,
"valid_targets_min": 5255
},
{
"epoch": 1.9040511727078893,
"grad_norm": 0.35490324818398,
"learning_rate": 3.122543782721402e-05,
"loss": 1.0158578157424927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2512471675872803,
"step": 447,
"valid_targets_mean": 13560.5,
"valid_targets_min": 2354
},
{
"epoch": 1.908315565031983,
"grad_norm": 0.3036891305581489,
"learning_rate": 3.1176190948405194e-05,
"loss": 1.069814920425415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24784015119075775,
"step": 448,
"valid_targets_mean": 14122.2,
"valid_targets_min": 1497
},
{
"epoch": 1.9125799573560767,
"grad_norm": 0.3419726999138405,
"learning_rate": 3.112684534093142e-05,
"loss": 1.0053629875183105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23691532015800476,
"step": 449,
"valid_targets_mean": 14771.7,
"valid_targets_min": 7360
},
{
"epoch": 1.9168443496801706,
"grad_norm": 0.40838753290198465,
"learning_rate": 3.107740144070385e-05,
"loss": 1.0423624515533447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2578592300415039,
"step": 450,
"valid_targets_mean": 14059.9,
"valid_targets_min": 2691
},
{
"epoch": 1.9211087420042645,
"grad_norm": 0.36196495758103997,
"learning_rate": 3.102785968450188e-05,
"loss": 1.0227258205413818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570173144340515,
"step": 451,
"valid_targets_mean": 15053.9,
"valid_targets_min": 4019
},
{
"epoch": 1.9253731343283582,
"grad_norm": 0.3548321726590971,
"learning_rate": 3.09782205099694e-05,
"loss": 1.0171830654144287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2440769076347351,
"step": 452,
"valid_targets_mean": 14496.3,
"valid_targets_min": 1756
},
{
"epoch": 1.929637526652452,
"grad_norm": 0.4058071231863198,
"learning_rate": 3.092848435561084e-05,
"loss": 1.0100500583648682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27710479497909546,
"step": 453,
"valid_targets_mean": 14780.3,
"valid_targets_min": 4633
},
{
"epoch": 1.9339019189765458,
"grad_norm": 0.4070188851510975,
"learning_rate": 3.0878651660787376e-05,
"loss": 1.0602819919586182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2704969644546509,
"step": 454,
"valid_targets_mean": 15116.1,
"valid_targets_min": 7740
},
{
"epoch": 1.9381663113006398,
"grad_norm": 0.4187493934603921,
"learning_rate": 3.082872286571295e-05,
"loss": 1.0430219173431396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2716018855571747,
"step": 455,
"valid_targets_mean": 15225.5,
"valid_targets_min": 2666
},
{
"epoch": 1.9424307036247335,
"grad_norm": 0.4808979187386021,
"learning_rate": 3.077869841145049e-05,
"loss": 1.0674023628234863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27071070671081543,
"step": 456,
"valid_targets_mean": 14779.5,
"valid_targets_min": 6332
},
{
"epoch": 1.9466950959488272,
"grad_norm": 0.34694791859336266,
"learning_rate": 3.0728578739907934e-05,
"loss": 1.0135544538497925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2522042393684387,
"step": 457,
"valid_targets_mean": 14540.5,
"valid_targets_min": 4167
},
{
"epoch": 1.950959488272921,
"grad_norm": 0.4736629987050759,
"learning_rate": 3.067836429383437e-05,
"loss": 1.0493121147155762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25577256083488464,
"step": 458,
"valid_targets_mean": 14053.8,
"valid_targets_min": 2295
},
{
"epoch": 1.955223880597015,
"grad_norm": 0.389170647954697,
"learning_rate": 3.062805551681609e-05,
"loss": 1.0064876079559326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2679370641708374,
"step": 459,
"valid_targets_mean": 15165.4,
"valid_targets_min": 8024
},
{
"epoch": 1.9594882729211087,
"grad_norm": 0.4128770419419475,
"learning_rate": 3.057765285327271e-05,
"loss": 1.0187498331069946,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24309395253658295,
"step": 460,
"valid_targets_mean": 14963.2,
"valid_targets_min": 6194
},
{
"epoch": 1.9637526652452024,
"grad_norm": 0.39902135993292137,
"learning_rate": 3.0527156748453214e-05,
"loss": 1.0560030937194824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2578599750995636,
"step": 461,
"valid_targets_mean": 14774.2,
"valid_targets_min": 3990
},
{
"epoch": 1.9680170575692963,
"grad_norm": 0.3304981556798958,
"learning_rate": 3.047656764843203e-05,
"loss": 0.9743139743804932,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24647156894207,
"step": 462,
"valid_targets_mean": 15133.5,
"valid_targets_min": 4797
},
{
"epoch": 1.9722814498933903,
"grad_norm": 0.3545188406065809,
"learning_rate": 3.0425886000105094e-05,
"loss": 1.0284925699234009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24679070711135864,
"step": 463,
"valid_targets_mean": 14326.2,
"valid_targets_min": 2013
},
{
"epoch": 1.976545842217484,
"grad_norm": 0.321978376788113,
"learning_rate": 3.0375112251185892e-05,
"loss": 1.011577844619751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2750028073787689,
"step": 464,
"valid_targets_mean": 14633.4,
"valid_targets_min": 4870
},
{
"epoch": 1.9808102345415777,
"grad_norm": 0.3826133894972065,
"learning_rate": 3.0324246850201527e-05,
"loss": 1.0314466953277588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2604595422744751,
"step": 465,
"valid_targets_mean": 14508.2,
"valid_targets_min": 980
},
{
"epoch": 1.9850746268656716,
"grad_norm": 0.3106361799547543,
"learning_rate": 3.0273290246488732e-05,
"loss": 1.0470771789550781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25823935866355896,
"step": 466,
"valid_targets_mean": 14294.9,
"valid_targets_min": 2956
},
{
"epoch": 1.9893390191897655,
"grad_norm": 0.41078985221388087,
"learning_rate": 3.0222242890189904e-05,
"loss": 1.0223674774169922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25699979066848755,
"step": 467,
"valid_targets_mean": 14991.4,
"valid_targets_min": 7709
},
{
"epoch": 1.9936034115138592,
"grad_norm": 0.3252010849073714,
"learning_rate": 3.017110523224914e-05,
"loss": 0.9852697849273682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2593464255332947,
"step": 468,
"valid_targets_mean": 15114.2,
"valid_targets_min": 4733
},
{
"epoch": 1.997867803837953,
"grad_norm": 0.37058215950930407,
"learning_rate": 3.011987772440825e-05,
"loss": 1.0171148777008057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25525736808776855,
"step": 469,
"valid_targets_mean": 13891.4,
"valid_targets_min": 1938
},
{
"epoch": 2.0,
"grad_norm": 0.4261470032561998,
"learning_rate": 3.006856081920277e-05,
"loss": 1.0270267724990845,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5216562747955322,
"step": 470,
"valid_targets_mean": 15169.9,
"valid_targets_min": 9901
},
{
"epoch": 2.0042643923240937,
"grad_norm": 0.45304103112110533,
"learning_rate": 3.001715496995793e-05,
"loss": 0.9987781643867493,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.251964271068573,
"step": 471,
"valid_targets_mean": 14206.0,
"valid_targets_min": 6976
},
{
"epoch": 2.008528784648188,
"grad_norm": 0.47018294489753626,
"learning_rate": 2.9965660630784715e-05,
"loss": 1.0175721645355225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2600884437561035,
"step": 472,
"valid_targets_mean": 15563.6,
"valid_targets_min": 10378
},
{
"epoch": 2.0127931769722816,
"grad_norm": 0.3649858551533112,
"learning_rate": 2.9914078256575782e-05,
"loss": 1.0302276611328125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25027453899383545,
"step": 473,
"valid_targets_mean": 14867.3,
"valid_targets_min": 6280
},
{
"epoch": 2.0170575692963753,
"grad_norm": 0.45896794431925675,
"learning_rate": 2.9862408303001492e-05,
"loss": 1.076301097869873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2921193242073059,
"step": 474,
"valid_targets_mean": 14555.3,
"valid_targets_min": 2249
},
{
"epoch": 2.021321961620469,
"grad_norm": 0.3481667692800604,
"learning_rate": 2.9810651226505875e-05,
"loss": 1.036280632019043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27446985244750977,
"step": 475,
"valid_targets_mean": 14317.6,
"valid_targets_min": 2299
},
{
"epoch": 2.025586353944563,
"grad_norm": 0.4601511024144995,
"learning_rate": 2.9758807484302566e-05,
"loss": 1.0325391292572021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24933630228042603,
"step": 476,
"valid_targets_mean": 13864.8,
"valid_targets_min": 2151
},
{
"epoch": 2.029850746268657,
"grad_norm": 0.448089815579243,
"learning_rate": 2.9706877534370822e-05,
"loss": 1.0386598110198975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25227582454681396,
"step": 477,
"valid_targets_mean": 14256.4,
"valid_targets_min": 2066
},
{
"epoch": 2.0341151385927505,
"grad_norm": 0.45781618197082224,
"learning_rate": 2.965486183545142e-05,
"loss": 1.0114233493804932,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24822190403938293,
"step": 478,
"valid_targets_mean": 14081.0,
"valid_targets_min": 2909
},
{
"epoch": 2.038379530916844,
"grad_norm": 0.4349737541090553,
"learning_rate": 2.9602760847042645e-05,
"loss": 0.9995619058609009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25437045097351074,
"step": 479,
"valid_targets_mean": 15119.9,
"valid_targets_min": 4874
},
{
"epoch": 2.0426439232409384,
"grad_norm": 0.46144207194679,
"learning_rate": 2.955057502939621e-05,
"loss": 0.9988946318626404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23824626207351685,
"step": 480,
"valid_targets_mean": 13312.1,
"valid_targets_min": 1519
},
{
"epoch": 2.046908315565032,
"grad_norm": 0.4196200363878079,
"learning_rate": 2.9498304843513193e-05,
"loss": 1.019971251487732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2508181631565094,
"step": 481,
"valid_targets_mean": 14688.1,
"valid_targets_min": 2511
},
{
"epoch": 2.0511727078891258,
"grad_norm": 0.5292738143769523,
"learning_rate": 2.9445950751139957e-05,
"loss": 0.9783295392990112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24422825872898102,
"step": 482,
"valid_targets_mean": 14867.3,
"valid_targets_min": 7621
},
{
"epoch": 2.0554371002132195,
"grad_norm": 0.4368934079863611,
"learning_rate": 2.939351321476412e-05,
"loss": 0.9889360666275024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24530810117721558,
"step": 483,
"valid_targets_mean": 15264.8,
"valid_targets_min": 9523
},
{
"epoch": 2.0597014925373136,
"grad_norm": 0.4067956384715909,
"learning_rate": 2.9340992697610393e-05,
"loss": 0.9587419033050537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23476752638816833,
"step": 484,
"valid_targets_mean": 14411.7,
"valid_targets_min": 2050
},
{
"epoch": 2.0639658848614073,
"grad_norm": 0.3663229598923624,
"learning_rate": 2.9288389663636537e-05,
"loss": 0.9435993432998657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2204243242740631,
"step": 485,
"valid_targets_mean": 14179.5,
"valid_targets_min": 4288
},
{
"epoch": 2.068230277185501,
"grad_norm": 0.3948874114859334,
"learning_rate": 2.923570457752925e-05,
"loss": 0.9886394143104553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22084525227546692,
"step": 486,
"valid_targets_mean": 13013.9,
"valid_targets_min": 1831
},
{
"epoch": 2.0724946695095947,
"grad_norm": 0.4053367288722847,
"learning_rate": 2.9182937904700078e-05,
"loss": 0.9575808644294739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23733314871788025,
"step": 487,
"valid_targets_mean": 14603.0,
"valid_targets_min": 5807
},
{
"epoch": 2.076759061833689,
"grad_norm": 0.357959078881158,
"learning_rate": 2.9130090111281278e-05,
"loss": 1.0362825393676758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26652559638023376,
"step": 488,
"valid_targets_mean": 14712.9,
"valid_targets_min": 5686
},
{
"epoch": 2.0810234541577826,
"grad_norm": 0.3625030051910193,
"learning_rate": 2.9077161664121722e-05,
"loss": 0.998286247253418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2629318833351135,
"step": 489,
"valid_targets_mean": 14764.6,
"valid_targets_min": 8317
},
{
"epoch": 2.0852878464818763,
"grad_norm": 0.3581426489116462,
"learning_rate": 2.902415303078275e-05,
"loss": 1.0012212991714478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2437218427658081,
"step": 490,
"valid_targets_mean": 14034.1,
"valid_targets_min": 2242
},
{
"epoch": 2.08955223880597,
"grad_norm": 0.33251620156796063,
"learning_rate": 2.8971064679534072e-05,
"loss": 0.9862103462219238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26053065061569214,
"step": 491,
"valid_targets_mean": 14437.6,
"valid_targets_min": 3668
},
{
"epoch": 2.093816631130064,
"grad_norm": 0.3758321639405964,
"learning_rate": 2.8917897079349604e-05,
"loss": 1.006826400756836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2373615801334381,
"step": 492,
"valid_targets_mean": 14412.9,
"valid_targets_min": 1726
},
{
"epoch": 2.098081023454158,
"grad_norm": 0.3514965810267716,
"learning_rate": 2.8864650699903336e-05,
"loss": 0.9882227778434753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2510005235671997,
"step": 493,
"valid_targets_mean": 14247.2,
"valid_targets_min": 6013
},
{
"epoch": 2.1023454157782515,
"grad_norm": 0.3471368865177626,
"learning_rate": 2.881132601156518e-05,
"loss": 0.9697372913360596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2327420562505722,
"step": 494,
"valid_targets_mean": 14427.5,
"valid_targets_min": 4288
},
{
"epoch": 2.106609808102345,
"grad_norm": 0.2885460017410152,
"learning_rate": 2.8757923485396805e-05,
"loss": 0.9951438903808594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23933443427085876,
"step": 495,
"valid_targets_mean": 14137.9,
"valid_targets_min": 2994
},
{
"epoch": 2.1108742004264394,
"grad_norm": 0.3467994325621212,
"learning_rate": 2.8704443593147517e-05,
"loss": 0.9793698787689209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2518441677093506,
"step": 496,
"valid_targets_mean": 14064.2,
"valid_targets_min": 1843
},
{
"epoch": 2.115138592750533,
"grad_norm": 0.3033452462810623,
"learning_rate": 2.8650886807250024e-05,
"loss": 1.0147504806518555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24597987532615662,
"step": 497,
"valid_targets_mean": 14743.6,
"valid_targets_min": 752
},
{
"epoch": 2.1194029850746268,
"grad_norm": 0.3408070825535228,
"learning_rate": 2.8597253600816332e-05,
"loss": 0.952460527420044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23443883657455444,
"step": 498,
"valid_targets_mean": 14247.1,
"valid_targets_min": 575
},
{
"epoch": 2.1236673773987205,
"grad_norm": 0.3054078298080589,
"learning_rate": 2.8543544447633517e-05,
"loss": 1.0123192071914673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2612317204475403,
"step": 499,
"valid_targets_mean": 15464.2,
"valid_targets_min": 8312
},
{
"epoch": 2.1279317697228146,
"grad_norm": 0.3700609183978435,
"learning_rate": 2.8489759822159558e-05,
"loss": 1.0260965824127197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2564004361629486,
"step": 500,
"valid_targets_mean": 14635.7,
"valid_targets_min": 3694
},
{
"epoch": 2.1321961620469083,
"grad_norm": 0.33328575187573406,
"learning_rate": 2.843590019951914e-05,
"loss": 0.9698889255523682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24240043759346008,
"step": 501,
"valid_targets_mean": 15021.9,
"valid_targets_min": 5862
},
{
"epoch": 2.136460554371002,
"grad_norm": 0.384299590636117,
"learning_rate": 2.838196605549948e-05,
"loss": 1.0217312574386597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2600533366203308,
"step": 502,
"valid_targets_mean": 14611.5,
"valid_targets_min": 3847
},
{
"epoch": 2.140724946695096,
"grad_norm": 0.3764425664813159,
"learning_rate": 2.8327957866546082e-05,
"loss": 1.0324029922485352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24160277843475342,
"step": 503,
"valid_targets_mean": 14911.7,
"valid_targets_min": 4490
},
{
"epoch": 2.14498933901919,
"grad_norm": 0.3441607332393246,
"learning_rate": 2.8273876109758568e-05,
"loss": 0.9821799397468567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24784724414348602,
"step": 504,
"valid_targets_mean": 14725.2,
"valid_targets_min": 3428
},
{
"epoch": 2.1492537313432836,
"grad_norm": 0.37941453583874735,
"learning_rate": 2.8219721262886427e-05,
"loss": 0.9939541816711426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2545952796936035,
"step": 505,
"valid_targets_mean": 14990.6,
"valid_targets_min": 6710
},
{
"epoch": 2.1535181236673773,
"grad_norm": 0.3281635762444424,
"learning_rate": 2.816549380432483e-05,
"loss": 0.9698700308799744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23794811964035034,
"step": 506,
"valid_targets_mean": 15332.5,
"valid_targets_min": 5308
},
{
"epoch": 2.1577825159914714,
"grad_norm": 0.38885336366973805,
"learning_rate": 2.8111194213110386e-05,
"loss": 0.9915518164634705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2491394281387329,
"step": 507,
"valid_targets_mean": 14937.6,
"valid_targets_min": 6979
},
{
"epoch": 2.162046908315565,
"grad_norm": 0.331297503961614,
"learning_rate": 2.805682296891691e-05,
"loss": 1.0347942113876343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24668872356414795,
"step": 508,
"valid_targets_mean": 14618.8,
"valid_targets_min": 2698
},
{
"epoch": 2.166311300639659,
"grad_norm": 0.3529187036876421,
"learning_rate": 2.8002380552051186e-05,
"loss": 1.0014612674713135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24774719774723053,
"step": 509,
"valid_targets_mean": 14498.8,
"valid_targets_min": 4776
},
{
"epoch": 2.1705756929637525,
"grad_norm": 0.366965077773044,
"learning_rate": 2.7947867443448728e-05,
"loss": 1.0508100986480713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.268143892288208,
"step": 510,
"valid_targets_mean": 15571.0,
"valid_targets_min": 7360
},
{
"epoch": 2.1748400852878467,
"grad_norm": 0.32906318378662697,
"learning_rate": 2.789328412466953e-05,
"loss": 0.9997203350067139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23755742609500885,
"step": 511,
"valid_targets_mean": 14751.3,
"valid_targets_min": 3243
},
{
"epoch": 2.1791044776119404,
"grad_norm": 0.35177788903347434,
"learning_rate": 2.7838631077893813e-05,
"loss": 1.0263261795043945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25490206480026245,
"step": 512,
"valid_targets_mean": 14273.8,
"valid_targets_min": 2266
},
{
"epoch": 2.183368869936034,
"grad_norm": 0.34028455426739546,
"learning_rate": 2.7783908785917753e-05,
"loss": 1.010587215423584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2671951353549957,
"step": 513,
"valid_targets_mean": 14390.0,
"valid_targets_min": 4092
},
{
"epoch": 2.1876332622601278,
"grad_norm": 0.36693282979107555,
"learning_rate": 2.7729117732149244e-05,
"loss": 1.0085017681121826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25137555599212646,
"step": 514,
"valid_targets_mean": 14906.8,
"valid_targets_min": 2502
},
{
"epoch": 2.191897654584222,
"grad_norm": 0.3795448830054028,
"learning_rate": 2.7674258400603587e-05,
"loss": 1.0182987451553345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28089064359664917,
"step": 515,
"valid_targets_mean": 15310.5,
"valid_targets_min": 3271
},
{
"epoch": 2.1961620469083156,
"grad_norm": 0.38053785378880434,
"learning_rate": 2.761933127589927e-05,
"loss": 0.9880110621452332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.254066526889801,
"step": 516,
"valid_targets_mean": 15095.4,
"valid_targets_min": 7989
},
{
"epoch": 2.2004264392324093,
"grad_norm": 0.34255644955233777,
"learning_rate": 2.7564336843253633e-05,
"loss": 1.0093759298324585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23695147037506104,
"step": 517,
"valid_targets_mean": 14574.7,
"valid_targets_min": 4055
},
{
"epoch": 2.204690831556503,
"grad_norm": 0.3928719556564055,
"learning_rate": 2.7509275588478606e-05,
"loss": 0.9518511295318604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22793179750442505,
"step": 518,
"valid_targets_mean": 14171.1,
"valid_targets_min": 2066
},
{
"epoch": 2.208955223880597,
"grad_norm": 0.33004662568032267,
"learning_rate": 2.7454147997976404e-05,
"loss": 1.013105869293213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25704652070999146,
"step": 519,
"valid_targets_mean": 14889.4,
"valid_targets_min": 7756
},
{
"epoch": 2.213219616204691,
"grad_norm": 0.38022286100808406,
"learning_rate": 2.7398954558735272e-05,
"loss": 1.0273163318634033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2711549997329712,
"step": 520,
"valid_targets_mean": 14467.1,
"valid_targets_min": 4204
},
{
"epoch": 2.2174840085287846,
"grad_norm": 0.33660418341875553,
"learning_rate": 2.7343695758325125e-05,
"loss": 1.0346674919128418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25120994448661804,
"step": 521,
"valid_targets_mean": 13929.1,
"valid_targets_min": 2697
},
{
"epoch": 2.2217484008528783,
"grad_norm": 0.37649788693011715,
"learning_rate": 2.7288372084893282e-05,
"loss": 0.9666027426719666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24410052597522736,
"step": 522,
"valid_targets_mean": 15248.3,
"valid_targets_min": 1517
},
{
"epoch": 2.2260127931769724,
"grad_norm": 0.34413713775157817,
"learning_rate": 2.7232984027160126e-05,
"loss": 0.9904748201370239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21707294881343842,
"step": 523,
"valid_targets_mean": 14821.8,
"valid_targets_min": 2062
},
{
"epoch": 2.230277185501066,
"grad_norm": 0.34164135590434347,
"learning_rate": 2.7177532074414822e-05,
"loss": 1.0020073652267456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25181591510772705,
"step": 524,
"valid_targets_mean": 13669.2,
"valid_targets_min": 1189
},
{
"epoch": 2.23454157782516,
"grad_norm": 0.36586032609517777,
"learning_rate": 2.712201671651094e-05,
"loss": 1.0094950199127197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2598450779914856,
"step": 525,
"valid_targets_mean": 14686.9,
"valid_targets_min": 3001
},
{
"epoch": 2.2388059701492535,
"grad_norm": 0.37728559760742353,
"learning_rate": 2.7066438443862205e-05,
"loss": 0.9910581111907959,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23355071246623993,
"step": 526,
"valid_targets_mean": 14217.2,
"valid_targets_min": 3674
},
{
"epoch": 2.2430703624733477,
"grad_norm": 0.30553105358648747,
"learning_rate": 2.701079774743808e-05,
"loss": 0.9488228559494019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2328556776046753,
"step": 527,
"valid_targets_mean": 14927.9,
"valid_targets_min": 6321
},
{
"epoch": 2.2473347547974414,
"grad_norm": 0.3599644735835354,
"learning_rate": 2.6955095118759496e-05,
"loss": 1.0226023197174072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2568567991256714,
"step": 528,
"valid_targets_mean": 14094.9,
"valid_targets_min": 3640
},
{
"epoch": 2.251599147121535,
"grad_norm": 0.29077069956911633,
"learning_rate": 2.689933104989447e-05,
"loss": 1.003893256187439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2466341108083725,
"step": 529,
"valid_targets_mean": 14749.3,
"valid_targets_min": 2099
},
{
"epoch": 2.2558635394456292,
"grad_norm": 0.3365561847842144,
"learning_rate": 2.6843506033453777e-05,
"loss": 0.9687828421592712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22781942784786224,
"step": 530,
"valid_targets_mean": 15208.4,
"valid_targets_min": 6099
},
{
"epoch": 2.260127931769723,
"grad_norm": 0.29568995670779963,
"learning_rate": 2.6787620562586587e-05,
"loss": 0.9960900545120239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2638796269893646,
"step": 531,
"valid_targets_mean": 15470.2,
"valid_targets_min": 8826
},
{
"epoch": 2.2643923240938166,
"grad_norm": 0.3472237072820283,
"learning_rate": 2.673167513097613e-05,
"loss": 0.9789157509803772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2449982911348343,
"step": 532,
"valid_targets_mean": 14315.0,
"valid_targets_min": 2856
},
{
"epoch": 2.2686567164179103,
"grad_norm": 0.3108163488961833,
"learning_rate": 2.6675670232835297e-05,
"loss": 0.9460334777832031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2233898937702179,
"step": 533,
"valid_targets_mean": 13737.3,
"valid_targets_min": 1778
},
{
"epoch": 2.272921108742004,
"grad_norm": 0.3297898255290531,
"learning_rate": 2.661960636290231e-05,
"loss": 0.9994051456451416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2590315639972687,
"step": 534,
"valid_targets_mean": 14841.2,
"valid_targets_min": 3402
},
{
"epoch": 2.277185501066098,
"grad_norm": 0.3264775214516908,
"learning_rate": 2.6563484016436346e-05,
"loss": 1.02659273147583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2553572654724121,
"step": 535,
"valid_targets_mean": 14679.2,
"valid_targets_min": 5570
},
{
"epoch": 2.281449893390192,
"grad_norm": 0.33784002917961925,
"learning_rate": 2.6507303689213143e-05,
"loss": 1.0258793830871582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3054291605949402,
"step": 536,
"valid_targets_mean": 15405.3,
"valid_targets_min": 11021
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.32626498868482645,
"learning_rate": 2.6451065877520634e-05,
"loss": 0.9970362186431885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.254366397857666,
"step": 537,
"valid_targets_mean": 15113.9,
"valid_targets_min": 7736
},
{
"epoch": 2.2899786780383797,
"grad_norm": 0.3414737788321613,
"learning_rate": 2.639477107815455e-05,
"loss": 0.9489619731903076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25024449825286865,
"step": 538,
"valid_targets_mean": 14803.9,
"valid_targets_min": 6681
},
{
"epoch": 2.2942430703624734,
"grad_norm": 0.3636952386802788,
"learning_rate": 2.633841978841406e-05,
"loss": 0.9728654623031616,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2321648895740509,
"step": 539,
"valid_targets_mean": 14362.3,
"valid_targets_min": 7984
},
{
"epoch": 2.298507462686567,
"grad_norm": 0.3057411522579838,
"learning_rate": 2.6282012506097347e-05,
"loss": 1.0068259239196777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2509293556213379,
"step": 540,
"valid_targets_mean": 14533.2,
"valid_targets_min": 1498
},
{
"epoch": 2.302771855010661,
"grad_norm": 0.351676162724982,
"learning_rate": 2.622554972949724e-05,
"loss": 1.0444014072418213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24729007482528687,
"step": 541,
"valid_targets_mean": 13808.7,
"valid_targets_min": 1923
},
{
"epoch": 2.307036247334755,
"grad_norm": 0.32443165073172486,
"learning_rate": 2.6169031957396778e-05,
"loss": 1.0184507369995117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2640179991722107,
"step": 542,
"valid_targets_mean": 14197.4,
"valid_targets_min": 1999
},
{
"epoch": 2.3113006396588487,
"grad_norm": 0.3414797944901497,
"learning_rate": 2.611245968906482e-05,
"loss": 0.9821099638938904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.259810209274292,
"step": 543,
"valid_targets_mean": 14675.0,
"valid_targets_min": 3536
},
{
"epoch": 2.3155650319829424,
"grad_norm": 0.4023877924977479,
"learning_rate": 2.605583342425165e-05,
"loss": 0.9992808103561401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23244965076446533,
"step": 544,
"valid_targets_mean": 13720.2,
"valid_targets_min": 1863
},
{
"epoch": 2.319829424307036,
"grad_norm": 0.36349616641953175,
"learning_rate": 2.5999153663184546e-05,
"loss": 1.0641918182373047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29147133231163025,
"step": 545,
"valid_targets_mean": 14533.0,
"valid_targets_min": 3852
},
{
"epoch": 2.3240938166311302,
"grad_norm": 0.3775915564466432,
"learning_rate": 2.594242090656335e-05,
"loss": 0.9966145753860474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27358198165893555,
"step": 546,
"valid_targets_mean": 15236.0,
"valid_targets_min": 1593
},
{
"epoch": 2.328358208955224,
"grad_norm": 0.4314874779898479,
"learning_rate": 2.5885635655556075e-05,
"loss": 0.9890132546424866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2735801339149475,
"step": 547,
"valid_targets_mean": 15963.0,
"valid_targets_min": 11813
},
{
"epoch": 2.3326226012793176,
"grad_norm": 0.3854914731013695,
"learning_rate": 2.5828798411794443e-05,
"loss": 1.017390489578247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2258615344762802,
"step": 548,
"valid_targets_mean": 13778.4,
"valid_targets_min": 2715
},
{
"epoch": 2.3368869936034113,
"grad_norm": 0.3659662850075527,
"learning_rate": 2.5771909677369484e-05,
"loss": 0.9837027788162231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2616751790046692,
"step": 549,
"valid_targets_mean": 15342.6,
"valid_targets_min": 9001
},
{
"epoch": 2.3411513859275055,
"grad_norm": 0.4445302071685335,
"learning_rate": 2.571496995482709e-05,
"loss": 1.036919116973877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25041985511779785,
"step": 550,
"valid_targets_mean": 14808.0,
"valid_targets_min": 2730
},
{
"epoch": 2.345415778251599,
"grad_norm": 0.3727552816752512,
"learning_rate": 2.565797974716357e-05,
"loss": 1.0211420059204102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23311321437358856,
"step": 551,
"valid_targets_mean": 14403.5,
"valid_targets_min": 5487
},
{
"epoch": 2.349680170575693,
"grad_norm": 0.42161360368650125,
"learning_rate": 2.5600939557821205e-05,
"loss": 0.9743055105209351,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24897313117980957,
"step": 552,
"valid_targets_mean": 14613.9,
"valid_targets_min": 2797
},
{
"epoch": 2.3539445628997866,
"grad_norm": 0.3041903132801738,
"learning_rate": 2.5543849890683813e-05,
"loss": 0.9749882221221924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25994178652763367,
"step": 553,
"valid_targets_mean": 15261.2,
"valid_targets_min": 7622
},
{
"epoch": 2.3582089552238807,
"grad_norm": 0.3643368797701379,
"learning_rate": 2.548671125007229e-05,
"loss": 0.9856359958648682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2501842975616455,
"step": 554,
"valid_targets_mean": 13247.3,
"valid_targets_min": 2399
},
{
"epoch": 2.3624733475479744,
"grad_norm": 0.3502290439799157,
"learning_rate": 2.5429524140740155e-05,
"loss": 1.0194714069366455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2653921842575073,
"step": 555,
"valid_targets_mean": 14490.5,
"valid_targets_min": 3899
},
{
"epoch": 2.366737739872068,
"grad_norm": 0.3018990261751899,
"learning_rate": 2.537228906786908e-05,
"loss": 1.0033659934997559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24577811360359192,
"step": 556,
"valid_targets_mean": 14929.9,
"valid_targets_min": 7722
},
{
"epoch": 2.3710021321961623,
"grad_norm": 0.3580593178576187,
"learning_rate": 2.5315006537064473e-05,
"loss": 0.996933102607727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2340065836906433,
"step": 557,
"valid_targets_mean": 14046.7,
"valid_targets_min": 4710
},
{
"epoch": 2.375266524520256,
"grad_norm": 0.3256183790600885,
"learning_rate": 2.5257677054350927e-05,
"loss": 0.978560209274292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2356126606464386,
"step": 558,
"valid_targets_mean": 14559.5,
"valid_targets_min": 2243
},
{
"epoch": 2.3795309168443497,
"grad_norm": 0.3407321064506055,
"learning_rate": 2.5200301126167857e-05,
"loss": 0.9780471920967102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23114609718322754,
"step": 559,
"valid_targets_mean": 14566.1,
"valid_targets_min": 7120
},
{
"epoch": 2.3837953091684434,
"grad_norm": 0.3583571276732938,
"learning_rate": 2.514287925936492e-05,
"loss": 1.0212950706481934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2661419212818146,
"step": 560,
"valid_targets_mean": 15004.2,
"valid_targets_min": 4432
},
{
"epoch": 2.388059701492537,
"grad_norm": 0.33664147331934247,
"learning_rate": 2.5085411961197626e-05,
"loss": 1.0373973846435547,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24310649931430817,
"step": 561,
"valid_targets_mean": 14641.2,
"valid_targets_min": 5183
},
{
"epoch": 2.3923240938166312,
"grad_norm": 0.35707958581650207,
"learning_rate": 2.502789973932278e-05,
"loss": 1.0003979206085205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25113213062286377,
"step": 562,
"valid_targets_mean": 14444.3,
"valid_targets_min": 1649
},
{
"epoch": 2.396588486140725,
"grad_norm": 0.3209570544747951,
"learning_rate": 2.4970343101794073e-05,
"loss": 1.0033340454101562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23442038893699646,
"step": 563,
"valid_targets_mean": 13773.7,
"valid_targets_min": 2585
},
{
"epoch": 2.4008528784648187,
"grad_norm": 0.34273894903375246,
"learning_rate": 2.4912742557057538e-05,
"loss": 0.9911829233169556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2385222613811493,
"step": 564,
"valid_targets_mean": 14328.3,
"valid_targets_min": 1192
},
{
"epoch": 2.405117270788913,
"grad_norm": 0.30874004275234807,
"learning_rate": 2.485509861394708e-05,
"loss": 1.0349599123001099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2621227502822876,
"step": 565,
"valid_targets_mean": 14846.8,
"valid_targets_min": 2791
},
{
"epoch": 2.4093816631130065,
"grad_norm": 0.28513054852536457,
"learning_rate": 2.4797411781679975e-05,
"loss": 0.9519776701927185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2727797031402588,
"step": 566,
"valid_targets_mean": 14597.2,
"valid_targets_min": 3095
},
{
"epoch": 2.4136460554371,
"grad_norm": 0.3681769122591231,
"learning_rate": 2.473968256985238e-05,
"loss": 1.0031790733337402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2726595401763916,
"step": 567,
"valid_targets_mean": 14863.2,
"valid_targets_min": 5634
},
{
"epoch": 2.417910447761194,
"grad_norm": 0.2820332686420988,
"learning_rate": 2.4681911488434825e-05,
"loss": 1.0346243381500244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23928891122341156,
"step": 568,
"valid_targets_mean": 14235.1,
"valid_targets_min": 3758
},
{
"epoch": 2.4221748400852876,
"grad_norm": 0.345959881180222,
"learning_rate": 2.4624099047767702e-05,
"loss": 1.0115197896957397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2657356262207031,
"step": 569,
"valid_targets_mean": 15144.8,
"valid_targets_min": 6786
},
{
"epoch": 2.4264392324093818,
"grad_norm": 0.3438789637735161,
"learning_rate": 2.4566245758556787e-05,
"loss": 0.968398928642273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23302385210990906,
"step": 570,
"valid_targets_mean": 14613.6,
"valid_targets_min": 2792
},
{
"epoch": 2.4307036247334755,
"grad_norm": 0.33811336765989447,
"learning_rate": 2.4508352131868664e-05,
"loss": 1.0073961019515991,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26625654101371765,
"step": 571,
"valid_targets_mean": 14827.8,
"valid_targets_min": 1388
},
{
"epoch": 2.434968017057569,
"grad_norm": 0.35683634326397096,
"learning_rate": 2.445041867912629e-05,
"loss": 0.9651147723197937,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25018009543418884,
"step": 572,
"valid_targets_mean": 14830.1,
"valid_targets_min": 4156
},
{
"epoch": 2.4392324093816633,
"grad_norm": 0.3524895752220168,
"learning_rate": 2.439244591210443e-05,
"loss": 0.9606080651283264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25262510776519775,
"step": 573,
"valid_targets_mean": 15141.2,
"valid_targets_min": 7503
},
{
"epoch": 2.443496801705757,
"grad_norm": 0.35416817181758903,
"learning_rate": 2.4334434342925133e-05,
"loss": 0.9872428178787231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24746586382389069,
"step": 574,
"valid_targets_mean": 14546.2,
"valid_targets_min": 3910
},
{
"epoch": 2.4477611940298507,
"grad_norm": 0.3714939792072648,
"learning_rate": 2.4276384484053227e-05,
"loss": 0.9739286303520203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2626081705093384,
"step": 575,
"valid_targets_mean": 15743.1,
"valid_targets_min": 5391
},
{
"epoch": 2.4520255863539444,
"grad_norm": 0.3267786224236743,
"learning_rate": 2.4218296848291795e-05,
"loss": 1.035171389579773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23967371881008148,
"step": 576,
"valid_targets_mean": 14141.6,
"valid_targets_min": 6849
},
{
"epoch": 2.4562899786780386,
"grad_norm": 0.37728983603227556,
"learning_rate": 2.4160171948777603e-05,
"loss": 0.9901844263076782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.260466992855072,
"step": 577,
"valid_targets_mean": 15361.8,
"valid_targets_min": 5801
},
{
"epoch": 2.4605543710021323,
"grad_norm": 0.34470084170040144,
"learning_rate": 2.410201029897665e-05,
"loss": 1.0634323358535767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2721429765224457,
"step": 578,
"valid_targets_mean": 15740.7,
"valid_targets_min": 11262
},
{
"epoch": 2.464818763326226,
"grad_norm": 0.29350111028258313,
"learning_rate": 2.4043812412679532e-05,
"loss": 0.9816789031028748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24625752866268158,
"step": 579,
"valid_targets_mean": 14815.9,
"valid_targets_min": 6065
},
{
"epoch": 2.4690831556503197,
"grad_norm": 0.38332974196139513,
"learning_rate": 2.3985578803996985e-05,
"loss": 1.0299735069274902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2443036288022995,
"step": 580,
"valid_targets_mean": 13771.2,
"valid_targets_min": 1532
},
{
"epoch": 2.473347547974414,
"grad_norm": 0.27749887123963046,
"learning_rate": 2.392730998735529e-05,
"loss": 1.0363097190856934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24988089501857758,
"step": 581,
"valid_targets_mean": 15384.0,
"valid_targets_min": 3886
},
{
"epoch": 2.4776119402985075,
"grad_norm": 0.3714639432663393,
"learning_rate": 2.3869006477491755e-05,
"loss": 1.0010910034179688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24456912279129028,
"step": 582,
"valid_targets_mean": 15580.9,
"valid_targets_min": 4483
},
{
"epoch": 2.481876332622601,
"grad_norm": 0.2826495723285361,
"learning_rate": 2.381066878945017e-05,
"loss": 0.9860137701034546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2536880075931549,
"step": 583,
"valid_targets_mean": 14908.8,
"valid_targets_min": 2910
},
{
"epoch": 2.486140724946695,
"grad_norm": 0.40701249580578086,
"learning_rate": 2.3752297438576257e-05,
"loss": 0.9822453260421753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24656826257705688,
"step": 584,
"valid_targets_mean": 15053.9,
"valid_targets_min": 5225
},
{
"epoch": 2.490405117270789,
"grad_norm": 0.3510159174487897,
"learning_rate": 2.3693892940513074e-05,
"loss": 1.0518721342086792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570648193359375,
"step": 585,
"valid_targets_mean": 14092.7,
"valid_targets_min": 4352
},
{
"epoch": 2.4946695095948828,
"grad_norm": 0.3880025590367412,
"learning_rate": 2.3635455811196536e-05,
"loss": 1.0154331922531128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2572515606880188,
"step": 586,
"valid_targets_mean": 14364.0,
"valid_targets_min": 2248
},
{
"epoch": 2.4989339019189765,
"grad_norm": 0.35629403874874577,
"learning_rate": 2.3576986566850796e-05,
"loss": 1.0354433059692383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22509129345417023,
"step": 587,
"valid_targets_mean": 13014.9,
"valid_targets_min": 3563
},
{
"epoch": 2.50319829424307,
"grad_norm": 0.40934673917041575,
"learning_rate": 2.351848572398371e-05,
"loss": 0.9651215672492981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23150211572647095,
"step": 588,
"valid_targets_mean": 13630.2,
"valid_targets_min": 1625
},
{
"epoch": 2.5074626865671643,
"grad_norm": 0.29781159312023847,
"learning_rate": 2.3459953799382276e-05,
"loss": 0.9885987639427185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25822120904922485,
"step": 589,
"valid_targets_mean": 14645.2,
"valid_targets_min": 1150
},
{
"epoch": 2.511727078891258,
"grad_norm": 0.3673013969280717,
"learning_rate": 2.3401391310108054e-05,
"loss": 1.0021411180496216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27557167410850525,
"step": 590,
"valid_targets_mean": 15001.2,
"valid_targets_min": 2024
},
{
"epoch": 2.5159914712153517,
"grad_norm": 0.29338489273964863,
"learning_rate": 2.3342798773492602e-05,
"loss": 0.9634994268417358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2440796196460724,
"step": 591,
"valid_targets_mean": 13855.1,
"valid_targets_min": 5690
},
{
"epoch": 2.520255863539446,
"grad_norm": 0.3488525245756821,
"learning_rate": 2.328417670713294e-05,
"loss": 1.0279403924942017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2383851408958435,
"step": 592,
"valid_targets_mean": 13533.6,
"valid_targets_min": 1542
},
{
"epoch": 2.5245202558635396,
"grad_norm": 0.3109864425433428,
"learning_rate": 2.3225525628886918e-05,
"loss": 1.0075408220291138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.236200213432312,
"step": 593,
"valid_targets_mean": 14168.2,
"valid_targets_min": 1389
},
{
"epoch": 2.5287846481876333,
"grad_norm": 0.32460960511845355,
"learning_rate": 2.3166846056868687e-05,
"loss": 1.0584495067596436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.251211017370224,
"step": 594,
"valid_targets_mean": 14296.9,
"valid_targets_min": 2014
},
{
"epoch": 2.533049040511727,
"grad_norm": 0.3605753288926691,
"learning_rate": 2.31081385094441e-05,
"loss": 1.050918698310852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27081745862960815,
"step": 595,
"valid_targets_mean": 14749.0,
"valid_targets_min": 1741
},
{
"epoch": 2.5373134328358207,
"grad_norm": 0.30930846078014956,
"learning_rate": 2.304940350522615e-05,
"loss": 0.979073703289032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24194717407226562,
"step": 596,
"valid_targets_mean": 15197.2,
"valid_targets_min": 5355
},
{
"epoch": 2.541577825159915,
"grad_norm": 0.3451913732751782,
"learning_rate": 2.299064156307037e-05,
"loss": 0.9640562534332275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2509298622608185,
"step": 597,
"valid_targets_mean": 15269.8,
"valid_targets_min": 6535
},
{
"epoch": 2.5458422174840085,
"grad_norm": 0.32132576352199543,
"learning_rate": 2.2931853202070275e-05,
"loss": 1.0127673149108887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24850039184093475,
"step": 598,
"valid_targets_mean": 14430.6,
"valid_targets_min": 5331
},
{
"epoch": 2.550106609808102,
"grad_norm": 0.3447521945135351,
"learning_rate": 2.2873038941552724e-05,
"loss": 0.9941070079803467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24251219630241394,
"step": 599,
"valid_targets_mean": 14965.9,
"valid_targets_min": 5019
},
{
"epoch": 2.5543710021321964,
"grad_norm": 0.3603682710041226,
"learning_rate": 2.2814199301073412e-05,
"loss": 0.9789334535598755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23000304400920868,
"step": 600,
"valid_targets_mean": 14331.5,
"valid_targets_min": 2859
},
{
"epoch": 2.55863539445629,
"grad_norm": 0.4524316632207512,
"learning_rate": 2.27553348004122e-05,
"loss": 1.0233545303344727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2751467823982239,
"step": 601,
"valid_targets_mean": 14395.6,
"valid_targets_min": 2323
},
{
"epoch": 2.5628997867803838,
"grad_norm": 0.3405044476615485,
"learning_rate": 2.2696445959568577e-05,
"loss": 0.987399697303772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2388504594564438,
"step": 602,
"valid_targets_mean": 14543.6,
"valid_targets_min": 4364
},
{
"epoch": 2.5671641791044775,
"grad_norm": 0.3664481016997422,
"learning_rate": 2.2637533298757064e-05,
"loss": 1.0295848846435547,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2575531601905823,
"step": 603,
"valid_targets_mean": 14556.6,
"valid_targets_min": 5755
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.2958680019677503,
"learning_rate": 2.2578597338402567e-05,
"loss": 1.0304653644561768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2653025686740875,
"step": 604,
"valid_targets_mean": 14233.3,
"valid_targets_min": 1448
},
{
"epoch": 2.5756929637526653,
"grad_norm": 0.3380173235948929,
"learning_rate": 2.2519638599135844e-05,
"loss": 0.9955521821975708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2450968623161316,
"step": 605,
"valid_targets_mean": 14312.9,
"valid_targets_min": 1957
},
{
"epoch": 2.579957356076759,
"grad_norm": 0.2779481999776168,
"learning_rate": 2.2460657601788875e-05,
"loss": 1.0013747215270996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25416964292526245,
"step": 606,
"valid_targets_mean": 14795.7,
"valid_targets_min": 4826
},
{
"epoch": 2.5842217484008527,
"grad_norm": 0.3435486497386558,
"learning_rate": 2.2401654867390256e-05,
"loss": 1.004853367805481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2516235411167145,
"step": 607,
"valid_targets_mean": 14743.6,
"valid_targets_min": 3783
},
{
"epoch": 2.588486140724947,
"grad_norm": 0.2929995479537222,
"learning_rate": 2.2342630917160605e-05,
"loss": 0.9981028437614441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2684582471847534,
"step": 608,
"valid_targets_mean": 15170.2,
"valid_targets_min": 10692
},
{
"epoch": 2.5927505330490406,
"grad_norm": 0.3834421277554267,
"learning_rate": 2.2283586272507975e-05,
"loss": 1.0357897281646729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2626771926879883,
"step": 609,
"valid_targets_mean": 14811.3,
"valid_targets_min": 3785
},
{
"epoch": 2.5970149253731343,
"grad_norm": 0.29333535838418107,
"learning_rate": 2.2224521455023193e-05,
"loss": 1.023173213005066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27299946546554565,
"step": 610,
"valid_targets_mean": 14908.2,
"valid_targets_min": 5562
},
{
"epoch": 2.6012793176972284,
"grad_norm": 0.36625779396404645,
"learning_rate": 2.216543698647534e-05,
"loss": 1.0014777183532715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23676812648773193,
"step": 611,
"valid_targets_mean": 14785.9,
"valid_targets_min": 7223
},
{
"epoch": 2.605543710021322,
"grad_norm": 0.28588149465647306,
"learning_rate": 2.210633338880704e-05,
"loss": 0.9888811707496643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25012534856796265,
"step": 612,
"valid_targets_mean": 14344.6,
"valid_targets_min": 2032
},
{
"epoch": 2.609808102345416,
"grad_norm": 0.32582274237071657,
"learning_rate": 2.204721118412994e-05,
"loss": 0.9636735916137695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23523156344890594,
"step": 613,
"valid_targets_mean": 15641.1,
"valid_targets_min": 6636
},
{
"epoch": 2.6140724946695095,
"grad_norm": 0.2704351653560179,
"learning_rate": 2.1988070894720037e-05,
"loss": 0.9971131086349487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26915496587753296,
"step": 614,
"valid_targets_mean": 14540.5,
"valid_targets_min": 3303
},
{
"epoch": 2.6183368869936032,
"grad_norm": 0.32530372087862747,
"learning_rate": 2.192891304301309e-05,
"loss": 1.0155519247055054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25380975008010864,
"step": 615,
"valid_targets_mean": 15039.4,
"valid_targets_min": 6067
},
{
"epoch": 2.6226012793176974,
"grad_norm": 0.2812902887326392,
"learning_rate": 2.18697381516e-05,
"loss": 0.9872410893440247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2469387799501419,
"step": 616,
"valid_targets_mean": 15058.1,
"valid_targets_min": 2443
},
{
"epoch": 2.626865671641791,
"grad_norm": 0.30977938730271676,
"learning_rate": 2.181054674322221e-05,
"loss": 1.0182151794433594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25168952345848083,
"step": 617,
"valid_targets_mean": 14824.7,
"valid_targets_min": 3184
},
{
"epoch": 2.631130063965885,
"grad_norm": 0.32384586042183855,
"learning_rate": 2.1751339340767043e-05,
"loss": 0.9708200097084045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25685781240463257,
"step": 618,
"valid_targets_mean": 14443.2,
"valid_targets_min": 4836
},
{
"epoch": 2.635394456289979,
"grad_norm": 0.28582353916291997,
"learning_rate": 2.169211646726313e-05,
"loss": 1.0166131258010864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24662283062934875,
"step": 619,
"valid_targets_mean": 14274.7,
"valid_targets_min": 1403
},
{
"epoch": 2.6396588486140726,
"grad_norm": 0.34566164860426407,
"learning_rate": 2.163287864587576e-05,
"loss": 1.012539029121399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2719329297542572,
"step": 620,
"valid_targets_mean": 15074.4,
"valid_targets_min": 2880
},
{
"epoch": 2.6439232409381663,
"grad_norm": 0.28191692959061687,
"learning_rate": 2.157362639990229e-05,
"loss": 0.9720137119293213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2529488205909729,
"step": 621,
"valid_targets_mean": 14954.9,
"valid_targets_min": 4040
},
{
"epoch": 2.64818763326226,
"grad_norm": 0.32181494490315404,
"learning_rate": 2.151436025276747e-05,
"loss": 1.0196996927261353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2558142840862274,
"step": 622,
"valid_targets_mean": 14778.2,
"valid_targets_min": 6227
},
{
"epoch": 2.6524520255863537,
"grad_norm": 0.3193343120806747,
"learning_rate": 2.145508072801888e-05,
"loss": 1.0273431539535522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2799755334854126,
"step": 623,
"valid_targets_mean": 15236.8,
"valid_targets_min": 8987
},
{
"epoch": 2.656716417910448,
"grad_norm": 0.2959598694056831,
"learning_rate": 2.1395788349322256e-05,
"loss": 1.0347175598144531,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2716575860977173,
"step": 624,
"valid_targets_mean": 14910.9,
"valid_targets_min": 1083
},
{
"epoch": 2.6609808102345416,
"grad_norm": 0.32615860148425246,
"learning_rate": 2.133648364045689e-05,
"loss": 0.9719746112823486,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21385571360588074,
"step": 625,
"valid_targets_mean": 13264.8,
"valid_targets_min": 1699
},
{
"epoch": 2.6652452025586353,
"grad_norm": 0.3092815072790684,
"learning_rate": 2.1277167125310996e-05,
"loss": 0.9729279279708862,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24361443519592285,
"step": 626,
"valid_targets_mean": 14669.8,
"valid_targets_min": 2210
},
{
"epoch": 2.6695095948827294,
"grad_norm": 0.3171929824255565,
"learning_rate": 2.1217839327877098e-05,
"loss": 0.9938591718673706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2757403254508972,
"step": 627,
"valid_targets_mean": 14886.6,
"valid_targets_min": 3979
},
{
"epoch": 2.673773987206823,
"grad_norm": 0.32162050849555684,
"learning_rate": 2.1158500772247352e-05,
"loss": 1.0283288955688477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24078741669654846,
"step": 628,
"valid_targets_mean": 15458.4,
"valid_targets_min": 9633
},
{
"epoch": 2.678038379530917,
"grad_norm": 0.2955236736778956,
"learning_rate": 2.1099151982608985e-05,
"loss": 0.996048092842102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23683753609657288,
"step": 629,
"valid_targets_mean": 13504.6,
"valid_targets_min": 2559
},
{
"epoch": 2.6823027718550105,
"grad_norm": 0.2607480919393273,
"learning_rate": 2.1039793483239607e-05,
"loss": 1.012598991394043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2531105577945709,
"step": 630,
"valid_targets_mean": 15036.3,
"valid_targets_min": 4520
},
{
"epoch": 2.6865671641791042,
"grad_norm": 0.3023500155803285,
"learning_rate": 2.0980425798502616e-05,
"loss": 0.9994162321090698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25880444049835205,
"step": 631,
"valid_targets_mean": 14533.3,
"valid_targets_min": 4140
},
{
"epoch": 2.6908315565031984,
"grad_norm": 0.25438203117222763,
"learning_rate": 2.092104945284255e-05,
"loss": 0.9778115153312683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25283128023147583,
"step": 632,
"valid_targets_mean": 14683.9,
"valid_targets_min": 3664
},
{
"epoch": 2.695095948827292,
"grad_norm": 0.2830217534754341,
"learning_rate": 2.0861664970780434e-05,
"loss": 1.0162835121154785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2786669135093689,
"step": 633,
"valid_targets_mean": 14680.2,
"valid_targets_min": 2295
},
{
"epoch": 2.699360341151386,
"grad_norm": 0.2764232598639447,
"learning_rate": 2.08022728769092e-05,
"loss": 1.0076497793197632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2617707848548889,
"step": 634,
"valid_targets_mean": 14868.3,
"valid_targets_min": 8834
},
{
"epoch": 2.70362473347548,
"grad_norm": 0.27947688140337634,
"learning_rate": 2.0742873695889005e-05,
"loss": 0.9848247766494751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21173720061779022,
"step": 635,
"valid_targets_mean": 13743.9,
"valid_targets_min": 2490
},
{
"epoch": 2.7078891257995736,
"grad_norm": 0.2950208095330011,
"learning_rate": 2.0683467952442626e-05,
"loss": 1.0044206380844116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25531816482543945,
"step": 636,
"valid_targets_mean": 14205.1,
"valid_targets_min": 1530
},
{
"epoch": 2.7121535181236673,
"grad_norm": 0.31921787069887186,
"learning_rate": 2.0624056171350785e-05,
"loss": 1.0012754201889038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2538597881793976,
"step": 637,
"valid_targets_mean": 14988.6,
"valid_targets_min": 1570
},
{
"epoch": 2.716417910447761,
"grad_norm": 0.29814243793990697,
"learning_rate": 2.0564638877447566e-05,
"loss": 1.0156222581863403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2510865330696106,
"step": 638,
"valid_targets_mean": 14630.3,
"valid_targets_min": 1235
},
{
"epoch": 2.7206823027718547,
"grad_norm": 0.36746929301245546,
"learning_rate": 2.0505216595615742e-05,
"loss": 1.0245939493179321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2569143772125244,
"step": 639,
"valid_targets_mean": 15198.6,
"valid_targets_min": 7587
},
{
"epoch": 2.724946695095949,
"grad_norm": 0.29201653474293837,
"learning_rate": 2.044578985078215e-05,
"loss": 1.0242359638214111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24316444993019104,
"step": 640,
"valid_targets_mean": 13845.4,
"valid_targets_min": 2933
},
{
"epoch": 2.7292110874200426,
"grad_norm": 0.3447320951372601,
"learning_rate": 2.0386359167913046e-05,
"loss": 0.9643347263336182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24850095808506012,
"step": 641,
"valid_targets_mean": 15607.5,
"valid_targets_min": 12085
},
{
"epoch": 2.7334754797441363,
"grad_norm": 0.29140710920784874,
"learning_rate": 2.0326925072009485e-05,
"loss": 0.9665999412536621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23093298077583313,
"step": 642,
"valid_targets_mean": 14645.0,
"valid_targets_min": 3174
},
{
"epoch": 2.7377398720682304,
"grad_norm": 0.30881055312250094,
"learning_rate": 2.0267488088102657e-05,
"loss": 1.0037893056869507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23332089185714722,
"step": 643,
"valid_targets_mean": 15101.0,
"valid_targets_min": 6402
},
{
"epoch": 2.742004264392324,
"grad_norm": 0.28449112886998956,
"learning_rate": 2.0208048741249288e-05,
"loss": 0.9691751003265381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25266605615615845,
"step": 644,
"valid_targets_mean": 15077.2,
"valid_targets_min": 9108
},
{
"epoch": 2.746268656716418,
"grad_norm": 0.3351438852568685,
"learning_rate": 2.014860755652695e-05,
"loss": 1.047579288482666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2878621518611908,
"step": 645,
"valid_targets_mean": 15099.8,
"valid_targets_min": 6838
},
{
"epoch": 2.750533049040512,
"grad_norm": 0.2925694919153366,
"learning_rate": 2.0089165059029477e-05,
"loss": 1.0120222568511963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2626033425331116,
"step": 646,
"valid_targets_mean": 15174.9,
"valid_targets_min": 4873
},
{
"epoch": 2.7547974413646057,
"grad_norm": 0.30476874327327724,
"learning_rate": 2.0029721773862277e-05,
"loss": 0.9922143220901489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24901431798934937,
"step": 647,
"valid_targets_mean": 13627.9,
"valid_targets_min": 1811
},
{
"epoch": 2.7590618336886994,
"grad_norm": 0.2861044184251968,
"learning_rate": 1.997027822613773e-05,
"loss": 0.9913243055343628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22186391055583954,
"step": 648,
"valid_targets_mean": 13473.2,
"valid_targets_min": 5255
},
{
"epoch": 2.763326226012793,
"grad_norm": 0.3215467503039592,
"learning_rate": 1.9910834940970533e-05,
"loss": 1.0141838788986206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2568804621696472,
"step": 649,
"valid_targets_mean": 13940.1,
"valid_targets_min": 1938
},
{
"epoch": 2.767590618336887,
"grad_norm": 0.28526218260612457,
"learning_rate": 1.985139244347305e-05,
"loss": 1.024746298789978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2506389915943146,
"step": 650,
"valid_targets_mean": 14478.3,
"valid_targets_min": 1684
},
{
"epoch": 2.771855010660981,
"grad_norm": 0.3255391066375804,
"learning_rate": 1.979195125875072e-05,
"loss": 1.0240997076034546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.275799959897995,
"step": 651,
"valid_targets_mean": 14935.5,
"valid_targets_min": 6628
},
{
"epoch": 2.7761194029850746,
"grad_norm": 0.2913341418155478,
"learning_rate": 1.9732511911897353e-05,
"loss": 0.9887863397598267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2484378069639206,
"step": 652,
"valid_targets_mean": 14193.0,
"valid_targets_min": 2212
},
{
"epoch": 2.7803837953091683,
"grad_norm": 0.26002973612839325,
"learning_rate": 1.9673074927990525e-05,
"loss": 0.9766459465026855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21592479944229126,
"step": 653,
"valid_targets_mean": 13946.5,
"valid_targets_min": 2227
},
{
"epoch": 2.7846481876332625,
"grad_norm": 0.3011939249051517,
"learning_rate": 1.9613640832086957e-05,
"loss": 1.017755150794983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2458583414554596,
"step": 654,
"valid_targets_mean": 14395.0,
"valid_targets_min": 2255
},
{
"epoch": 2.788912579957356,
"grad_norm": 0.27094161767688085,
"learning_rate": 1.9554210149217855e-05,
"loss": 0.9604583978652954,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23105964064598083,
"step": 655,
"valid_targets_mean": 14670.8,
"valid_targets_min": 1239
},
{
"epoch": 2.79317697228145,
"grad_norm": 0.28745214224753945,
"learning_rate": 1.9494783404384265e-05,
"loss": 0.9861606955528259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2513197660446167,
"step": 656,
"valid_targets_mean": 13690.4,
"valid_targets_min": 2574
},
{
"epoch": 2.7974413646055436,
"grad_norm": 0.2694350691725643,
"learning_rate": 1.9435361122552437e-05,
"loss": 1.0003072023391724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26699090003967285,
"step": 657,
"valid_targets_mean": 15698.2,
"valid_targets_min": 11218
},
{
"epoch": 2.8017057569296373,
"grad_norm": 0.30467579312383347,
"learning_rate": 1.9375943828649215e-05,
"loss": 1.0127203464508057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24697598814964294,
"step": 658,
"valid_targets_mean": 13997.6,
"valid_targets_min": 4737
},
{
"epoch": 2.8059701492537314,
"grad_norm": 0.2986641340060596,
"learning_rate": 1.9316532047557378e-05,
"loss": 0.9938777089118958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2532762885093689,
"step": 659,
"valid_targets_mean": 13795.0,
"valid_targets_min": 3402
},
{
"epoch": 2.810234541577825,
"grad_norm": 0.27330007093437675,
"learning_rate": 1.9257126304110998e-05,
"loss": 0.98386549949646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24930505454540253,
"step": 660,
"valid_targets_mean": 14952.9,
"valid_targets_min": 6734
},
{
"epoch": 2.814498933901919,
"grad_norm": 0.3238728707341304,
"learning_rate": 1.919772712309081e-05,
"loss": 1.0038477182388306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22611001133918762,
"step": 661,
"valid_targets_mean": 14572.7,
"valid_targets_min": 1356
},
{
"epoch": 2.818763326226013,
"grad_norm": 0.274903170095313,
"learning_rate": 1.9138335029219572e-05,
"loss": 0.9993883371353149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.253467321395874,
"step": 662,
"valid_targets_mean": 15381.4,
"valid_targets_min": 4878
},
{
"epoch": 2.8230277185501067,
"grad_norm": 0.30407654470800843,
"learning_rate": 1.9078950547157458e-05,
"loss": 1.0459842681884766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2509838938713074,
"step": 663,
"valid_targets_mean": 14729.2,
"valid_targets_min": 5173
},
{
"epoch": 2.8272921108742004,
"grad_norm": 0.3061123867070012,
"learning_rate": 1.9019574201497387e-05,
"loss": 0.9924187064170837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2563045620918274,
"step": 664,
"valid_targets_mean": 14750.4,
"valid_targets_min": 5714
},
{
"epoch": 2.831556503198294,
"grad_norm": 0.30193406567165587,
"learning_rate": 1.8960206516760396e-05,
"loss": 1.0398309230804443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2820361256599426,
"step": 665,
"valid_targets_mean": 15179.4,
"valid_targets_min": 9622
},
{
"epoch": 2.835820895522388,
"grad_norm": 0.2929563961568466,
"learning_rate": 1.890084801739102e-05,
"loss": 0.9553192853927612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2163141965866089,
"step": 666,
"valid_targets_mean": 14456.8,
"valid_targets_min": 1677
},
{
"epoch": 2.840085287846482,
"grad_norm": 0.3091559645635843,
"learning_rate": 1.884149922775265e-05,
"loss": 0.9848713874816895,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2401236593723297,
"step": 667,
"valid_targets_mean": 14748.1,
"valid_targets_min": 7099
},
{
"epoch": 2.8443496801705757,
"grad_norm": 0.30089460795798023,
"learning_rate": 1.878216067212291e-05,
"loss": 1.0005512237548828,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23389124870300293,
"step": 668,
"valid_targets_mean": 14425.2,
"valid_targets_min": 5268
},
{
"epoch": 2.8486140724946694,
"grad_norm": 0.3261962138625656,
"learning_rate": 1.8722832874689007e-05,
"loss": 1.0311071872711182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2664957046508789,
"step": 669,
"valid_targets_mean": 15614.3,
"valid_targets_min": 11742
},
{
"epoch": 2.8528784648187635,
"grad_norm": 0.2878368826066572,
"learning_rate": 1.8663516359543123e-05,
"loss": 0.9968549013137817,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24377457797527313,
"step": 670,
"valid_targets_mean": 13797.7,
"valid_targets_min": 2331
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.31838290235824035,
"learning_rate": 1.860421165067775e-05,
"loss": 1.0099412202835083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24596422910690308,
"step": 671,
"valid_targets_mean": 15301.4,
"valid_targets_min": 3307
},
{
"epoch": 2.861407249466951,
"grad_norm": 0.2754651147467406,
"learning_rate": 1.8544919271981125e-05,
"loss": 0.9846042394638062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23170757293701172,
"step": 672,
"valid_targets_mean": 14041.2,
"valid_targets_min": 2267
},
{
"epoch": 2.8656716417910446,
"grad_norm": 0.32847468922418677,
"learning_rate": 1.8485639747232535e-05,
"loss": 1.005096435546875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25937849283218384,
"step": 673,
"valid_targets_mean": 15096.3,
"valid_targets_min": 9399
},
{
"epoch": 2.8699360341151388,
"grad_norm": 0.30458243305632854,
"learning_rate": 1.8426373600097723e-05,
"loss": 1.0177894830703735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25794124603271484,
"step": 674,
"valid_targets_mean": 15325.4,
"valid_targets_min": 8396
},
{
"epoch": 2.8742004264392325,
"grad_norm": 0.3261755820068459,
"learning_rate": 1.836712135412424e-05,
"loss": 1.002445101737976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25215405225753784,
"step": 675,
"valid_targets_mean": 15016.1,
"valid_targets_min": 7838
},
{
"epoch": 2.878464818763326,
"grad_norm": 0.2997312968171682,
"learning_rate": 1.8307883532736878e-05,
"loss": 1.0102388858795166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24711883068084717,
"step": 676,
"valid_targets_mean": 15054.6,
"valid_targets_min": 3859
},
{
"epoch": 2.88272921108742,
"grad_norm": 0.2929198430346057,
"learning_rate": 1.8248660659232964e-05,
"loss": 0.9696129560470581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23157832026481628,
"step": 677,
"valid_targets_mean": 14770.5,
"valid_targets_min": 5402
},
{
"epoch": 2.886993603411514,
"grad_norm": 0.28073241284593736,
"learning_rate": 1.8189453256777798e-05,
"loss": 1.013519525527954,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25993987917900085,
"step": 678,
"valid_targets_mean": 14648.9,
"valid_targets_min": 4446
},
{
"epoch": 2.8912579957356077,
"grad_norm": 0.2946695587948662,
"learning_rate": 1.8130261848399996e-05,
"loss": 0.9844130277633667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2361820936203003,
"step": 679,
"valid_targets_mean": 14444.9,
"valid_targets_min": 3093
},
{
"epoch": 2.8955223880597014,
"grad_norm": 0.28079565484091723,
"learning_rate": 1.8071086956986916e-05,
"loss": 0.956390380859375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22418171167373657,
"step": 680,
"valid_targets_mean": 13660.1,
"valid_targets_min": 1932
},
{
"epoch": 2.8997867803837956,
"grad_norm": 0.2801289330593702,
"learning_rate": 1.8011929105279967e-05,
"loss": 0.9912029504776001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2350948601961136,
"step": 681,
"valid_targets_mean": 14456.2,
"valid_targets_min": 5121
},
{
"epoch": 2.9040511727078893,
"grad_norm": 0.27325381262770354,
"learning_rate": 1.795278881587007e-05,
"loss": 0.9531004428863525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24051615595817566,
"step": 682,
"valid_targets_mean": 14905.6,
"valid_targets_min": 1896
},
{
"epoch": 2.908315565031983,
"grad_norm": 0.2986474799033835,
"learning_rate": 1.7893666611192962e-05,
"loss": 1.0013391971588135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2637110948562622,
"step": 683,
"valid_targets_mean": 14420.5,
"valid_targets_min": 6043
},
{
"epoch": 2.9125799573560767,
"grad_norm": 0.2625521170168768,
"learning_rate": 1.783456301352467e-05,
"loss": 1.032405972480774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2396593987941742,
"step": 684,
"valid_targets_mean": 14147.8,
"valid_targets_min": 2852
},
{
"epoch": 2.9168443496801704,
"grad_norm": 0.27774105361299584,
"learning_rate": 1.7775478544976813e-05,
"loss": 0.9582983255386353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24214479327201843,
"step": 685,
"valid_targets_mean": 14339.9,
"valid_targets_min": 1861
},
{
"epoch": 2.9211087420042645,
"grad_norm": 0.309414666717297,
"learning_rate": 1.7716413727492035e-05,
"loss": 1.0138694047927856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2687375843524933,
"step": 686,
"valid_targets_mean": 14603.4,
"valid_targets_min": 4177
},
{
"epoch": 2.925373134328358,
"grad_norm": 0.2813401428555624,
"learning_rate": 1.7657369082839392e-05,
"loss": 1.0201051235198975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24503937363624573,
"step": 687,
"valid_targets_mean": 14408.7,
"valid_targets_min": 6466
},
{
"epoch": 2.929637526652452,
"grad_norm": 0.2780914157152459,
"learning_rate": 1.7598345132609747e-05,
"loss": 1.0258854627609253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24974925816059113,
"step": 688,
"valid_targets_mean": 14399.9,
"valid_targets_min": 2770
},
{
"epoch": 2.933901918976546,
"grad_norm": 0.27826477070991834,
"learning_rate": 1.7539342398211132e-05,
"loss": 1.002000331878662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.269614040851593,
"step": 689,
"valid_targets_mean": 14672.0,
"valid_targets_min": 2071
},
{
"epoch": 2.9381663113006398,
"grad_norm": 0.26151797799696985,
"learning_rate": 1.748036140086416e-05,
"loss": 1.014590859413147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2685566246509552,
"step": 690,
"valid_targets_mean": 15134.9,
"valid_targets_min": 2612
},
{
"epoch": 2.9424307036247335,
"grad_norm": 0.27864426558205724,
"learning_rate": 1.742140266159744e-05,
"loss": 0.9970508813858032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24841861426830292,
"step": 691,
"valid_targets_mean": 14269.2,
"valid_targets_min": 1758
},
{
"epoch": 2.946695095948827,
"grad_norm": 0.25963872761453904,
"learning_rate": 1.7362466701242943e-05,
"loss": 1.0036523342132568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23161643743515015,
"step": 692,
"valid_targets_mean": 13665.4,
"valid_targets_min": 1414
},
{
"epoch": 2.950959488272921,
"grad_norm": 0.2715714971551824,
"learning_rate": 1.7303554040431426e-05,
"loss": 1.000980019569397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23404458165168762,
"step": 693,
"valid_targets_mean": 14589.4,
"valid_targets_min": 7691
},
{
"epoch": 2.955223880597015,
"grad_norm": 0.2895240322777501,
"learning_rate": 1.7244665199587812e-05,
"loss": 0.9945222735404968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2587818503379822,
"step": 694,
"valid_targets_mean": 15091.8,
"valid_targets_min": 7829
},
{
"epoch": 2.9594882729211087,
"grad_norm": 0.2491244470969691,
"learning_rate": 1.7185800698926594e-05,
"loss": 1.0258584022521973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2739405632019043,
"step": 695,
"valid_targets_mean": 14661.2,
"valid_targets_min": 10498
},
{
"epoch": 2.9637526652452024,
"grad_norm": 0.2577629779341349,
"learning_rate": 1.7126961058447276e-05,
"loss": 0.9935309886932373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23259735107421875,
"step": 696,
"valid_targets_mean": 15374.6,
"valid_targets_min": 7090
},
{
"epoch": 2.9680170575692966,
"grad_norm": 0.2610574679004603,
"learning_rate": 1.706814679792973e-05,
"loss": 1.0331902503967285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25708138942718506,
"step": 697,
"valid_targets_mean": 14630.6,
"valid_targets_min": 1022
},
{
"epoch": 2.9722814498933903,
"grad_norm": 0.2635939674737759,
"learning_rate": 1.7009358436929632e-05,
"loss": 0.984321653842926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23862984776496887,
"step": 698,
"valid_targets_mean": 14091.2,
"valid_targets_min": 2014
},
{
"epoch": 2.976545842217484,
"grad_norm": 0.2680053987936683,
"learning_rate": 1.6950596494773855e-05,
"loss": 1.0147403478622437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2726633548736572,
"step": 699,
"valid_targets_mean": 15212.6,
"valid_targets_min": 3210
},
{
"epoch": 2.9808102345415777,
"grad_norm": 0.2753441804695818,
"learning_rate": 1.6891861490555906e-05,
"loss": 1.0429158210754395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2542388141155243,
"step": 700,
"valid_targets_mean": 13402.4,
"valid_targets_min": 4405
},
{
"epoch": 2.9850746268656714,
"grad_norm": 0.29212240811701157,
"learning_rate": 1.683315394313132e-05,
"loss": 0.9809648990631104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24427923560142517,
"step": 701,
"valid_targets_mean": 14898.9,
"valid_targets_min": 1376
},
{
"epoch": 2.9893390191897655,
"grad_norm": 0.2847232073089366,
"learning_rate": 1.677447437111309e-05,
"loss": 0.9754581451416016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2438046932220459,
"step": 702,
"valid_targets_mean": 13953.7,
"valid_targets_min": 3392
},
{
"epoch": 2.9936034115138592,
"grad_norm": 0.26259731802338676,
"learning_rate": 1.671582329286707e-05,
"loss": 1.025207281112671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27571746706962585,
"step": 703,
"valid_targets_mean": 14422.6,
"valid_targets_min": 1137
},
{
"epoch": 2.997867803837953,
"grad_norm": 0.28446340160710176,
"learning_rate": 1.66572012265074e-05,
"loss": 1.0176316499710083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2568015456199646,
"step": 704,
"valid_targets_mean": 14873.5,
"valid_targets_min": 8982
},
{
"epoch": 3.0,
"grad_norm": 0.329072326861756,
"learning_rate": 1.6598608689891953e-05,
"loss": 0.9181491136550903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40637677907943726,
"step": 705,
"valid_targets_mean": 14252.5,
"valid_targets_min": 714
},
{
"epoch": 3.0042643923240937,
"grad_norm": 0.3417539224541602,
"learning_rate": 1.654004620061773e-05,
"loss": 0.9819753170013428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25378790497779846,
"step": 706,
"valid_targets_mean": 14935.4,
"valid_targets_min": 2227
},
{
"epoch": 3.008528784648188,
"grad_norm": 0.29505060833606567,
"learning_rate": 1.6481514276016297e-05,
"loss": 0.956446647644043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22329074144363403,
"step": 707,
"valid_targets_mean": 13370.4,
"valid_targets_min": 1780
},
{
"epoch": 3.0127931769722816,
"grad_norm": 0.33835427633326093,
"learning_rate": 1.6423013433149207e-05,
"loss": 0.9884039163589478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23932109773159027,
"step": 708,
"valid_targets_mean": 14046.8,
"valid_targets_min": 1756
},
{
"epoch": 3.0170575692963753,
"grad_norm": 0.261647860420719,
"learning_rate": 1.636454418880347e-05,
"loss": 0.9924356937408447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24471953511238098,
"step": 709,
"valid_targets_mean": 14270.6,
"valid_targets_min": 1366
},
{
"epoch": 3.021321961620469,
"grad_norm": 0.32856814358729175,
"learning_rate": 1.630610705948693e-05,
"loss": 0.9875960350036621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24989363551139832,
"step": 710,
"valid_targets_mean": 15469.6,
"valid_targets_min": 1397
},
{
"epoch": 3.025586353944563,
"grad_norm": 0.2696120998027373,
"learning_rate": 1.6247702561423753e-05,
"loss": 1.0232791900634766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24895143508911133,
"step": 711,
"valid_targets_mean": 14785.2,
"valid_targets_min": 2659
},
{
"epoch": 3.029850746268657,
"grad_norm": 0.36040449956817955,
"learning_rate": 1.6189331210549828e-05,
"loss": 0.9841296076774597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2596629858016968,
"step": 712,
"valid_targets_mean": 14634.5,
"valid_targets_min": 6716
},
{
"epoch": 3.0341151385927505,
"grad_norm": 0.2838400723401922,
"learning_rate": 1.613099352250825e-05,
"loss": 0.9826107025146484,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2429734766483307,
"step": 713,
"valid_targets_mean": 14157.4,
"valid_targets_min": 1760
},
{
"epoch": 3.038379530916844,
"grad_norm": 0.32517783745240114,
"learning_rate": 1.6072690012644717e-05,
"loss": 0.9862861633300781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24178437888622284,
"step": 714,
"valid_targets_mean": 15151.0,
"valid_targets_min": 8214
},
{
"epoch": 3.0426439232409384,
"grad_norm": 0.3017129052830993,
"learning_rate": 1.6014421196003022e-05,
"loss": 0.9499567747116089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2725258469581604,
"step": 715,
"valid_targets_mean": 15267.2,
"valid_targets_min": 6651
},
{
"epoch": 3.046908315565032,
"grad_norm": 0.3536692325479161,
"learning_rate": 1.5956187587320468e-05,
"loss": 0.9644232988357544,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2405238002538681,
"step": 716,
"valid_targets_mean": 15007.3,
"valid_targets_min": 6623
},
{
"epoch": 3.0511727078891258,
"grad_norm": 0.31421883625625663,
"learning_rate": 1.5897989701023355e-05,
"loss": 0.9658553600311279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24571488797664642,
"step": 717,
"valid_targets_mean": 14385.4,
"valid_targets_min": 2839
},
{
"epoch": 3.0554371002132195,
"grad_norm": 0.31629713516785807,
"learning_rate": 1.58398280512224e-05,
"loss": 0.979788064956665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.253682017326355,
"step": 718,
"valid_targets_mean": 14743.2,
"valid_targets_min": 3428
},
{
"epoch": 3.0597014925373136,
"grad_norm": 0.34674368334551675,
"learning_rate": 1.5781703151708215e-05,
"loss": 0.9843800067901611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2317553162574768,
"step": 719,
"valid_targets_mean": 13908.6,
"valid_targets_min": 2140
},
{
"epoch": 3.0639658848614073,
"grad_norm": 0.2913863501730682,
"learning_rate": 1.5723615515946773e-05,
"loss": 0.9665867686271667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23495791852474213,
"step": 720,
"valid_targets_mean": 13845.8,
"valid_targets_min": 2723
},
{
"epoch": 3.068230277185501,
"grad_norm": 0.37921521385833407,
"learning_rate": 1.5665565657074874e-05,
"loss": 0.9476820230484009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2518354654312134,
"step": 721,
"valid_targets_mean": 14853.0,
"valid_targets_min": 4328
},
{
"epoch": 3.0724946695095947,
"grad_norm": 0.3061956068087769,
"learning_rate": 1.560755408789558e-05,
"loss": 0.9628287553787231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21817487478256226,
"step": 722,
"valid_targets_mean": 12729.1,
"valid_targets_min": 2303
},
{
"epoch": 3.076759061833689,
"grad_norm": 0.378303072054375,
"learning_rate": 1.5549581320873715e-05,
"loss": 1.0098230838775635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2575799226760864,
"step": 723,
"valid_targets_mean": 15057.9,
"valid_targets_min": 7488
},
{
"epoch": 3.0810234541577826,
"grad_norm": 0.2890372455815842,
"learning_rate": 1.5491647868131343e-05,
"loss": 1.0204254388809204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26172223687171936,
"step": 724,
"valid_targets_mean": 15596.0,
"valid_targets_min": 7283
},
{
"epoch": 3.0852878464818763,
"grad_norm": 0.3420399710935773,
"learning_rate": 1.5433754241443223e-05,
"loss": 0.9512300491333008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2369207739830017,
"step": 725,
"valid_targets_mean": 14835.4,
"valid_targets_min": 6151
},
{
"epoch": 3.08955223880597,
"grad_norm": 0.34038150397640266,
"learning_rate": 1.53759009522323e-05,
"loss": 1.0124759674072266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2753083109855652,
"step": 726,
"valid_targets_mean": 15197.3,
"valid_targets_min": 8987
},
{
"epoch": 3.093816631130064,
"grad_norm": 0.3100192375950978,
"learning_rate": 1.5318088511565185e-05,
"loss": 1.0002617835998535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21985961496829987,
"step": 727,
"valid_targets_mean": 14557.7,
"valid_targets_min": 4288
},
{
"epoch": 3.098081023454158,
"grad_norm": 0.33281032554112766,
"learning_rate": 1.5260317430147627e-05,
"loss": 0.9853769540786743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2552061975002289,
"step": 728,
"valid_targets_mean": 15146.8,
"valid_targets_min": 6993
},
{
"epoch": 3.1023454157782515,
"grad_norm": 0.32340051408977954,
"learning_rate": 1.5202588218320024e-05,
"loss": 1.0069472789764404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2594277858734131,
"step": 729,
"valid_targets_mean": 14658.7,
"valid_targets_min": 9374
},
{
"epoch": 3.106609808102345,
"grad_norm": 0.3459546909784368,
"learning_rate": 1.5144901386052924e-05,
"loss": 0.988615870475769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2513861358165741,
"step": 730,
"valid_targets_mean": 14178.2,
"valid_targets_min": 1157
},
{
"epoch": 3.1108742004264394,
"grad_norm": 0.30440414834422264,
"learning_rate": 1.5087257442942467e-05,
"loss": 0.9952294230461121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24073612689971924,
"step": 731,
"valid_targets_mean": 14035.7,
"valid_targets_min": 2033
},
{
"epoch": 3.115138592750533,
"grad_norm": 0.3283844116398468,
"learning_rate": 1.502965689820593e-05,
"loss": 0.9941107034683228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2571861147880554,
"step": 732,
"valid_targets_mean": 14857.7,
"valid_targets_min": 5823
},
{
"epoch": 3.1194029850746268,
"grad_norm": 0.31863785306298104,
"learning_rate": 1.4972100260677222e-05,
"loss": 0.9742693901062012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2496413141489029,
"step": 733,
"valid_targets_mean": 15568.8,
"valid_targets_min": 9668
},
{
"epoch": 3.1236673773987205,
"grad_norm": 0.2980907358091392,
"learning_rate": 1.4914588038802383e-05,
"loss": 0.953710675239563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25917893648147583,
"step": 734,
"valid_targets_mean": 15243.9,
"valid_targets_min": 7651
},
{
"epoch": 3.1279317697228146,
"grad_norm": 0.3154771559803136,
"learning_rate": 1.4857120740635084e-05,
"loss": 0.9636905193328857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2144869565963745,
"step": 735,
"valid_targets_mean": 14190.0,
"valid_targets_min": 3093
},
{
"epoch": 3.1321961620469083,
"grad_norm": 0.283184831707923,
"learning_rate": 1.4799698873832153e-05,
"loss": 0.970219612121582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24540294706821442,
"step": 736,
"valid_targets_mean": 14513.7,
"valid_targets_min": 4489
},
{
"epoch": 3.136460554371002,
"grad_norm": 0.2974204072610502,
"learning_rate": 1.4742322945649073e-05,
"loss": 0.9687181711196899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23797650635242462,
"step": 737,
"valid_targets_mean": 15115.2,
"valid_targets_min": 3630
},
{
"epoch": 3.140724946695096,
"grad_norm": 0.25739824883202206,
"learning_rate": 1.4684993462935532e-05,
"loss": 1.0161409378051758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24516227841377258,
"step": 738,
"valid_targets_mean": 13897.5,
"valid_targets_min": 2132
},
{
"epoch": 3.14498933901919,
"grad_norm": 0.2999031415958146,
"learning_rate": 1.462771093213092e-05,
"loss": 0.9935801029205322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2576490640640259,
"step": 739,
"valid_targets_mean": 14842.8,
"valid_targets_min": 4455
},
{
"epoch": 3.1492537313432836,
"grad_norm": 0.29360540339054153,
"learning_rate": 1.4570475859259856e-05,
"loss": 1.0185129642486572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25551605224609375,
"step": 740,
"valid_targets_mean": 14303.8,
"valid_targets_min": 3999
},
{
"epoch": 3.1535181236673773,
"grad_norm": 0.2819035828390607,
"learning_rate": 1.4513288749927714e-05,
"loss": 0.9626775979995728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24636633694171906,
"step": 741,
"valid_targets_mean": 14189.3,
"valid_targets_min": 7031
},
{
"epoch": 3.1577825159914714,
"grad_norm": 0.28144076026143533,
"learning_rate": 1.4456150109316192e-05,
"loss": 1.010709285736084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2493475377559662,
"step": 742,
"valid_targets_mean": 15032.7,
"valid_targets_min": 6873
},
{
"epoch": 3.162046908315565,
"grad_norm": 0.29620172180454607,
"learning_rate": 1.4399060442178798e-05,
"loss": 1.0011711120605469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24875086545944214,
"step": 743,
"valid_targets_mean": 14197.8,
"valid_targets_min": 2689
},
{
"epoch": 3.166311300639659,
"grad_norm": 0.27460647158921325,
"learning_rate": 1.4342020252836437e-05,
"loss": 0.9482408165931702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23761501908302307,
"step": 744,
"valid_targets_mean": 14842.0,
"valid_targets_min": 4959
},
{
"epoch": 3.1705756929637525,
"grad_norm": 0.2811109311749601,
"learning_rate": 1.4285030045172913e-05,
"loss": 0.9633879661560059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25517749786376953,
"step": 745,
"valid_targets_mean": 15101.4,
"valid_targets_min": 7179
},
{
"epoch": 3.1748400852878467,
"grad_norm": 0.31868948949925263,
"learning_rate": 1.422809032263052e-05,
"loss": 0.9744983315467834,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2595503330230713,
"step": 746,
"valid_targets_mean": 15064.9,
"valid_targets_min": 3556
},
{
"epoch": 3.1791044776119404,
"grad_norm": 0.27981253592367966,
"learning_rate": 1.4171201588205566e-05,
"loss": 0.951709508895874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24453382194042206,
"step": 747,
"valid_targets_mean": 14460.3,
"valid_targets_min": 3546
},
{
"epoch": 3.183368869936034,
"grad_norm": 0.2880960969576025,
"learning_rate": 1.4114364344443935e-05,
"loss": 0.9665570259094238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23244787752628326,
"step": 748,
"valid_targets_mean": 15058.3,
"valid_targets_min": 5619
},
{
"epoch": 3.1876332622601278,
"grad_norm": 0.29060526455432706,
"learning_rate": 1.4057579093436653e-05,
"loss": 0.9781259894371033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23163039982318878,
"step": 749,
"valid_targets_mean": 14682.0,
"valid_targets_min": 4993
},
{
"epoch": 3.191897654584222,
"grad_norm": 0.2658497276672498,
"learning_rate": 1.400084633681546e-05,
"loss": 0.9583557844161987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2123161256313324,
"step": 750,
"valid_targets_mean": 14937.3,
"valid_targets_min": 6130
},
{
"epoch": 3.1961620469083156,
"grad_norm": 0.2748386771475929,
"learning_rate": 1.3944166575748355e-05,
"loss": 1.0065157413482666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24055948853492737,
"step": 751,
"valid_targets_mean": 15015.8,
"valid_targets_min": 6065
},
{
"epoch": 3.2004264392324093,
"grad_norm": 0.2551588931694033,
"learning_rate": 1.3887540310935187e-05,
"loss": 0.9470630288124084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.247154101729393,
"step": 752,
"valid_targets_mean": 14598.2,
"valid_targets_min": 2048
},
{
"epoch": 3.204690831556503,
"grad_norm": 0.2940780684005928,
"learning_rate": 1.3830968042603226e-05,
"loss": 0.9945131540298462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23863181471824646,
"step": 753,
"valid_targets_mean": 13820.2,
"valid_targets_min": 1730
},
{
"epoch": 3.208955223880597,
"grad_norm": 0.2648880071040607,
"learning_rate": 1.3774450270502762e-05,
"loss": 0.9359656572341919,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22216373682022095,
"step": 754,
"valid_targets_mean": 14340.8,
"valid_targets_min": 1380
},
{
"epoch": 3.213219616204691,
"grad_norm": 0.291391264645764,
"learning_rate": 1.3717987493902656e-05,
"loss": 0.9581259489059448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23209252953529358,
"step": 755,
"valid_targets_mean": 14867.7,
"valid_targets_min": 2135
},
{
"epoch": 3.2174840085287846,
"grad_norm": 0.26780135742435446,
"learning_rate": 1.3661580211585947e-05,
"loss": 1.0013266801834106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24638405442237854,
"step": 756,
"valid_targets_mean": 14753.9,
"valid_targets_min": 1968
},
{
"epoch": 3.2217484008528783,
"grad_norm": 0.2636036578450671,
"learning_rate": 1.3605228921845457e-05,
"loss": 0.9701790809631348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25354689359664917,
"step": 757,
"valid_targets_mean": 15058.9,
"valid_targets_min": 1677
},
{
"epoch": 3.2260127931769724,
"grad_norm": 0.27198763337914833,
"learning_rate": 1.3548934122479373e-05,
"loss": 0.992609977722168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25720351934432983,
"step": 758,
"valid_targets_mean": 14217.5,
"valid_targets_min": 4859
},
{
"epoch": 3.230277185501066,
"grad_norm": 0.2762059201189316,
"learning_rate": 1.349269631078686e-05,
"loss": 0.9707850813865662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24857297539710999,
"step": 759,
"valid_targets_mean": 15206.1,
"valid_targets_min": 5725
},
{
"epoch": 3.23454157782516,
"grad_norm": 0.2759388909625414,
"learning_rate": 1.3436515983563659e-05,
"loss": 1.0266224145889282,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2438332587480545,
"step": 760,
"valid_targets_mean": 14201.3,
"valid_targets_min": 1971
},
{
"epoch": 3.2388059701492535,
"grad_norm": 0.2465084879297926,
"learning_rate": 1.3380393637097692e-05,
"loss": 0.9265196323394775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.226420059800148,
"step": 761,
"valid_targets_mean": 14595.2,
"valid_targets_min": 4471
},
{
"epoch": 3.2430703624733477,
"grad_norm": 0.24311935209083857,
"learning_rate": 1.3324329767164708e-05,
"loss": 0.9605081081390381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23155008256435394,
"step": 762,
"valid_targets_mean": 14623.3,
"valid_targets_min": 6527
},
{
"epoch": 3.2473347547974414,
"grad_norm": 0.28258689167490697,
"learning_rate": 1.3268324869023878e-05,
"loss": 0.9768404364585876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2243225872516632,
"step": 763,
"valid_targets_mean": 13988.0,
"valid_targets_min": 1741
},
{
"epoch": 3.251599147121535,
"grad_norm": 0.26309024409635645,
"learning_rate": 1.3212379437413421e-05,
"loss": 1.0088846683502197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2539817690849304,
"step": 764,
"valid_targets_mean": 13731.8,
"valid_targets_min": 2767
},
{
"epoch": 3.2558635394456292,
"grad_norm": 0.27539579259693137,
"learning_rate": 1.3156493966546236e-05,
"loss": 0.960427463054657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2437533736228943,
"step": 765,
"valid_targets_mean": 15253.4,
"valid_targets_min": 10283
},
{
"epoch": 3.260127931769723,
"grad_norm": 0.2653907047853398,
"learning_rate": 1.3100668950105534e-05,
"loss": 0.9802002310752869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24038583040237427,
"step": 766,
"valid_targets_mean": 13939.0,
"valid_targets_min": 2151
},
{
"epoch": 3.2643923240938166,
"grad_norm": 0.2623493315499297,
"learning_rate": 1.3044904881240507e-05,
"loss": 0.9168298244476318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22533482313156128,
"step": 767,
"valid_targets_mean": 14715.1,
"valid_targets_min": 1182
},
{
"epoch": 3.2686567164179103,
"grad_norm": 0.32310144720030354,
"learning_rate": 1.2989202252561926e-05,
"loss": 0.9944812059402466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2704041004180908,
"step": 768,
"valid_targets_mean": 15047.2,
"valid_targets_min": 9284
},
{
"epoch": 3.272921108742004,
"grad_norm": 0.26244209038363603,
"learning_rate": 1.2933561556137806e-05,
"loss": 0.9888614416122437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2533096969127655,
"step": 769,
"valid_targets_mean": 15175.8,
"valid_targets_min": 8491
},
{
"epoch": 3.277185501066098,
"grad_norm": 0.2532907694607067,
"learning_rate": 1.2877983283489062e-05,
"loss": 0.9520066976547241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2536150813102722,
"step": 770,
"valid_targets_mean": 15184.2,
"valid_targets_min": 7985
},
{
"epoch": 3.281449893390192,
"grad_norm": 0.26781215436772277,
"learning_rate": 1.2822467925585186e-05,
"loss": 0.9979465007781982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.263954222202301,
"step": 771,
"valid_targets_mean": 14969.1,
"valid_targets_min": 4675
},
{
"epoch": 3.2857142857142856,
"grad_norm": 0.2608385336238836,
"learning_rate": 1.2767015972839879e-05,
"loss": 0.9394721388816833,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24243156611919403,
"step": 772,
"valid_targets_mean": 14638.4,
"valid_targets_min": 2025
},
{
"epoch": 3.2899786780383797,
"grad_norm": 0.2487866675739251,
"learning_rate": 1.2711627915106728e-05,
"loss": 0.975515604019165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20965570211410522,
"step": 773,
"valid_targets_mean": 14076.6,
"valid_targets_min": 2062
},
{
"epoch": 3.2942430703624734,
"grad_norm": 0.2510762402174945,
"learning_rate": 1.2656304241674877e-05,
"loss": 0.9717892408370972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2080877125263214,
"step": 774,
"valid_targets_mean": 13653.0,
"valid_targets_min": 1617
},
{
"epoch": 3.298507462686567,
"grad_norm": 0.30211205613009207,
"learning_rate": 1.2601045441264734e-05,
"loss": 0.9691690802574158,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24019382894039154,
"step": 775,
"valid_targets_mean": 14468.7,
"valid_targets_min": 1497
},
{
"epoch": 3.302771855010661,
"grad_norm": 0.23849976778362944,
"learning_rate": 1.2545852002023599e-05,
"loss": 0.9814242124557495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22221355140209198,
"step": 776,
"valid_targets_mean": 13769.2,
"valid_targets_min": 1414
},
{
"epoch": 3.307036247334755,
"grad_norm": 0.3050255894382961,
"learning_rate": 1.2490724411521406e-05,
"loss": 1.0499078035354614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2555561661720276,
"step": 777,
"valid_targets_mean": 14771.3,
"valid_targets_min": 2909
},
{
"epoch": 3.3113006396588487,
"grad_norm": 0.24722631371566617,
"learning_rate": 1.243566315674637e-05,
"loss": 0.9678243398666382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25532081723213196,
"step": 778,
"valid_targets_mean": 14883.9,
"valid_targets_min": 3976
},
{
"epoch": 3.3155650319829424,
"grad_norm": 0.28305369699191174,
"learning_rate": 1.238066872410073e-05,
"loss": 1.048391342163086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570115327835083,
"step": 779,
"valid_targets_mean": 13571.6,
"valid_targets_min": 2936
},
{
"epoch": 3.319829424307036,
"grad_norm": 0.2681526312865143,
"learning_rate": 1.2325741599396418e-05,
"loss": 0.9866071939468384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23067453503608704,
"step": 780,
"valid_targets_mean": 13806.5,
"valid_targets_min": 2177
},
{
"epoch": 3.3240938166311302,
"grad_norm": 0.26656876730810636,
"learning_rate": 1.2270882267850765e-05,
"loss": 0.928637683391571,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23767060041427612,
"step": 781,
"valid_targets_mean": 15658.2,
"valid_targets_min": 6294
},
{
"epoch": 3.328358208955224,
"grad_norm": 0.29797608884424853,
"learning_rate": 1.2216091214082248e-05,
"loss": 0.9275143146514893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23108075559139252,
"step": 782,
"valid_targets_mean": 14416.9,
"valid_targets_min": 3910
},
{
"epoch": 3.3326226012793176,
"grad_norm": 0.24359151295047798,
"learning_rate": 1.2161368922106192e-05,
"loss": 0.9925769567489624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26450932025909424,
"step": 783,
"valid_targets_mean": 15729.0,
"valid_targets_min": 10634
},
{
"epoch": 3.3368869936034113,
"grad_norm": 0.2818400310256115,
"learning_rate": 1.2106715875330475e-05,
"loss": 0.9968470931053162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2680169939994812,
"step": 784,
"valid_targets_mean": 14808.2,
"valid_targets_min": 3531
},
{
"epoch": 3.3411513859275055,
"grad_norm": 0.2677582666064544,
"learning_rate": 1.2052132556551275e-05,
"loss": 1.0421117544174194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23851540684700012,
"step": 785,
"valid_targets_mean": 13779.9,
"valid_targets_min": 1919
},
{
"epoch": 3.345415778251599,
"grad_norm": 0.25621439378166544,
"learning_rate": 1.1997619447948814e-05,
"loss": 0.9955507516860962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24843750894069672,
"step": 786,
"valid_targets_mean": 14286.3,
"valid_targets_min": 2432
},
{
"epoch": 3.349680170575693,
"grad_norm": 0.2869822205087454,
"learning_rate": 1.1943177031083094e-05,
"loss": 1.018493890762329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2717875838279724,
"step": 787,
"valid_targets_mean": 15721.7,
"valid_targets_min": 9700
},
{
"epoch": 3.3539445628997866,
"grad_norm": 0.24344917057840126,
"learning_rate": 1.1888805786889621e-05,
"loss": 1.013110876083374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25155869126319885,
"step": 788,
"valid_targets_mean": 15286.8,
"valid_targets_min": 3694
},
{
"epoch": 3.3582089552238807,
"grad_norm": 0.28861897558310295,
"learning_rate": 1.183450619567518e-05,
"loss": 0.9786880016326904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24753284454345703,
"step": 789,
"valid_targets_mean": 14251.6,
"valid_targets_min": 4490
},
{
"epoch": 3.3624733475479744,
"grad_norm": 0.2633144404311006,
"learning_rate": 1.1780278737113581e-05,
"loss": 1.0120458602905273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24441072344779968,
"step": 790,
"valid_targets_mean": 14975.3,
"valid_targets_min": 3680
},
{
"epoch": 3.366737739872068,
"grad_norm": 0.2803292068269423,
"learning_rate": 1.1726123890241439e-05,
"loss": 0.9977100491523743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25507649779319763,
"step": 791,
"valid_targets_mean": 14009.7,
"valid_targets_min": 3023
},
{
"epoch": 3.3710021321961623,
"grad_norm": 0.26765143753677234,
"learning_rate": 1.1672042133453925e-05,
"loss": 0.9835935235023499,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2534845471382141,
"step": 792,
"valid_targets_mean": 14919.7,
"valid_targets_min": 6013
},
{
"epoch": 3.375266524520256,
"grad_norm": 0.25324643234075256,
"learning_rate": 1.1618033944500527e-05,
"loss": 0.9231183528900146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22209623456001282,
"step": 793,
"valid_targets_mean": 14836.0,
"valid_targets_min": 2406
},
{
"epoch": 3.3795309168443497,
"grad_norm": 0.27382533446583085,
"learning_rate": 1.1564099800480864e-05,
"loss": 0.9658839702606201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22201785445213318,
"step": 794,
"valid_targets_mean": 13774.2,
"valid_targets_min": 1670
},
{
"epoch": 3.3837953091684434,
"grad_norm": 0.23574579575274768,
"learning_rate": 1.151024017784045e-05,
"loss": 0.9784061312675476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2386031597852707,
"step": 795,
"valid_targets_mean": 14262.1,
"valid_targets_min": 4572
},
{
"epoch": 3.388059701492537,
"grad_norm": 0.26446525011364513,
"learning_rate": 1.1456455552366488e-05,
"loss": 0.9620468616485596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2624356150627136,
"step": 796,
"valid_targets_mean": 15688.0,
"valid_targets_min": 10994
},
{
"epoch": 3.3923240938166312,
"grad_norm": 0.2946516604731879,
"learning_rate": 1.1402746399183671e-05,
"loss": 0.9715833067893982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2458597719669342,
"step": 797,
"valid_targets_mean": 14943.1,
"valid_targets_min": 6999
},
{
"epoch": 3.396588486140725,
"grad_norm": 0.25100464784214727,
"learning_rate": 1.1349113192749986e-05,
"loss": 0.979851484298706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2558574378490448,
"step": 798,
"valid_targets_mean": 15075.8,
"valid_targets_min": 6989
},
{
"epoch": 3.4008528784648187,
"grad_norm": 0.28293072370460143,
"learning_rate": 1.1295556406852488e-05,
"loss": 0.9970759749412537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23473231494426727,
"step": 799,
"valid_targets_mean": 14168.6,
"valid_targets_min": 1460
},
{
"epoch": 3.405117270788913,
"grad_norm": 0.26993063050031846,
"learning_rate": 1.1242076514603201e-05,
"loss": 0.9559547901153564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21587030589580536,
"step": 800,
"valid_targets_mean": 14430.6,
"valid_targets_min": 2882
},
{
"epoch": 3.4093816631130065,
"grad_norm": 0.24685229128332098,
"learning_rate": 1.1188673988434831e-05,
"loss": 0.9920517206192017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24107736349105835,
"step": 801,
"valid_targets_mean": 13998.1,
"valid_targets_min": 3138
},
{
"epoch": 3.4136460554371,
"grad_norm": 0.2755427283976288,
"learning_rate": 1.1135349300096667e-05,
"loss": 0.9662362933158875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24288468062877655,
"step": 802,
"valid_targets_mean": 15185.6,
"valid_targets_min": 3257
},
{
"epoch": 3.417910447761194,
"grad_norm": 0.25239907237737214,
"learning_rate": 1.1082102920650397e-05,
"loss": 0.9824597835540771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22753871977329254,
"step": 803,
"valid_targets_mean": 14153.7,
"valid_targets_min": 4364
},
{
"epoch": 3.4221748400852876,
"grad_norm": 0.2562666347543531,
"learning_rate": 1.102893532046593e-05,
"loss": 1.0019625425338745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2592023015022278,
"step": 804,
"valid_targets_mean": 14353.4,
"valid_targets_min": 2195
},
{
"epoch": 3.4264392324093818,
"grad_norm": 0.28238824416216146,
"learning_rate": 1.0975846969217258e-05,
"loss": 0.968468189239502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25971686840057373,
"step": 805,
"valid_targets_mean": 14756.9,
"valid_targets_min": 7238
},
{
"epoch": 3.4307036247334755,
"grad_norm": 0.26599977569448535,
"learning_rate": 1.092283833587829e-05,
"loss": 0.983420729637146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25543704628944397,
"step": 806,
"valid_targets_mean": 14806.7,
"valid_targets_min": 4322
},
{
"epoch": 3.434968017057569,
"grad_norm": 0.2638984292339725,
"learning_rate": 1.086990988871873e-05,
"loss": 0.9687063097953796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23694062232971191,
"step": 807,
"valid_targets_mean": 14201.8,
"valid_targets_min": 2523
},
{
"epoch": 3.4392324093816633,
"grad_norm": 0.2622674577937183,
"learning_rate": 1.0817062095299929e-05,
"loss": 0.9997848272323608,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2400442361831665,
"step": 808,
"valid_targets_mean": 13671.8,
"valid_targets_min": 1212
},
{
"epoch": 3.443496801705757,
"grad_norm": 0.2569999483268217,
"learning_rate": 1.0764295422470755e-05,
"loss": 0.9746481776237488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23871907591819763,
"step": 809,
"valid_targets_mean": 14831.8,
"valid_targets_min": 3113
},
{
"epoch": 3.4477611940298507,
"grad_norm": 0.2757061743608293,
"learning_rate": 1.0711610336363477e-05,
"loss": 0.994911253452301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25191304087638855,
"step": 810,
"valid_targets_mean": 14924.7,
"valid_targets_min": 4915
},
{
"epoch": 3.4520255863539444,
"grad_norm": 0.2549562317547253,
"learning_rate": 1.065900730238961e-05,
"loss": 0.9198431968688965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2329210340976715,
"step": 811,
"valid_targets_mean": 15488.8,
"valid_targets_min": 1409
},
{
"epoch": 3.4562899786780386,
"grad_norm": 0.2534486974336105,
"learning_rate": 1.0606486785235879e-05,
"loss": 0.985055148601532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24948279559612274,
"step": 812,
"valid_targets_mean": 14707.6,
"valid_targets_min": 7077
},
{
"epoch": 3.4605543710021323,
"grad_norm": 0.26322629067765024,
"learning_rate": 1.0554049248860045e-05,
"loss": 1.0189073085784912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.244399756193161,
"step": 813,
"valid_targets_mean": 13376.9,
"valid_targets_min": 2264
},
{
"epoch": 3.464818763326226,
"grad_norm": 0.25987012261112424,
"learning_rate": 1.0501695156486819e-05,
"loss": 0.9842698574066162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23711949586868286,
"step": 814,
"valid_targets_mean": 14910.6,
"valid_targets_min": 2733
},
{
"epoch": 3.4690831556503197,
"grad_norm": 0.24191801293970758,
"learning_rate": 1.0449424970603796e-05,
"loss": 0.946979820728302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22759191691875458,
"step": 815,
"valid_targets_mean": 14495.3,
"valid_targets_min": 2024
},
{
"epoch": 3.473347547974414,
"grad_norm": 0.2540091751033316,
"learning_rate": 1.0397239152957356e-05,
"loss": 0.9637709259986877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2339923232793808,
"step": 816,
"valid_targets_mean": 14073.7,
"valid_targets_min": 1239
},
{
"epoch": 3.4776119402985075,
"grad_norm": 0.24222044744241059,
"learning_rate": 1.034513816454858e-05,
"loss": 1.0322394371032715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2537650167942047,
"step": 817,
"valid_targets_mean": 14066.1,
"valid_targets_min": 1944
},
{
"epoch": 3.481876332622601,
"grad_norm": 0.2611709250817118,
"learning_rate": 1.0293122465629186e-05,
"loss": 0.9560979604721069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24220697581768036,
"step": 818,
"valid_targets_mean": 14672.9,
"valid_targets_min": 3004
},
{
"epoch": 3.486140724946695,
"grad_norm": 0.26355777917374235,
"learning_rate": 1.0241192515697432e-05,
"loss": 0.9867568016052246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23415711522102356,
"step": 819,
"valid_targets_mean": 14388.9,
"valid_targets_min": 2995
},
{
"epoch": 3.490405117270789,
"grad_norm": 0.2371250728550352,
"learning_rate": 1.0189348773494135e-05,
"loss": 1.0202665328979492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2818511128425598,
"step": 820,
"valid_targets_mean": 14920.1,
"valid_targets_min": 4167
},
{
"epoch": 3.4946695095948828,
"grad_norm": 0.259705177489916,
"learning_rate": 1.0137591696998514e-05,
"loss": 0.9820563197135925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24459995329380035,
"step": 821,
"valid_targets_mean": 14204.5,
"valid_targets_min": 1495
},
{
"epoch": 3.4989339019189765,
"grad_norm": 0.265988138101001,
"learning_rate": 1.0085921743424225e-05,
"loss": 0.9641842842102051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2623577117919922,
"step": 822,
"valid_targets_mean": 14899.5,
"valid_targets_min": 4092
},
{
"epoch": 3.50319829424307,
"grad_norm": 0.2388400445828997,
"learning_rate": 1.0034339369215288e-05,
"loss": 1.0143928527832031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2637423872947693,
"step": 823,
"valid_targets_mean": 15294.1,
"valid_targets_min": 9094
},
{
"epoch": 3.5074626865671643,
"grad_norm": 0.2803895403000936,
"learning_rate": 9.982845030042068e-06,
"loss": 0.9518420696258545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2300497591495514,
"step": 824,
"valid_targets_mean": 14787.1,
"valid_targets_min": 3972
},
{
"epoch": 3.511727078891258,
"grad_norm": 0.24699293670834366,
"learning_rate": 9.931439180797237e-06,
"loss": 1.0005815029144287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2434898316860199,
"step": 825,
"valid_targets_mean": 15400.7,
"valid_targets_min": 8805
},
{
"epoch": 3.5159914712153517,
"grad_norm": 0.26869982611870297,
"learning_rate": 9.880122275591752e-06,
"loss": 0.9343856573104858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25968652963638306,
"step": 826,
"valid_targets_mean": 14202.7,
"valid_targets_min": 3132
},
{
"epoch": 3.520255863539446,
"grad_norm": 0.29846154633481226,
"learning_rate": 9.828894767750865e-06,
"loss": 1.0360630750656128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2631513178348541,
"step": 827,
"valid_targets_mean": 15087.4,
"valid_targets_min": 2986
},
{
"epoch": 3.5245202558635396,
"grad_norm": 0.231628468381195,
"learning_rate": 9.777757109810102e-06,
"loss": 0.9682170152664185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24099406599998474,
"step": 828,
"valid_targets_mean": 14234.5,
"valid_targets_min": 2212
},
{
"epoch": 3.5287846481876333,
"grad_norm": 0.25089908418380913,
"learning_rate": 9.726709753511275e-06,
"loss": 0.9408371448516846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24183693528175354,
"step": 829,
"valid_targets_mean": 15226.1,
"valid_targets_min": 8810
},
{
"epoch": 3.533049040511727,
"grad_norm": 0.27121917659055517,
"learning_rate": 9.675753149798474e-06,
"loss": 0.9771254658699036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24990615248680115,
"step": 830,
"valid_targets_mean": 14781.8,
"valid_targets_min": 6994
},
{
"epoch": 3.5373134328358207,
"grad_norm": 0.2502357665660496,
"learning_rate": 9.624887748814118e-06,
"loss": 1.0215736627578735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23912081122398376,
"step": 831,
"valid_targets_mean": 15214.8,
"valid_targets_min": 10179
},
{
"epoch": 3.541577825159915,
"grad_norm": 0.2600784963343472,
"learning_rate": 9.574113999894909e-06,
"loss": 0.9992471933364868,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22977708280086517,
"step": 832,
"valid_targets_mean": 13955.2,
"valid_targets_min": 2242
},
{
"epoch": 3.5458422174840085,
"grad_norm": 0.2516815603897745,
"learning_rate": 9.523432351567979e-06,
"loss": 0.9578772783279419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24671056866645813,
"step": 833,
"valid_targets_mean": 15254.7,
"valid_targets_min": 7740
},
{
"epoch": 3.550106609808102,
"grad_norm": 0.24192388857352504,
"learning_rate": 9.472843251546792e-06,
"loss": 1.0055651664733887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23401792347431183,
"step": 834,
"valid_targets_mean": 13342.5,
"valid_targets_min": 2585
},
{
"epoch": 3.5543710021321964,
"grad_norm": 0.27778145777402363,
"learning_rate": 9.422347146727294e-06,
"loss": 0.9918792843818665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24214953184127808,
"step": 835,
"valid_targets_mean": 14498.8,
"valid_targets_min": 1281
},
{
"epoch": 3.55863539445629,
"grad_norm": 0.2614800511266557,
"learning_rate": 9.371944483183912e-06,
"loss": 0.961499035358429,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2294447422027588,
"step": 836,
"valid_targets_mean": 14546.1,
"valid_targets_min": 5910
},
{
"epoch": 3.5628997867803838,
"grad_norm": 0.24803139384138795,
"learning_rate": 9.321635706165635e-06,
"loss": 0.9757760763168335,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2558143734931946,
"step": 837,
"valid_targets_mean": 15266.3,
"valid_targets_min": 8894
},
{
"epoch": 3.5671641791044775,
"grad_norm": 0.26421904120538203,
"learning_rate": 9.271421260092075e-06,
"loss": 0.9891831874847412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24245423078536987,
"step": 838,
"valid_targets_mean": 14179.2,
"valid_targets_min": 7842
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.24120372404398885,
"learning_rate": 9.221301588549519e-06,
"loss": 0.942074179649353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25345689058303833,
"step": 839,
"valid_targets_mean": 14947.5,
"valid_targets_min": 9230
},
{
"epoch": 3.5756929637526653,
"grad_norm": 0.24432645883901663,
"learning_rate": 9.171277134287057e-06,
"loss": 0.9449573755264282,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22649499773979187,
"step": 840,
"valid_targets_mean": 14274.2,
"valid_targets_min": 1554
},
{
"epoch": 3.579957356076759,
"grad_norm": 0.24438018740047743,
"learning_rate": 9.121348339212634e-06,
"loss": 1.0226428508758545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25364571809768677,
"step": 841,
"valid_targets_mean": 14984.0,
"valid_targets_min": 2854
},
{
"epoch": 3.5842217484008527,
"grad_norm": 0.23228082229773042,
"learning_rate": 9.07151564438916e-06,
"loss": 1.0022697448730469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2715730667114258,
"step": 842,
"valid_targets_mean": 14656.7,
"valid_targets_min": 6455
},
{
"epoch": 3.588486140724947,
"grad_norm": 0.22153785708655307,
"learning_rate": 9.021779490030611e-06,
"loss": 0.9454774260520935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22272752225399017,
"step": 843,
"valid_targets_mean": 14127.5,
"valid_targets_min": 934
},
{
"epoch": 3.5927505330490406,
"grad_norm": 0.23925613158013162,
"learning_rate": 8.972140315498119e-06,
"loss": 0.9721479415893555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2528744637966156,
"step": 844,
"valid_targets_mean": 15379.6,
"valid_targets_min": 8062
},
{
"epoch": 3.5970149253731343,
"grad_norm": 0.24416093619066498,
"learning_rate": 8.922598559296154e-06,
"loss": 0.9860814809799194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24769961833953857,
"step": 845,
"valid_targets_mean": 15095.0,
"valid_targets_min": 9098
},
{
"epoch": 3.6012793176972284,
"grad_norm": 0.24450290094743976,
"learning_rate": 8.873154659068582e-06,
"loss": 0.9304975271224976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2089354395866394,
"step": 846,
"valid_targets_mean": 14593.3,
"valid_targets_min": 4305
},
{
"epoch": 3.605543710021322,
"grad_norm": 0.2359163136679419,
"learning_rate": 8.823809051594816e-06,
"loss": 0.9832009673118591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23041662573814392,
"step": 847,
"valid_targets_mean": 13953.9,
"valid_targets_min": 1518
},
{
"epoch": 3.609808102345416,
"grad_norm": 0.2245217144041866,
"learning_rate": 8.774562172785988e-06,
"loss": 0.9642736315727234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2261798083782196,
"step": 848,
"valid_targets_mean": 14167.8,
"valid_targets_min": 2377
},
{
"epoch": 3.6140724946695095,
"grad_norm": 0.26510925350020154,
"learning_rate": 8.725414457681063e-06,
"loss": 1.0208244323730469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2765214443206787,
"step": 849,
"valid_targets_mean": 15571.1,
"valid_targets_min": 3899
},
{
"epoch": 3.6183368869936032,
"grad_norm": 0.23189319110965834,
"learning_rate": 8.676366340443017e-06,
"loss": 0.9406700134277344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22266805171966553,
"step": 850,
"valid_targets_mean": 14893.3,
"valid_targets_min": 6060
},
{
"epoch": 3.6226012793176974,
"grad_norm": 0.2535988557752394,
"learning_rate": 8.627418254355e-06,
"loss": 0.9919254183769226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2515752613544464,
"step": 851,
"valid_targets_mean": 14232.6,
"valid_targets_min": 1176
},
{
"epoch": 3.626865671641791,
"grad_norm": 0.23243500837942946,
"learning_rate": 8.578570631816474e-06,
"loss": 0.9726200103759766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23803666234016418,
"step": 852,
"valid_targets_mean": 14178.5,
"valid_targets_min": 7984
},
{
"epoch": 3.631130063965885,
"grad_norm": 0.24950532558501276,
"learning_rate": 8.529823904339472e-06,
"loss": 0.9699271321296692,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.251629114151001,
"step": 853,
"valid_targets_mean": 13938.4,
"valid_targets_min": 4176
},
{
"epoch": 3.635394456289979,
"grad_norm": 0.24895272568179946,
"learning_rate": 8.481178502544684e-06,
"loss": 0.9935591220855713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22088086605072021,
"step": 854,
"valid_targets_mean": 14311.4,
"valid_targets_min": 3387
},
{
"epoch": 3.6396588486140726,
"grad_norm": 0.24976757519865808,
"learning_rate": 8.43263485615774e-06,
"loss": 0.9702616930007935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2503027319908142,
"step": 855,
"valid_targets_mean": 15099.9,
"valid_targets_min": 4158
},
{
"epoch": 3.6439232409381663,
"grad_norm": 0.2652806510317654,
"learning_rate": 8.384193394005372e-06,
"loss": 0.9816626310348511,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25915277004241943,
"step": 856,
"valid_targets_mean": 14864.0,
"valid_targets_min": 9550
},
{
"epoch": 3.64818763326226,
"grad_norm": 0.24571537156487527,
"learning_rate": 8.33585454401161e-06,
"loss": 0.9883707761764526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23713959753513336,
"step": 857,
"valid_targets_mean": 14149.2,
"valid_targets_min": 5806
},
{
"epoch": 3.6524520255863537,
"grad_norm": 0.23524174221929256,
"learning_rate": 8.287618733194073e-06,
"loss": 0.9662632346153259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23696158826351166,
"step": 858,
"valid_targets_mean": 14875.3,
"valid_targets_min": 4261
},
{
"epoch": 3.656716417910448,
"grad_norm": 0.22667413416922827,
"learning_rate": 8.239486387660096e-06,
"loss": 0.9632445573806763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2446478307247162,
"step": 859,
"valid_targets_mean": 14762.0,
"valid_targets_min": 2243
},
{
"epoch": 3.6609808102345416,
"grad_norm": 0.24089377081144345,
"learning_rate": 8.191457932603052e-06,
"loss": 0.9559063911437988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2483818233013153,
"step": 860,
"valid_targets_mean": 14555.2,
"valid_targets_min": 2317
},
{
"epoch": 3.6652452025586353,
"grad_norm": 0.2588786545230032,
"learning_rate": 8.143533792298545e-06,
"loss": 0.9960157871246338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2394169569015503,
"step": 861,
"valid_targets_mean": 13622.2,
"valid_targets_min": 1189
},
{
"epoch": 3.6695095948827294,
"grad_norm": 0.2235205743092298,
"learning_rate": 8.095714390100698e-06,
"loss": 0.9713449478149414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25745829939842224,
"step": 862,
"valid_targets_mean": 14786.1,
"valid_targets_min": 5323
},
{
"epoch": 3.673773987206823,
"grad_norm": 0.22968864712171463,
"learning_rate": 8.048000148438375e-06,
"loss": 0.9706517457962036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2478504478931427,
"step": 863,
"valid_targets_mean": 14706.8,
"valid_targets_min": 2050
},
{
"epoch": 3.678038379530917,
"grad_norm": 0.22612857332196562,
"learning_rate": 8.000391488811485e-06,
"loss": 0.9807164669036865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24754057824611664,
"step": 864,
"valid_targets_mean": 14964.3,
"valid_targets_min": 4977
},
{
"epoch": 3.6823027718550105,
"grad_norm": 0.25539163939478027,
"learning_rate": 7.952888831787215e-06,
"loss": 1.0361860990524292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24390782415866852,
"step": 865,
"valid_targets_mean": 14916.1,
"valid_targets_min": 5985
},
{
"epoch": 3.6865671641791042,
"grad_norm": 0.2455547337289385,
"learning_rate": 7.905492596996391e-06,
"loss": 0.9568573236465454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23050349950790405,
"step": 866,
"valid_targets_mean": 14725.6,
"valid_targets_min": 6183
},
{
"epoch": 3.6908315565031984,
"grad_norm": 0.22769656736556496,
"learning_rate": 7.858203203129668e-06,
"loss": 0.9520964026451111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24754388630390167,
"step": 867,
"valid_targets_mean": 15242.0,
"valid_targets_min": 7757
},
{
"epoch": 3.695095948827292,
"grad_norm": 0.24458247633032892,
"learning_rate": 7.811021067933919e-06,
"loss": 0.9987329840660095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24426715075969696,
"step": 868,
"valid_targets_mean": 14525.9,
"valid_targets_min": 1895
},
{
"epoch": 3.699360341151386,
"grad_norm": 0.24146868219803297,
"learning_rate": 7.763946608208504e-06,
"loss": 0.9650008678436279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22406381368637085,
"step": 869,
"valid_targets_mean": 14157.7,
"valid_targets_min": 2294
},
{
"epoch": 3.70362473347548,
"grad_norm": 0.21233317092547138,
"learning_rate": 7.716980239801588e-06,
"loss": 0.9769718050956726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22000914812088013,
"step": 870,
"valid_targets_mean": 13162.8,
"valid_targets_min": 2704
},
{
"epoch": 3.7078891257995736,
"grad_norm": 0.25981603859930497,
"learning_rate": 7.670122377606495e-06,
"loss": 1.0230576992034912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25247544050216675,
"step": 871,
"valid_targets_mean": 13542.7,
"valid_targets_min": 1022
},
{
"epoch": 3.7121535181236673,
"grad_norm": 0.24120562129813655,
"learning_rate": 7.623373435557988e-06,
"loss": 0.9463640451431274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2375239133834839,
"step": 872,
"valid_targets_mean": 15127.0,
"valid_targets_min": 6449
},
{
"epoch": 3.716417910447761,
"grad_norm": 0.2245030678554318,
"learning_rate": 7.5767338266286775e-06,
"loss": 0.9786025285720825,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23157241940498352,
"step": 873,
"valid_targets_mean": 13910.7,
"valid_targets_min": 5037
},
{
"epoch": 3.7206823027718547,
"grad_norm": 0.23288844831269276,
"learning_rate": 7.530203962825331e-06,
"loss": 0.9911025762557983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21122583746910095,
"step": 874,
"valid_targets_mean": 13119.2,
"valid_targets_min": 2490
},
{
"epoch": 3.724946695095949,
"grad_norm": 0.27222483816263005,
"learning_rate": 7.483784255185249e-06,
"loss": 0.9990006685256958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24723437428474426,
"step": 875,
"valid_targets_mean": 14785.9,
"valid_targets_min": 3874
},
{
"epoch": 3.7292110874200426,
"grad_norm": 0.2644117293321211,
"learning_rate": 7.437475113772632e-06,
"loss": 1.0078997611999512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25119057297706604,
"step": 876,
"valid_targets_mean": 15057.7,
"valid_targets_min": 9723
},
{
"epoch": 3.7334754797441363,
"grad_norm": 0.24724171688530638,
"learning_rate": 7.391276947674932e-06,
"loss": 0.9665708541870117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23952801525592804,
"step": 877,
"valid_targets_mean": 14631.8,
"valid_targets_min": 6130
},
{
"epoch": 3.7377398720682304,
"grad_norm": 0.28647957077023595,
"learning_rate": 7.345190164999307e-06,
"loss": 0.967820405960083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23053833842277527,
"step": 878,
"valid_targets_mean": 14925.0,
"valid_targets_min": 7657
},
{
"epoch": 3.742004264392324,
"grad_norm": 0.2327400527390114,
"learning_rate": 7.299215172868947e-06,
"loss": 0.9955414533615112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2453860640525818,
"step": 879,
"valid_targets_mean": 13903.8,
"valid_targets_min": 1726
},
{
"epoch": 3.746268656716418,
"grad_norm": 0.2207936733218975,
"learning_rate": 7.2533523774194865e-06,
"loss": 0.9483203887939453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26135653257369995,
"step": 880,
"valid_targets_mean": 15312.8,
"valid_targets_min": 11925
},
{
"epoch": 3.750533049040512,
"grad_norm": 0.2300862632957973,
"learning_rate": 7.2076021837954616e-06,
"loss": 0.9806277751922607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23874348402023315,
"step": 881,
"valid_targets_mean": 14891.8,
"valid_targets_min": 1848
},
{
"epoch": 3.7547974413646057,
"grad_norm": 0.24870143297414857,
"learning_rate": 7.161964996146689e-06,
"loss": 1.046656608581543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24634422361850739,
"step": 882,
"valid_targets_mean": 14668.8,
"valid_targets_min": 4126
},
{
"epoch": 3.7590618336886994,
"grad_norm": 0.22147895784835617,
"learning_rate": 7.116441217624708e-06,
"loss": 0.9934045076370239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27002787590026855,
"step": 883,
"valid_targets_mean": 15408.2,
"valid_targets_min": 9820
},
{
"epoch": 3.763326226012793,
"grad_norm": 0.2446394869202921,
"learning_rate": 7.071031250379228e-06,
"loss": 0.996993899345398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24206006526947021,
"step": 884,
"valid_targets_mean": 14343.3,
"valid_targets_min": 4333
},
{
"epoch": 3.767590618336887,
"grad_norm": 0.24998670653421667,
"learning_rate": 7.0257354955545466e-06,
"loss": 1.007996916770935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24389483034610748,
"step": 885,
"valid_targets_mean": 15130.5,
"valid_targets_min": 7247
},
{
"epoch": 3.771855010660981,
"grad_norm": 0.2409640520795428,
"learning_rate": 6.980554353286066e-06,
"loss": 0.9907573461532593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25695574283599854,
"step": 886,
"valid_targets_mean": 14252.0,
"valid_targets_min": 2331
},
{
"epoch": 3.7761194029850746,
"grad_norm": 0.21630757494791464,
"learning_rate": 6.935488222696676e-06,
"loss": 0.9466689229011536,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23345476388931274,
"step": 887,
"valid_targets_mean": 14437.1,
"valid_targets_min": 2454
},
{
"epoch": 3.7803837953091683,
"grad_norm": 0.21812104909145605,
"learning_rate": 6.890537501893302e-06,
"loss": 1.0114989280700684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26563623547554016,
"step": 888,
"valid_targets_mean": 15158.0,
"valid_targets_min": 5390
},
{
"epoch": 3.7846481876332625,
"grad_norm": 0.22583982632122657,
"learning_rate": 6.845702587963352e-06,
"loss": 0.9820560812950134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22860771417617798,
"step": 889,
"valid_targets_mean": 14585.2,
"valid_targets_min": 4294
},
{
"epoch": 3.788912579957356,
"grad_norm": 0.22781777077904905,
"learning_rate": 6.800983876971192e-06,
"loss": 1.0018254518508911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25380298495292664,
"step": 890,
"valid_targets_mean": 15159.2,
"valid_targets_min": 4537
},
{
"epoch": 3.79317697228145,
"grad_norm": 0.245946021804642,
"learning_rate": 6.756381763954718e-06,
"loss": 0.9879237413406372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2271505445241928,
"step": 891,
"valid_targets_mean": 14488.2,
"valid_targets_min": 4167
},
{
"epoch": 3.7974413646055436,
"grad_norm": 0.21935290451621,
"learning_rate": 6.7118966429217645e-06,
"loss": 0.9949040412902832,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2252422273159027,
"step": 892,
"valid_targets_mean": 13439.8,
"valid_targets_min": 2234
},
{
"epoch": 3.8017057569296373,
"grad_norm": 0.2326597771006886,
"learning_rate": 6.667528906846714e-06,
"loss": 0.9881210923194885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2523738741874695,
"step": 893,
"valid_targets_mean": 15426.2,
"valid_targets_min": 7311
},
{
"epoch": 3.8059701492537314,
"grad_norm": 0.24603226496570071,
"learning_rate": 6.623278947666974e-06,
"loss": 1.0030843019485474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25523436069488525,
"step": 894,
"valid_targets_mean": 14742.0,
"valid_targets_min": 2248
},
{
"epoch": 3.810234541577825,
"grad_norm": 0.266631934837499,
"learning_rate": 6.579147156279538e-06,
"loss": 0.9971247315406799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2442849725484848,
"step": 895,
"valid_targets_mean": 14518.3,
"valid_targets_min": 5077
},
{
"epoch": 3.814498933901919,
"grad_norm": 0.22661067454202902,
"learning_rate": 6.535133922537513e-06,
"loss": 1.0048599243164062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25347477197647095,
"step": 896,
"valid_targets_mean": 15031.2,
"valid_targets_min": 3446
},
{
"epoch": 3.818763326226013,
"grad_norm": 0.24279285266836087,
"learning_rate": 6.491239635246709e-06,
"loss": 0.9471845626831055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25361740589141846,
"step": 897,
"valid_targets_mean": 14355.9,
"valid_targets_min": 3785
},
{
"epoch": 3.8230277185501067,
"grad_norm": 0.22447031258250963,
"learning_rate": 6.447464682162143e-06,
"loss": 0.9708333015441895,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2511172890663147,
"step": 898,
"valid_targets_mean": 14254.9,
"valid_targets_min": 5724
},
{
"epoch": 3.8272921108742004,
"grad_norm": 0.2180126504875139,
"learning_rate": 6.403809449984704e-06,
"loss": 0.9517656564712524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22828862071037292,
"step": 899,
"valid_targets_mean": 13606.8,
"valid_targets_min": 2633
},
{
"epoch": 3.831556503198294,
"grad_norm": 0.22768302089677078,
"learning_rate": 6.3602743243576405e-06,
"loss": 1.0056332349777222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24649082124233246,
"step": 900,
"valid_targets_mean": 14836.6,
"valid_targets_min": 2264
},
{
"epoch": 3.835820895522388,
"grad_norm": 0.23360775368290362,
"learning_rate": 6.316859689863222e-06,
"loss": 0.979081928730011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2140704244375229,
"step": 901,
"valid_targets_mean": 14308.3,
"valid_targets_min": 5342
},
{
"epoch": 3.840085287846482,
"grad_norm": 0.22409249934660494,
"learning_rate": 6.273565930019316e-06,
"loss": 0.9827362298965454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22869783639907837,
"step": 902,
"valid_targets_mean": 13936.7,
"valid_targets_min": 1649
},
{
"epoch": 3.8443496801705757,
"grad_norm": 0.21947597286366416,
"learning_rate": 6.230393427276e-06,
"loss": 0.9563350677490234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23218029737472534,
"step": 903,
"valid_targets_mean": 15035.5,
"valid_targets_min": 6891
},
{
"epoch": 3.8486140724946694,
"grad_norm": 0.2294472946303094,
"learning_rate": 6.187342563012198e-06,
"loss": 0.9648277759552002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22801150381565094,
"step": 904,
"valid_targets_mean": 15162.3,
"valid_targets_min": 7415
},
{
"epoch": 3.8528784648187635,
"grad_norm": 0.23831859235363342,
"learning_rate": 6.144413717532269e-06,
"loss": 0.9997645020484924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24504047632217407,
"step": 905,
"valid_targets_mean": 14434.5,
"valid_targets_min": 4254
},
{
"epoch": 3.857142857142857,
"grad_norm": 0.21316337592556336,
"learning_rate": 6.1016072700627106e-06,
"loss": 1.013451099395752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2638471722602844,
"step": 906,
"valid_targets_mean": 15279.8,
"valid_targets_min": 9661
},
{
"epoch": 3.861407249466951,
"grad_norm": 0.21560295735447713,
"learning_rate": 6.058923598748756e-06,
"loss": 0.9879148006439209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25007355213165283,
"step": 907,
"valid_targets_mean": 14704.1,
"valid_targets_min": 4873
},
{
"epoch": 3.8656716417910446,
"grad_norm": 0.22081513779623307,
"learning_rate": 6.016363080651066e-06,
"loss": 0.972460925579071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26785242557525635,
"step": 908,
"valid_targets_mean": 15310.9,
"valid_targets_min": 1417
},
{
"epoch": 3.8699360341151388,
"grad_norm": 0.215554369596329,
"learning_rate": 5.973926091742386e-06,
"loss": 0.9702866077423096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544465959072113,
"step": 909,
"valid_targets_mean": 14615.7,
"valid_targets_min": 2662
},
{
"epoch": 3.8742004264392325,
"grad_norm": 0.2270168812979479,
"learning_rate": 5.931613006904196e-06,
"loss": 1.041187047958374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2589257061481476,
"step": 910,
"valid_targets_mean": 14657.2,
"valid_targets_min": 2024
},
{
"epoch": 3.878464818763326,
"grad_norm": 0.22359388614658796,
"learning_rate": 5.889424199923473e-06,
"loss": 1.0261526107788086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2758401036262512,
"step": 911,
"valid_targets_mean": 14530.1,
"valid_targets_min": 2574
},
{
"epoch": 3.88272921108742,
"grad_norm": 0.22384565420989688,
"learning_rate": 5.847360043489318e-06,
"loss": 0.9904541969299316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23720858991146088,
"step": 912,
"valid_targets_mean": 13643.0,
"valid_targets_min": 4384
},
{
"epoch": 3.886993603411514,
"grad_norm": 0.23154427689601847,
"learning_rate": 5.805420909189683e-06,
"loss": 0.966805636882782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23835605382919312,
"step": 913,
"valid_targets_mean": 13716.4,
"valid_targets_min": 1999
},
{
"epoch": 3.8912579957356077,
"grad_norm": 0.2143041362190162,
"learning_rate": 5.7636071675081076e-06,
"loss": 0.9406954646110535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22822436690330505,
"step": 914,
"valid_targets_mean": 14781.8,
"valid_targets_min": 4557
},
{
"epoch": 3.8955223880597014,
"grad_norm": 0.2239489404420758,
"learning_rate": 5.721919187820431e-06,
"loss": 0.966067910194397,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23978188633918762,
"step": 915,
"valid_targets_mean": 14888.3,
"valid_targets_min": 1662
},
{
"epoch": 3.8997867803837956,
"grad_norm": 0.2246564522473887,
"learning_rate": 5.6803573383915265e-06,
"loss": 0.9739153981208801,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23945488035678864,
"step": 916,
"valid_targets_mean": 14734.2,
"valid_targets_min": 6220
},
{
"epoch": 3.9040511727078893,
"grad_norm": 0.23128830802806832,
"learning_rate": 5.638921986372064e-06,
"loss": 0.9694392085075378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2503975033760071,
"step": 917,
"valid_targets_mean": 14521.5,
"valid_targets_min": 4590
},
{
"epoch": 3.908315565031983,
"grad_norm": 0.22103661486834023,
"learning_rate": 5.5976134977952315e-06,
"loss": 0.9809643030166626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2672732472419739,
"step": 918,
"valid_targets_mean": 14542.9,
"valid_targets_min": 4690
},
{
"epoch": 3.9125799573560767,
"grad_norm": 0.21739956435689023,
"learning_rate": 5.556432237573564e-06,
"loss": 0.9921892285346985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24295605719089508,
"step": 919,
"valid_targets_mean": 14229.2,
"valid_targets_min": 1722
},
{
"epoch": 3.9168443496801704,
"grad_norm": 0.22604645101729312,
"learning_rate": 5.5153785694956416e-06,
"loss": 0.956708550453186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24477112293243408,
"step": 920,
"valid_targets_mean": 14657.9,
"valid_targets_min": 5751
},
{
"epoch": 3.9211087420042645,
"grad_norm": 0.2404884445232945,
"learning_rate": 5.474452856222942e-06,
"loss": 1.0439990758895874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25359728932380676,
"step": 921,
"valid_targets_mean": 14060.7,
"valid_targets_min": 2813
},
{
"epoch": 3.925373134328358,
"grad_norm": 0.22032750526822323,
"learning_rate": 5.433655459286611e-06,
"loss": 0.9538367986679077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23562100529670715,
"step": 922,
"valid_targets_mean": 14302.5,
"valid_targets_min": 4541
},
{
"epoch": 3.929637526652452,
"grad_norm": 0.20552263463345719,
"learning_rate": 5.392986739084238e-06,
"loss": 1.000390887260437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24285882711410522,
"step": 923,
"valid_targets_mean": 14002.6,
"valid_targets_min": 3538
},
{
"epoch": 3.933901918976546,
"grad_norm": 0.22429960515785585,
"learning_rate": 5.352447054876755e-06,
"loss": 0.9766531586647034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22854523360729218,
"step": 924,
"valid_targets_mean": 13950.4,
"valid_targets_min": 1861
},
{
"epoch": 3.9381663113006398,
"grad_norm": 0.2415085567962944,
"learning_rate": 5.31203676478516e-06,
"loss": 1.0060484409332275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25534623861312866,
"step": 925,
"valid_targets_mean": 15236.5,
"valid_targets_min": 1413
},
{
"epoch": 3.9424307036247335,
"grad_norm": 0.22915738361343682,
"learning_rate": 5.271756225787434e-06,
"loss": 0.9796045422554016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25547105073928833,
"step": 926,
"valid_targets_mean": 15215.2,
"valid_targets_min": 6385
},
{
"epoch": 3.946695095948827,
"grad_norm": 0.25163106173324307,
"learning_rate": 5.231605793715348e-06,
"loss": 0.9589823484420776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22687679529190063,
"step": 927,
"valid_targets_mean": 14792.0,
"valid_targets_min": 6755
},
{
"epoch": 3.950959488272921,
"grad_norm": 0.24040509681493633,
"learning_rate": 5.191585823251335e-06,
"loss": 0.9927637577056885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22231948375701904,
"step": 928,
"valid_targets_mean": 14103.3,
"valid_targets_min": 1337
},
{
"epoch": 3.955223880597015,
"grad_norm": 0.22924082425507014,
"learning_rate": 5.151696667925348e-06,
"loss": 0.9865278005599976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25896745920181274,
"step": 929,
"valid_targets_mean": 14469.6,
"valid_targets_min": 2411
},
{
"epoch": 3.9594882729211087,
"grad_norm": 0.19879146511999657,
"learning_rate": 5.111938680111732e-06,
"loss": 0.9674332737922668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2582390010356903,
"step": 930,
"valid_targets_mean": 14435.3,
"valid_targets_min": 2399
},
{
"epoch": 3.9637526652452024,
"grad_norm": 0.22126743586442613,
"learning_rate": 5.072312211026125e-06,
"loss": 1.012916922569275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2825927734375,
"step": 931,
"valid_targets_mean": 14951.0,
"valid_targets_min": 1843
},
{
"epoch": 3.9680170575692966,
"grad_norm": 0.21867166536576235,
"learning_rate": 5.032817610722369e-06,
"loss": 0.9658184051513672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23165291547775269,
"step": 932,
"valid_targets_mean": 14676.0,
"valid_targets_min": 5449
},
{
"epoch": 3.9722814498933903,
"grad_norm": 0.22364207275323592,
"learning_rate": 4.993455228089366e-06,
"loss": 1.0015931129455566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24776685237884521,
"step": 933,
"valid_targets_mean": 14040.2,
"valid_targets_min": 2181
},
{
"epoch": 3.976545842217484,
"grad_norm": 0.2042925699256337,
"learning_rate": 4.954225410848048e-06,
"loss": 1.0132436752319336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2558859586715698,
"step": 934,
"valid_targets_mean": 14669.7,
"valid_targets_min": 6432
},
{
"epoch": 3.9808102345415777,
"grad_norm": 0.22223779728157847,
"learning_rate": 4.915128505548284e-06,
"loss": 1.002284288406372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2467479109764099,
"step": 935,
"valid_targets_mean": 14697.2,
"valid_targets_min": 1573
},
{
"epoch": 3.9850746268656714,
"grad_norm": 0.22124383489771185,
"learning_rate": 4.8761648575658145e-06,
"loss": 0.9923639297485352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2363635003566742,
"step": 936,
"valid_targets_mean": 14532.8,
"valid_targets_min": 3243
},
{
"epoch": 3.9893390191897655,
"grad_norm": 0.21530875229060228,
"learning_rate": 4.837334811099217e-06,
"loss": 0.9767214059829712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2520718276500702,
"step": 937,
"valid_targets_mean": 14174.2,
"valid_targets_min": 3072
},
{
"epoch": 3.9936034115138592,
"grad_norm": 0.21172490339949648,
"learning_rate": 4.7986387091668365e-06,
"loss": 0.9671895503997803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24724167585372925,
"step": 938,
"valid_targets_mean": 14576.5,
"valid_targets_min": 6313
},
{
"epoch": 3.997867803837953,
"grad_norm": 0.22865146944208778,
"learning_rate": 4.760076893603791e-06,
"loss": 0.9730648994445801,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24118828773498535,
"step": 939,
"valid_targets_mean": 14682.5,
"valid_targets_min": 3668
},
{
"epoch": 4.0,
"grad_norm": 0.26952511801055457,
"learning_rate": 4.721649705058926e-06,
"loss": 0.9118285179138184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4505543112754822,
"step": 940,
"valid_targets_mean": 15165.0,
"valid_targets_min": 6436
},
{
"epoch": 4.004264392324094,
"grad_norm": 0.2598615455809434,
"learning_rate": 4.683357482991819e-06,
"loss": 1.0042850971221924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2683258354663849,
"step": 941,
"valid_targets_mean": 14863.8,
"valid_targets_min": 2691
},
{
"epoch": 4.008528784648187,
"grad_norm": 0.2496957929931509,
"learning_rate": 4.645200565669776e-06,
"loss": 1.0006030797958374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2577746510505676,
"step": 942,
"valid_targets_mean": 15028.4,
"valid_targets_min": 3047
},
{
"epoch": 4.0127931769722816,
"grad_norm": 0.21150850214146644,
"learning_rate": 4.607179290164823e-06,
"loss": 1.0080355405807495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23704233765602112,
"step": 943,
"valid_targets_mean": 14554.3,
"valid_targets_min": 3704
},
{
"epoch": 4.017057569296376,
"grad_norm": 0.2375880728968116,
"learning_rate": 4.569293992350783e-06,
"loss": 0.9192696213722229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22098088264465332,
"step": 944,
"valid_targets_mean": 14898.5,
"valid_targets_min": 2985
},
{
"epoch": 4.021321961620469,
"grad_norm": 0.2206007314421585,
"learning_rate": 4.531545006900244e-06,
"loss": 0.9563088417053223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2254941761493683,
"step": 945,
"valid_targets_mean": 14250.9,
"valid_targets_min": 2251
},
{
"epoch": 4.025586353944563,
"grad_norm": 0.23986481760882775,
"learning_rate": 4.493932667281646e-06,
"loss": 0.9235143661499023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.213372141122818,
"step": 946,
"valid_targets_mean": 15698.6,
"valid_targets_min": 11052
},
{
"epoch": 4.029850746268656,
"grad_norm": 0.2214768351800122,
"learning_rate": 4.456457305756321e-06,
"loss": 0.9056645631790161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2257278859615326,
"step": 947,
"valid_targets_mean": 14185.8,
"valid_targets_min": 1860
},
{
"epoch": 4.0341151385927505,
"grad_norm": 0.23103840555371336,
"learning_rate": 4.419119253375557e-06,
"loss": 1.0127660036087036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2503008544445038,
"step": 948,
"valid_targets_mean": 14074.9,
"valid_targets_min": 2264
},
{
"epoch": 4.038379530916845,
"grad_norm": 0.249133136138053,
"learning_rate": 4.381918839977675e-06,
"loss": 1.013496994972229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24352091550827026,
"step": 949,
"valid_targets_mean": 14197.3,
"valid_targets_min": 4327
},
{
"epoch": 4.042643923240938,
"grad_norm": 0.2466218442362566,
"learning_rate": 4.344856394185122e-06,
"loss": 0.9960319995880127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23479261994361877,
"step": 950,
"valid_targets_mean": 14938.5,
"valid_targets_min": 5877
},
{
"epoch": 4.046908315565032,
"grad_norm": 0.22403008035660443,
"learning_rate": 4.307932243401538e-06,
"loss": 0.9637834429740906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22966812551021576,
"step": 951,
"valid_targets_mean": 14600.7,
"valid_targets_min": 1397
},
{
"epoch": 4.051172707889126,
"grad_norm": 0.2214812448472233,
"learning_rate": 4.271146713808927e-06,
"loss": 0.9625729322433472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22703179717063904,
"step": 952,
"valid_targets_mean": 13891.4,
"valid_targets_min": 6092
},
{
"epoch": 4.0554371002132195,
"grad_norm": 0.23835313549674086,
"learning_rate": 4.234500130364698e-06,
"loss": 0.9672271013259888,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22592604160308838,
"step": 953,
"valid_targets_mean": 13705.8,
"valid_targets_min": 3021
},
{
"epoch": 4.059701492537314,
"grad_norm": 0.2431168490906322,
"learning_rate": 4.197992816798851e-06,
"loss": 0.9610116481781006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24816054105758667,
"step": 954,
"valid_targets_mean": 15000.9,
"valid_targets_min": 9098
},
{
"epoch": 4.063965884861407,
"grad_norm": 0.21649784955636678,
"learning_rate": 4.161625095611101e-06,
"loss": 0.9844383001327515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22619038820266724,
"step": 955,
"valid_targets_mean": 13627.3,
"valid_targets_min": 2041
},
{
"epoch": 4.068230277185501,
"grad_norm": 0.21242720170627966,
"learning_rate": 4.125397288068007e-06,
"loss": 0.9343520998954773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2527115046977997,
"step": 956,
"valid_targets_mean": 15573.6,
"valid_targets_min": 8774
},
{
"epoch": 4.072494669509595,
"grad_norm": 0.23885201853505186,
"learning_rate": 4.089309714200187e-06,
"loss": 0.9805846214294434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23342227935791016,
"step": 957,
"valid_targets_mean": 14154.8,
"valid_targets_min": 4403
},
{
"epoch": 4.076759061833688,
"grad_norm": 0.2409175417470154,
"learning_rate": 4.0533626927994185e-06,
"loss": 0.9530068635940552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22673815488815308,
"step": 958,
"valid_targets_mean": 14209.4,
"valid_targets_min": 1429
},
{
"epoch": 4.081023454157783,
"grad_norm": 0.20095032940579705,
"learning_rate": 4.017556541415888e-06,
"loss": 0.9678086042404175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24230854213237762,
"step": 959,
"valid_targets_mean": 14519.5,
"valid_targets_min": 4361
},
{
"epoch": 4.085287846481877,
"grad_norm": 0.20738540785399912,
"learning_rate": 3.981891576355352e-06,
"loss": 0.9462642073631287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.256551057100296,
"step": 960,
"valid_targets_mean": 15968.7,
"valid_targets_min": 11170
},
{
"epoch": 4.08955223880597,
"grad_norm": 0.21889372697262374,
"learning_rate": 3.946368112676346e-06,
"loss": 0.9328886270523071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23367977142333984,
"step": 961,
"valid_targets_mean": 14337.7,
"valid_targets_min": 5469
},
{
"epoch": 4.093816631130064,
"grad_norm": 0.21906660866558375,
"learning_rate": 3.9109864641874166e-06,
"loss": 0.9877804517745972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2542395293712616,
"step": 962,
"valid_targets_mean": 14935.7,
"valid_targets_min": 8954
},
{
"epoch": 4.098081023454157,
"grad_norm": 0.20273824597016718,
"learning_rate": 3.875746943444316e-06,
"loss": 0.9596878290176392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2125156819820404,
"step": 963,
"valid_targets_mean": 14245.0,
"valid_targets_min": 1666
},
{
"epoch": 4.1023454157782515,
"grad_norm": 0.21324639176915003,
"learning_rate": 3.840649861747278e-06,
"loss": 0.9820946455001831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25652480125427246,
"step": 964,
"valid_targets_mean": 14939.0,
"valid_targets_min": 5564
},
{
"epoch": 4.106609808102346,
"grad_norm": 0.2271780304804354,
"learning_rate": 3.8056955291382667e-06,
"loss": 0.9720626473426819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2259848415851593,
"step": 965,
"valid_targets_mean": 14278.5,
"valid_targets_min": 2855
},
{
"epoch": 4.110874200426439,
"grad_norm": 0.21758482051590056,
"learning_rate": 3.7708842543981928e-06,
"loss": 1.0157244205474854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22084775567054749,
"step": 966,
"valid_targets_mean": 13830.6,
"valid_targets_min": 3608
},
{
"epoch": 4.115138592750533,
"grad_norm": 0.2178449259177168,
"learning_rate": 3.736216345044237e-06,
"loss": 0.9629996418952942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2448056936264038,
"step": 967,
"valid_targets_mean": 15243.3,
"valid_targets_min": 9348
},
{
"epoch": 4.119402985074627,
"grad_norm": 0.23068970986223994,
"learning_rate": 3.7016921073271084e-06,
"loss": 0.9766483902931213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25123000144958496,
"step": 968,
"valid_targets_mean": 14588.3,
"valid_targets_min": 5330
},
{
"epoch": 4.1236673773987205,
"grad_norm": 0.20603833187982268,
"learning_rate": 3.6673118462283453e-06,
"loss": 0.9764162302017212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2223636358976364,
"step": 969,
"valid_targets_mean": 14617.9,
"valid_targets_min": 7151
},
{
"epoch": 4.127931769722815,
"grad_norm": 0.21537174550511082,
"learning_rate": 3.6330758654576227e-06,
"loss": 0.987112283706665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2530418634414673,
"step": 970,
"valid_targets_mean": 15085.6,
"valid_targets_min": 5562
},
{
"epoch": 4.132196162046908,
"grad_norm": 0.21251058017992036,
"learning_rate": 3.598984467450055e-06,
"loss": 0.931121826171875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2508315443992615,
"step": 971,
"valid_targets_mean": 15062.1,
"valid_targets_min": 7551
},
{
"epoch": 4.136460554371002,
"grad_norm": 0.19532450425042266,
"learning_rate": 3.565037953363546e-06,
"loss": 0.9461972713470459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24638235569000244,
"step": 972,
"valid_targets_mean": 15131.4,
"valid_targets_min": 4479
},
{
"epoch": 4.140724946695096,
"grad_norm": 0.21113282093820956,
"learning_rate": 3.5312366230761154e-06,
"loss": 0.9400416016578674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24616287648677826,
"step": 973,
"valid_targets_mean": 14582.4,
"valid_targets_min": 2490
},
{
"epoch": 4.144989339019189,
"grad_norm": 0.1978320467183934,
"learning_rate": 3.497580775183258e-06,
"loss": 0.9511521458625793,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21084284782409668,
"step": 974,
"valid_targets_mean": 13831.5,
"valid_targets_min": 3354
},
{
"epoch": 4.149253731343284,
"grad_norm": 0.2010512956277587,
"learning_rate": 3.464070706995295e-06,
"loss": 1.0143787860870361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2493945062160492,
"step": 975,
"valid_targets_mean": 15031.1,
"valid_targets_min": 2622
},
{
"epoch": 4.153518123667378,
"grad_norm": 0.2171565984670613,
"learning_rate": 3.4307067145347417e-06,
"loss": 0.9798712730407715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22808438539505005,
"step": 976,
"valid_targets_mean": 13718.6,
"valid_targets_min": 2764
},
{
"epoch": 4.157782515991471,
"grad_norm": 0.2100321812880822,
"learning_rate": 3.397489092533739e-06,
"loss": 0.9285037517547607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2307242751121521,
"step": 977,
"valid_targets_mean": 15139.5,
"valid_targets_min": 4248
},
{
"epoch": 4.162046908315565,
"grad_norm": 0.19920637644670364,
"learning_rate": 3.364418134431371e-06,
"loss": 0.9553232192993164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24909673631191254,
"step": 978,
"valid_targets_mean": 14692.8,
"valid_targets_min": 5739
},
{
"epoch": 4.166311300639659,
"grad_norm": 0.2098162057936045,
"learning_rate": 3.331494132371149e-06,
"loss": 1.0109907388687134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2588352560997009,
"step": 979,
"valid_targets_mean": 14593.1,
"valid_targets_min": 6091
},
{
"epoch": 4.1705756929637525,
"grad_norm": 0.20353019856800333,
"learning_rate": 3.2987173771983816e-06,
"loss": 0.9764183759689331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2452232539653778,
"step": 980,
"valid_targets_mean": 13589.6,
"valid_targets_min": 1670
},
{
"epoch": 4.174840085287847,
"grad_norm": 0.21252727700791268,
"learning_rate": 3.266088158457634e-06,
"loss": 0.994144856929779,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23629000782966614,
"step": 981,
"valid_targets_mean": 15645.4,
"valid_targets_min": 1730
},
{
"epoch": 4.17910447761194,
"grad_norm": 0.20962826973153406,
"learning_rate": 3.233606764390147e-06,
"loss": 0.9840140342712402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26156654953956604,
"step": 982,
"valid_targets_mean": 14633.2,
"valid_targets_min": 2488
},
{
"epoch": 4.183368869936034,
"grad_norm": 0.20671678278867006,
"learning_rate": 3.2012734819313127e-06,
"loss": 0.9591784477233887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2380722612142563,
"step": 983,
"valid_targets_mean": 14881.7,
"valid_targets_min": 2565
},
{
"epoch": 4.187633262260128,
"grad_norm": 0.20511799080332893,
"learning_rate": 3.1690885967081187e-06,
"loss": 0.9503059387207031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23584482073783875,
"step": 984,
"valid_targets_mean": 14258.9,
"valid_targets_min": 4410
},
{
"epoch": 4.1918976545842215,
"grad_norm": 0.21463917773699412,
"learning_rate": 3.1370523930366393e-06,
"loss": 1.0117886066436768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27862632274627686,
"step": 985,
"valid_targets_mean": 15395.7,
"valid_targets_min": 7915
},
{
"epoch": 4.196162046908316,
"grad_norm": 0.2093726292598158,
"learning_rate": 3.105165153919525e-06,
"loss": 0.9581783413887024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24928846955299377,
"step": 986,
"valid_targets_mean": 14460.1,
"valid_targets_min": 7483
},
{
"epoch": 4.20042643923241,
"grad_norm": 0.2222992030682058,
"learning_rate": 3.073427161043492e-06,
"loss": 0.9946070313453674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24092058837413788,
"step": 987,
"valid_targets_mean": 14355.3,
"valid_targets_min": 1923
},
{
"epoch": 4.204690831556503,
"grad_norm": 0.20625953296432958,
"learning_rate": 3.0418386947768463e-06,
"loss": 0.9738461971282959,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.262671560049057,
"step": 988,
"valid_targets_mean": 15095.2,
"valid_targets_min": 1015
},
{
"epoch": 4.208955223880597,
"grad_norm": 0.19665303464106015,
"learning_rate": 3.01040003416698e-06,
"loss": 0.9926748275756836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28288888931274414,
"step": 989,
"valid_targets_mean": 15240.2,
"valid_targets_min": 11135
},
{
"epoch": 4.21321961620469,
"grad_norm": 0.25115515156728374,
"learning_rate": 2.97911145693796e-06,
"loss": 0.9960339069366455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2687056064605713,
"step": 990,
"valid_targets_mean": 14847.2,
"valid_targets_min": 2936
},
{
"epoch": 4.217484008528785,
"grad_norm": 0.2106643095843318,
"learning_rate": 2.947973239488009e-06,
"loss": 0.9805059432983398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2532857656478882,
"step": 991,
"valid_targets_mean": 14551.3,
"valid_targets_min": 2467
},
{
"epoch": 4.221748400852879,
"grad_norm": 0.21502802400470067,
"learning_rate": 2.91698565688711e-06,
"loss": 0.9603044986724854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2556474506855011,
"step": 992,
"valid_targets_mean": 14250.3,
"valid_targets_min": 1495
},
{
"epoch": 4.226012793176972,
"grad_norm": 0.20760424832292068,
"learning_rate": 2.886148982874566e-06,
"loss": 0.9346092939376831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25037434697151184,
"step": 993,
"valid_targets_mean": 14932.4,
"valid_targets_min": 5263
},
{
"epoch": 4.230277185501066,
"grad_norm": 0.1998437758947691,
"learning_rate": 2.8554634898565668e-06,
"loss": 0.9752610325813293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23575522005558014,
"step": 994,
"valid_targets_mean": 14343.8,
"valid_targets_min": 2517
},
{
"epoch": 4.23454157782516,
"grad_norm": 0.21425012639834637,
"learning_rate": 2.824929448903806e-06,
"loss": 0.9946762323379517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22817380726337433,
"step": 995,
"valid_targets_mean": 15112.9,
"valid_targets_min": 4219
},
{
"epoch": 4.2388059701492535,
"grad_norm": 0.20532225386032876,
"learning_rate": 2.794547129749059e-06,
"loss": 0.9420288801193237,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2333347499370575,
"step": 996,
"valid_targets_mean": 15241.3,
"valid_targets_min": 4205
},
{
"epoch": 4.243070362473348,
"grad_norm": 0.21482359658075506,
"learning_rate": 2.7643168007848255e-06,
"loss": 0.9856697916984558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23364725708961487,
"step": 997,
"valid_targets_mean": 13191.8,
"valid_targets_min": 1469
},
{
"epoch": 4.247334754797441,
"grad_norm": 0.19816002866144786,
"learning_rate": 2.734238729060956e-06,
"loss": 0.9388452172279358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23158328235149384,
"step": 998,
"valid_targets_mean": 14693.8,
"valid_targets_min": 6332
},
{
"epoch": 4.251599147121535,
"grad_norm": 0.19331845274108647,
"learning_rate": 2.7043131802822653e-06,
"loss": 0.9250015020370483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23296083509922028,
"step": 999,
"valid_targets_mean": 14107.6,
"valid_targets_min": 1448
},
{
"epoch": 4.255863539445629,
"grad_norm": 0.2201391523017833,
"learning_rate": 2.674540418806222e-06,
"loss": 0.9823698401451111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22890284657478333,
"step": 1000,
"valid_targets_mean": 13355.3,
"valid_targets_min": 1938
},
{
"epoch": 4.2601279317697225,
"grad_norm": 0.2032741949241354,
"learning_rate": 2.6449207076405857e-06,
"loss": 0.9904990792274475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25959524512290955,
"step": 1001,
"valid_targets_mean": 13930.4,
"valid_targets_min": 3569
},
{
"epoch": 4.264392324093817,
"grad_norm": 0.19940973362259065,
"learning_rate": 2.6154543084411035e-06,
"loss": 0.9995609521865845,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23636648058891296,
"step": 1002,
"valid_targets_mean": 13908.2,
"valid_targets_min": 3123
},
{
"epoch": 4.268656716417911,
"grad_norm": 0.18849042085274592,
"learning_rate": 2.5861414815091834e-06,
"loss": 0.9617021083831787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23850838840007782,
"step": 1003,
"valid_targets_mean": 14161.7,
"valid_targets_min": 3380
},
{
"epoch": 4.272921108742004,
"grad_norm": 0.19094650959348478,
"learning_rate": 2.5569824857895987e-06,
"loss": 0.9622359275817871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2301647961139679,
"step": 1004,
"valid_targets_mean": 15462.8,
"valid_targets_min": 8361
},
{
"epoch": 4.277185501066098,
"grad_norm": 0.20207137277949228,
"learning_rate": 2.5279775788682083e-06,
"loss": 0.9702969789505005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22978103160858154,
"step": 1005,
"valid_targets_mean": 13786.1,
"valid_targets_min": 1132
},
{
"epoch": 4.281449893390192,
"grad_norm": 0.1980151959217989,
"learning_rate": 2.499127016969671e-06,
"loss": 0.997840404510498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2729083299636841,
"step": 1006,
"valid_targets_mean": 15124.5,
"valid_targets_min": 7044
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.19261362795010986,
"learning_rate": 2.4704310549551934e-06,
"loss": 0.9802528023719788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24483956396579742,
"step": 1007,
"valid_targets_mean": 14889.1,
"valid_targets_min": 2810
},
{
"epoch": 4.28997867803838,
"grad_norm": 0.19177431216588642,
"learning_rate": 2.441889946320266e-06,
"loss": 0.9607895612716675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22158975899219513,
"step": 1008,
"valid_targets_mean": 14288.4,
"valid_targets_min": 790
},
{
"epoch": 4.294243070362473,
"grad_norm": 0.20398516323125138,
"learning_rate": 2.4135039431924233e-06,
"loss": 1.0122029781341553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23977097868919373,
"step": 1009,
"valid_targets_mean": 14440.8,
"valid_targets_min": 2303
},
{
"epoch": 4.298507462686567,
"grad_norm": 0.19793787015176892,
"learning_rate": 2.3852732963290426e-06,
"loss": 0.9613388180732727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24488984048366547,
"step": 1010,
"valid_targets_mean": 15772.8,
"valid_targets_min": 11202
},
{
"epoch": 4.302771855010661,
"grad_norm": 0.19676291396618317,
"learning_rate": 2.3571982551150853e-06,
"loss": 0.9711207151412964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2269301414489746,
"step": 1011,
"valid_targets_mean": 14957.4,
"valid_targets_min": 1896
},
{
"epoch": 4.3070362473347545,
"grad_norm": 0.203833072396682,
"learning_rate": 2.329279067560937e-06,
"loss": 0.95209801197052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23200687766075134,
"step": 1012,
"valid_targets_mean": 15725.0,
"valid_targets_min": 10802
},
{
"epoch": 4.311300639658849,
"grad_norm": 0.1964883187880883,
"learning_rate": 2.301515980300182e-06,
"loss": 1.010817289352417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24898159503936768,
"step": 1013,
"valid_targets_mean": 14878.5,
"valid_targets_min": 4121
},
{
"epoch": 4.315565031982943,
"grad_norm": 0.1898322181946224,
"learning_rate": 2.2739092385874527e-06,
"loss": 0.9674122333526611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24440470337867737,
"step": 1014,
"valid_targets_mean": 15091.1,
"valid_targets_min": 9708
},
{
"epoch": 4.319829424307036,
"grad_norm": 0.19301254137228901,
"learning_rate": 2.2464590862962443e-06,
"loss": 1.007246494293213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25044572353363037,
"step": 1015,
"valid_targets_mean": 15506.5,
"valid_targets_min": 11439
},
{
"epoch": 4.32409381663113,
"grad_norm": 0.20822308446025029,
"learning_rate": 2.219165765916769e-06,
"loss": 0.9655307531356812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2484723925590515,
"step": 1016,
"valid_targets_mean": 13985.8,
"valid_targets_min": 5122
},
{
"epoch": 4.3283582089552235,
"grad_norm": 0.1934116882562992,
"learning_rate": 2.192029518553798e-06,
"loss": 0.9374470710754395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24636460840702057,
"step": 1017,
"valid_targets_mean": 14612.4,
"valid_targets_min": 2367
},
{
"epoch": 4.332622601279318,
"grad_norm": 0.1882233968320926,
"learning_rate": 2.165050583924566e-06,
"loss": 0.9549652338027954,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24371199309825897,
"step": 1018,
"valid_targets_mean": 14470.8,
"valid_targets_min": 1843
},
{
"epoch": 4.336886993603412,
"grad_norm": 0.19491440983190494,
"learning_rate": 2.1382292003566163e-06,
"loss": 0.9814637899398804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24736478924751282,
"step": 1019,
"valid_targets_mean": 14428.2,
"valid_targets_min": 2854
},
{
"epoch": 4.341151385927505,
"grad_norm": 0.19348582439512307,
"learning_rate": 2.1115656047857213e-06,
"loss": 0.9029624462127686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22049419581890106,
"step": 1020,
"valid_targets_mean": 14133.2,
"valid_targets_min": 6151
},
{
"epoch": 4.345415778251599,
"grad_norm": 0.19689546268055425,
"learning_rate": 2.0850600327537806e-06,
"loss": 0.9489182233810425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23130947351455688,
"step": 1021,
"valid_targets_mean": 13676.0,
"valid_targets_min": 1755
},
{
"epoch": 4.349680170575693,
"grad_norm": 0.21680025079180115,
"learning_rate": 2.058712718406719e-06,
"loss": 1.0014092922210693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24298085272312164,
"step": 1022,
"valid_targets_mean": 14311.4,
"valid_targets_min": 3247
},
{
"epoch": 4.353944562899787,
"grad_norm": 0.19883892276598125,
"learning_rate": 2.032523894492471e-06,
"loss": 1.0021915435791016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2545217275619507,
"step": 1023,
"valid_targets_mean": 15110.4,
"valid_targets_min": 1532
},
{
"epoch": 4.358208955223881,
"grad_norm": 0.19658289092130304,
"learning_rate": 2.0064937923588634e-06,
"loss": 1.0034961700439453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25181353092193604,
"step": 1024,
"valid_targets_mean": 14426.9,
"valid_targets_min": 5173
},
{
"epoch": 4.362473347547974,
"grad_norm": 0.20280454994094435,
"learning_rate": 1.9806226419516195e-06,
"loss": 0.9881649017333984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2471427172422409,
"step": 1025,
"valid_targets_mean": 14459.1,
"valid_targets_min": 4839
},
{
"epoch": 4.366737739872068,
"grad_norm": 0.19758701399656536,
"learning_rate": 1.954910671812298e-06,
"loss": 0.9592493772506714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22283393144607544,
"step": 1026,
"valid_targets_mean": 14732.3,
"valid_targets_min": 1356
},
{
"epoch": 4.371002132196162,
"grad_norm": 0.20655855190403255,
"learning_rate": 1.9293581090762894e-06,
"loss": 0.9723584651947021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2688632011413574,
"step": 1027,
"valid_targets_mean": 14489.7,
"valid_targets_min": 6514
},
{
"epoch": 4.3752665245202556,
"grad_norm": 0.18928942678209912,
"learning_rate": 1.9039651794708058e-06,
"loss": 0.9399305582046509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2241872400045395,
"step": 1028,
"valid_targets_mean": 14407.0,
"valid_targets_min": 4268
},
{
"epoch": 4.37953091684435,
"grad_norm": 0.19666747835129786,
"learning_rate": 1.8787321073128817e-06,
"loss": 0.9942531585693359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27074459195137024,
"step": 1029,
"valid_targets_mean": 15288.0,
"valid_targets_min": 11404
},
{
"epoch": 4.383795309168444,
"grad_norm": 0.2101409121194208,
"learning_rate": 1.8536591155073958e-06,
"loss": 1.0341134071350098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24674727022647858,
"step": 1030,
"valid_targets_mean": 14246.6,
"valid_targets_min": 2476
},
{
"epoch": 4.388059701492537,
"grad_norm": 0.18631607942909864,
"learning_rate": 1.8287464255451181e-06,
"loss": 0.9730774164199829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23140370845794678,
"step": 1031,
"valid_targets_mean": 14085.7,
"valid_targets_min": 4573
},
{
"epoch": 4.392324093816631,
"grad_norm": 0.19758703749563059,
"learning_rate": 1.803994257500714e-06,
"loss": 0.9657065272331238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2400664985179901,
"step": 1032,
"valid_targets_mean": 14661.8,
"valid_targets_min": 2977
},
{
"epoch": 4.396588486140725,
"grad_norm": 0.19551350846462764,
"learning_rate": 1.7794028300308474e-06,
"loss": 0.9861183762550354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2520785927772522,
"step": 1033,
"valid_targets_mean": 14641.4,
"valid_targets_min": 1570
},
{
"epoch": 4.400852878464819,
"grad_norm": 0.19925587301074782,
"learning_rate": 1.7549723603722003e-06,
"loss": 0.9705492258071899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24231970310211182,
"step": 1034,
"valid_targets_mean": 15059.8,
"valid_targets_min": 4959
},
{
"epoch": 4.405117270788913,
"grad_norm": 0.19374055288057854,
"learning_rate": 1.730703064339605e-06,
"loss": 0.9851311445236206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25250244140625,
"step": 1035,
"valid_targets_mean": 15045.9,
"valid_targets_min": 6417
},
{
"epoch": 4.409381663113006,
"grad_norm": 0.19565910507434792,
"learning_rate": 1.7065951563241022e-06,
"loss": 0.9457612037658691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2272084653377533,
"step": 1036,
"valid_targets_mean": 14376.1,
"valid_targets_min": 2529
},
{
"epoch": 4.4136460554371,
"grad_norm": 0.20118356334045073,
"learning_rate": 1.682648849291051e-06,
"loss": 1.0145244598388672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23717108368873596,
"step": 1037,
"valid_targets_mean": 14745.7,
"valid_targets_min": 4989
},
{
"epoch": 4.417910447761194,
"grad_norm": 0.21430267101742012,
"learning_rate": 1.6588643547782579e-06,
"loss": 1.0064364671707153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.246231347322464,
"step": 1038,
"valid_targets_mean": 14692.8,
"valid_targets_min": 6686
},
{
"epoch": 4.422174840085288,
"grad_norm": 0.21878191303956324,
"learning_rate": 1.6352418828941052e-06,
"loss": 0.9757519960403442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23531365394592285,
"step": 1039,
"valid_targets_mean": 14086.5,
"valid_targets_min": 5272
},
{
"epoch": 4.426439232409382,
"grad_norm": 0.20380700736786128,
"learning_rate": 1.6117816423156952e-06,
"loss": 0.9778440594673157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25534456968307495,
"step": 1040,
"valid_targets_mean": 14957.0,
"valid_targets_min": 8445
},
{
"epoch": 4.430703624733475,
"grad_norm": 0.19698958768813593,
"learning_rate": 1.5884838402870029e-06,
"loss": 0.9737083911895752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2419891208410263,
"step": 1041,
"valid_targets_mean": 14819.6,
"valid_targets_min": 1627
},
{
"epoch": 4.434968017057569,
"grad_norm": 0.19579630291077935,
"learning_rate": 1.5653486826170384e-06,
"loss": 0.98288494348526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24798744916915894,
"step": 1042,
"valid_targets_mean": 14243.4,
"valid_targets_min": 2334
},
{
"epoch": 4.439232409381663,
"grad_norm": 0.1908871461023108,
"learning_rate": 1.5423763736780583e-06,
"loss": 0.9392582774162292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22092154622077942,
"step": 1043,
"valid_targets_mean": 13959.9,
"valid_targets_min": 2249
},
{
"epoch": 4.443496801705757,
"grad_norm": 0.19569440179212702,
"learning_rate": 1.5195671164037173e-06,
"loss": 0.9626212120056152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25484955310821533,
"step": 1044,
"valid_targets_mean": 14064.9,
"valid_targets_min": 934
},
{
"epoch": 4.447761194029851,
"grad_norm": 0.2062871958262136,
"learning_rate": 1.496921112287315e-06,
"loss": 0.9724099636077881,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24300584197044373,
"step": 1045,
"valid_targets_mean": 15046.3,
"valid_targets_min": 6694
},
{
"epoch": 4.452025586353945,
"grad_norm": 0.193355511893332,
"learning_rate": 1.4744385613799894e-06,
"loss": 0.9834390878677368,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2343788743019104,
"step": 1046,
"valid_targets_mean": 14326.0,
"valid_targets_min": 6455
},
{
"epoch": 4.456289978678038,
"grad_norm": 0.19879252661733948,
"learning_rate": 1.4521196622889644e-06,
"loss": 0.9382596611976624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2293034791946411,
"step": 1047,
"valid_targets_mean": 13610.3,
"valid_targets_min": 2201
},
{
"epoch": 4.460554371002132,
"grad_norm": 0.19757281174151833,
"learning_rate": 1.4299646121757892e-06,
"loss": 0.9946603178977966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2413906753063202,
"step": 1048,
"valid_targets_mean": 15320.3,
"valid_targets_min": 6751
},
{
"epoch": 4.464818763326226,
"grad_norm": 0.18561266334145715,
"learning_rate": 1.4079736067545912e-06,
"loss": 0.9563462734222412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24462030827999115,
"step": 1049,
"valid_targets_mean": 15228.7,
"valid_targets_min": 6663
},
{
"epoch": 4.46908315565032,
"grad_norm": 0.19151475827533837,
"learning_rate": 1.3861468402903634e-06,
"loss": 0.9560338258743286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2419666349887848,
"step": 1050,
"valid_targets_mean": 14435.8,
"valid_targets_min": 2343
},
{
"epoch": 4.473347547974414,
"grad_norm": 0.1987630047053824,
"learning_rate": 1.3644845055972322e-06,
"loss": 0.9236841201782227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21292690932750702,
"step": 1051,
"valid_targets_mean": 14298.3,
"valid_targets_min": 5524
},
{
"epoch": 4.477611940298507,
"grad_norm": 0.19194131819944682,
"learning_rate": 1.3429867940367626e-06,
"loss": 0.9673594832420349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2729540169239044,
"step": 1052,
"valid_targets_mean": 15544.0,
"valid_targets_min": 8747
},
{
"epoch": 4.481876332622601,
"grad_norm": 0.192812446226727,
"learning_rate": 1.321653895516264e-06,
"loss": 0.9610645771026611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22623619437217712,
"step": 1053,
"valid_targets_mean": 14384.1,
"valid_targets_min": 2210
},
{
"epoch": 4.486140724946695,
"grad_norm": 0.19921613895103593,
"learning_rate": 1.3004859984871199e-06,
"loss": 0.9949779510498047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.238198384642601,
"step": 1054,
"valid_targets_mean": 14810.5,
"valid_targets_min": 3268
},
{
"epoch": 4.490405117270789,
"grad_norm": 0.2165221604049567,
"learning_rate": 1.279483289943102e-06,
"loss": 0.948443591594696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22628512978553772,
"step": 1055,
"valid_targets_mean": 14849.5,
"valid_targets_min": 5249
},
{
"epoch": 4.494669509594883,
"grad_norm": 0.20231454197466772,
"learning_rate": 1.2586459554187558e-06,
"loss": 0.9572373032569885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22259369492530823,
"step": 1056,
"valid_targets_mean": 13745.0,
"valid_targets_min": 2013
},
{
"epoch": 4.498933901918977,
"grad_norm": 0.18715519061479605,
"learning_rate": 1.2379741789877175e-06,
"loss": 0.9423749446868896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22759225964546204,
"step": 1057,
"valid_targets_mean": 13998.2,
"valid_targets_min": 1699
},
{
"epoch": 4.50319829424307,
"grad_norm": 0.19620227290225017,
"learning_rate": 1.2174681432611245e-06,
"loss": 0.959303617477417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2522681951522827,
"step": 1058,
"valid_targets_mean": 15055.7,
"valid_targets_min": 5225
},
{
"epoch": 4.507462686567164,
"grad_norm": 0.1889099106230078,
"learning_rate": 1.1971280293859811e-06,
"loss": 0.9340790510177612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.261797159910202,
"step": 1059,
"valid_targets_mean": 15610.4,
"valid_targets_min": 8812
},
{
"epoch": 4.5117270788912585,
"grad_norm": 0.18480707566845292,
"learning_rate": 1.17695401704357e-06,
"loss": 0.9508934020996094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23876410722732544,
"step": 1060,
"valid_targets_mean": 14518.6,
"valid_targets_min": 4628
},
{
"epoch": 4.515991471215352,
"grad_norm": 0.19695395466616883,
"learning_rate": 1.1569462844478552e-06,
"loss": 0.9892035722732544,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2530513107776642,
"step": 1061,
"valid_targets_mean": 14293.0,
"valid_targets_min": 7136
},
{
"epoch": 4.520255863539446,
"grad_norm": 0.1882358661033066,
"learning_rate": 1.1371050083439107e-06,
"loss": 0.9638294577598572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2503116726875305,
"step": 1062,
"valid_targets_mean": 14417.9,
"valid_targets_min": 3406
},
{
"epoch": 4.524520255863539,
"grad_norm": 0.20777062678587696,
"learning_rate": 1.1174303640063622e-06,
"loss": 0.9722185134887695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25119921565055847,
"step": 1063,
"valid_targets_mean": 14916.6,
"valid_targets_min": 8177
},
{
"epoch": 4.528784648187633,
"grad_norm": 0.19873176172073242,
"learning_rate": 1.097922525237849e-06,
"loss": 0.991715133190155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24108682572841644,
"step": 1064,
"valid_targets_mean": 15386.4,
"valid_targets_min": 6365
},
{
"epoch": 4.533049040511727,
"grad_norm": 0.19890055604921067,
"learning_rate": 1.078581664367455e-06,
"loss": 0.9921541213989258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.263067364692688,
"step": 1065,
"valid_targets_mean": 14550.4,
"valid_targets_min": 4874
},
{
"epoch": 4.537313432835821,
"grad_norm": 0.19636402581022136,
"learning_rate": 1.0594079522492274e-06,
"loss": 0.9642506837844849,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24342098832130432,
"step": 1066,
"valid_targets_mean": 14769.9,
"valid_targets_min": 7369
},
{
"epoch": 4.541577825159915,
"grad_norm": 0.18699904506849158,
"learning_rate": 1.040401558260633e-06,
"loss": 0.9328286647796631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22978878021240234,
"step": 1067,
"valid_targets_mean": 15161.9,
"valid_targets_min": 4365
},
{
"epoch": 4.545842217484008,
"grad_norm": 0.1986441092033793,
"learning_rate": 1.0215626503010911e-06,
"loss": 0.9636072516441345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24951043725013733,
"step": 1068,
"valid_targets_mean": 15064.1,
"valid_targets_min": 5965
},
{
"epoch": 4.550106609808102,
"grad_norm": 0.1923777508292909,
"learning_rate": 1.002891394790475e-06,
"loss": 0.9635332822799683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2602200210094452,
"step": 1069,
"valid_targets_mean": 15075.1,
"valid_targets_min": 10306
},
{
"epoch": 4.554371002132196,
"grad_norm": 0.19845653439920236,
"learning_rate": 9.843879566676273e-07,
"loss": 0.9370394945144653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21623185276985168,
"step": 1070,
"valid_targets_mean": 14604.8,
"valid_targets_min": 7174
},
{
"epoch": 4.55863539445629,
"grad_norm": 0.1889495311593519,
"learning_rate": 9.660524993889386e-07,
"loss": 0.9778931736946106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24932819604873657,
"step": 1071,
"valid_targets_mean": 14791.7,
"valid_targets_min": 2852
},
{
"epoch": 4.562899786780384,
"grad_norm": 0.1998996367095556,
"learning_rate": 9.478851849268733e-07,
"loss": 1.0181862115859985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24299973249435425,
"step": 1072,
"valid_targets_mean": 15323.3,
"valid_targets_min": 4432
},
{
"epoch": 4.567164179104478,
"grad_norm": 0.1820641069793803,
"learning_rate": 9.298861737685527e-07,
"loss": 0.9861880540847778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25722503662109375,
"step": 1073,
"valid_targets_mean": 15189.2,
"valid_targets_min": 4444
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.18153700558157096,
"learning_rate": 9.120556249143341e-07,
"loss": 0.9695085287094116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25585031509399414,
"step": 1074,
"valid_targets_mean": 15192.9,
"valid_targets_min": 6463
},
{
"epoch": 4.575692963752665,
"grad_norm": 0.19006038413199366,
"learning_rate": 8.943936958763988e-07,
"loss": 0.9866265058517456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.243064284324646,
"step": 1075,
"valid_targets_mean": 14832.0,
"valid_targets_min": 6920
},
{
"epoch": 4.5799573560767595,
"grad_norm": 0.19175574211225055,
"learning_rate": 8.769005426773836e-07,
"loss": 0.963053822517395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25238820910453796,
"step": 1076,
"valid_targets_mean": 14330.1,
"valid_targets_min": 3132
},
{
"epoch": 4.584221748400853,
"grad_norm": 0.19962468737797834,
"learning_rate": 8.595763198489714e-07,
"loss": 0.9740028977394104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2691187858581543,
"step": 1077,
"valid_targets_mean": 15214.1,
"valid_targets_min": 2728
},
{
"epoch": 4.588486140724947,
"grad_norm": 0.19543672587397726,
"learning_rate": 8.42421180430546e-07,
"loss": 0.9532477855682373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24406878650188446,
"step": 1078,
"valid_targets_mean": 13688.9,
"valid_targets_min": 3217
},
{
"epoch": 4.59275053304904,
"grad_norm": 0.19673814011906443,
"learning_rate": 8.254352759678386e-07,
"loss": 0.9715753793716431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24512049555778503,
"step": 1079,
"valid_targets_mean": 14655.3,
"valid_targets_min": 1422
},
{
"epoch": 4.597014925373134,
"grad_norm": 0.1933483442932517,
"learning_rate": 8.086187565115877e-07,
"loss": 0.9847027063369751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25676167011260986,
"step": 1080,
"valid_targets_mean": 14677.1,
"valid_targets_min": 6142
},
{
"epoch": 4.601279317697228,
"grad_norm": 0.1923172399437775,
"learning_rate": 7.919717706162067e-07,
"loss": 0.9372127056121826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22452899813652039,
"step": 1081,
"valid_targets_mean": 15138.4,
"valid_targets_min": 4384
},
{
"epoch": 4.605543710021322,
"grad_norm": 0.18797774715751278,
"learning_rate": 7.754944653384777e-07,
"loss": 0.9522218704223633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23005110025405884,
"step": 1082,
"valid_targets_mean": 14424.0,
"valid_targets_min": 3680
},
{
"epoch": 4.609808102345416,
"grad_norm": 0.20412765681703315,
"learning_rate": 7.591869862362534e-07,
"loss": 0.9851837158203125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2484971135854721,
"step": 1083,
"valid_targets_mean": 13968.2,
"valid_targets_min": 2376
},
{
"epoch": 4.61407249466951,
"grad_norm": 0.18705295806126554,
"learning_rate": 7.430494773671682e-07,
"loss": 0.9888862371444702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22050131857395172,
"step": 1084,
"valid_targets_mean": 13599.2,
"valid_targets_min": 1005
},
{
"epoch": 4.618336886993603,
"grad_norm": 0.18103862779139593,
"learning_rate": 7.270820812873714e-07,
"loss": 0.9169750809669495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22809740900993347,
"step": 1085,
"valid_targets_mean": 14565.4,
"valid_targets_min": 3230
},
{
"epoch": 4.622601279317697,
"grad_norm": 0.20129642792242997,
"learning_rate": 7.112849390502563e-07,
"loss": 0.9655887484550476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23568880558013916,
"step": 1086,
"valid_targets_mean": 14391.3,
"valid_targets_min": 5144
},
{
"epoch": 4.6268656716417915,
"grad_norm": 0.19291335895387385,
"learning_rate": 6.956581902052306e-07,
"loss": 0.9082891941070557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25022265315055847,
"step": 1087,
"valid_targets_mean": 14731.2,
"valid_targets_min": 4783
},
{
"epoch": 4.631130063965885,
"grad_norm": 0.18812618459759783,
"learning_rate": 6.802019727964593e-07,
"loss": 0.9885333180427551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24837151169776917,
"step": 1088,
"valid_targets_mean": 15128.8,
"valid_targets_min": 7616
},
{
"epoch": 4.635394456289979,
"grad_norm": 0.18344580845464492,
"learning_rate": 6.64916423361679e-07,
"loss": 0.9694564342498779,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2605583667755127,
"step": 1089,
"valid_targets_mean": 15145.5,
"valid_targets_min": 2543
},
{
"epoch": 4.639658848614072,
"grad_norm": 0.19009505707846464,
"learning_rate": 6.498016769309567e-07,
"loss": 0.9988362193107605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25263822078704834,
"step": 1090,
"valid_targets_mean": 14835.9,
"valid_targets_min": 6481
},
{
"epoch": 4.643923240938166,
"grad_norm": 0.18706541614986036,
"learning_rate": 6.348578670255224e-07,
"loss": 0.9596368074417114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2152118682861328,
"step": 1091,
"valid_targets_mean": 14126.3,
"valid_targets_min": 2377
},
{
"epoch": 4.6481876332622605,
"grad_norm": 0.18810915341347045,
"learning_rate": 6.200851256565799e-07,
"loss": 0.9877229928970337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2393847107887268,
"step": 1092,
"valid_targets_mean": 15427.7,
"valid_targets_min": 5751
},
{
"epoch": 4.652452025586354,
"grad_norm": 0.19928457189939688,
"learning_rate": 6.054835833241357e-07,
"loss": 1.0273993015289307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2642330229282379,
"step": 1093,
"valid_targets_mean": 15013.9,
"valid_targets_min": 4037
},
{
"epoch": 4.656716417910448,
"grad_norm": 0.1889990280202245,
"learning_rate": 5.910533690158593e-07,
"loss": 0.940179705619812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23338699340820312,
"step": 1094,
"valid_targets_mean": 14892.3,
"valid_targets_min": 3271
},
{
"epoch": 4.660980810234541,
"grad_norm": 0.18391492586969269,
"learning_rate": 5.767946102059307e-07,
"loss": 0.9359939694404602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2344275861978531,
"step": 1095,
"valid_targets_mean": 14785.7,
"valid_targets_min": 8805
},
{
"epoch": 4.665245202558635,
"grad_norm": 0.19734063165504975,
"learning_rate": 5.627074328539173e-07,
"loss": 0.9584230780601501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23185734450817108,
"step": 1096,
"valid_targets_mean": 14196.5,
"valid_targets_min": 2432
},
{
"epoch": 4.669509594882729,
"grad_norm": 0.18300224884424285,
"learning_rate": 5.487919614036741e-07,
"loss": 0.9888830184936523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24649932980537415,
"step": 1097,
"valid_targets_mean": 14630.5,
"valid_targets_min": 927
},
{
"epoch": 4.673773987206823,
"grad_norm": 0.18499557400386807,
"learning_rate": 5.350483187822231e-07,
"loss": 0.9847787618637085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22201907634735107,
"step": 1098,
"valid_targets_mean": 14386.3,
"valid_targets_min": 2252
},
{
"epoch": 4.678038379530917,
"grad_norm": 0.19238368327234123,
"learning_rate": 5.214766263986848e-07,
"loss": 0.9672181606292725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23221909999847412,
"step": 1099,
"valid_targets_mean": 14210.4,
"valid_targets_min": 4534
},
{
"epoch": 4.682302771855011,
"grad_norm": 0.21105431763048013,
"learning_rate": 5.080770041431926e-07,
"loss": 0.9740506410598755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2598978281021118,
"step": 1100,
"valid_targets_mean": 14957.4,
"valid_targets_min": 7315
},
{
"epoch": 4.686567164179104,
"grad_norm": 0.19163944687988396,
"learning_rate": 4.948495703858492e-07,
"loss": 1.0037099123001099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24747025966644287,
"step": 1101,
"valid_targets_mean": 15043.2,
"valid_targets_min": 7809
},
{
"epoch": 4.690831556503198,
"grad_norm": 0.2028138879391803,
"learning_rate": 4.81794441975667e-07,
"loss": 0.9758346080780029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24678604304790497,
"step": 1102,
"valid_targets_mean": 14021.8,
"valid_targets_min": 4164
},
{
"epoch": 4.6950959488272925,
"grad_norm": 0.18419384239308548,
"learning_rate": 4.689117342395388e-07,
"loss": 0.9496323466300964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24128226935863495,
"step": 1103,
"valid_targets_mean": 14995.4,
"valid_targets_min": 7534
},
{
"epoch": 4.699360341151386,
"grad_norm": 0.21041750416967223,
"learning_rate": 4.5620156098122204e-07,
"loss": 0.9582512974739075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24600455164909363,
"step": 1104,
"valid_targets_mean": 15277.9,
"valid_targets_min": 5342
},
{
"epoch": 4.70362473347548,
"grad_norm": 0.18497727482724743,
"learning_rate": 4.4366403448033334e-07,
"loss": 0.9745293855667114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.241338849067688,
"step": 1105,
"valid_targets_mean": 14005.7,
"valid_targets_min": 3799
},
{
"epoch": 4.707889125799573,
"grad_norm": 0.19615234574871981,
"learning_rate": 4.3129926549136057e-07,
"loss": 0.9649718999862671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21115264296531677,
"step": 1106,
"valid_targets_mean": 13501.6,
"valid_targets_min": 1542
},
{
"epoch": 4.712153518123667,
"grad_norm": 0.1837567434240439,
"learning_rate": 4.191073632426701e-07,
"loss": 0.9577633738517761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2420632541179657,
"step": 1107,
"valid_targets_mean": 14789.1,
"valid_targets_min": 2956
},
{
"epoch": 4.7164179104477615,
"grad_norm": 0.20259924674259924,
"learning_rate": 4.0708843543555643e-07,
"loss": 0.9597268104553223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25918832421302795,
"step": 1108,
"valid_targets_mean": 14971.0,
"valid_targets_min": 7082
},
{
"epoch": 4.720682302771855,
"grad_norm": 0.1836845346354727,
"learning_rate": 3.95242588243292e-07,
"loss": 0.9578450322151184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24787406623363495,
"step": 1109,
"valid_targets_mean": 14803.8,
"valid_targets_min": 5534
},
{
"epoch": 4.724946695095949,
"grad_norm": 0.1899517621683741,
"learning_rate": 3.8356992631017e-07,
"loss": 0.9749801158905029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2569088637828827,
"step": 1110,
"valid_targets_mean": 14341.0,
"valid_targets_min": 4140
},
{
"epoch": 4.729211087420042,
"grad_norm": 0.18434897929537367,
"learning_rate": 3.720705527506008e-07,
"loss": 0.9631055593490601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22997114062309265,
"step": 1111,
"valid_targets_mean": 14496.9,
"valid_targets_min": 2722
},
{
"epoch": 4.733475479744136,
"grad_norm": 0.18472017417978503,
"learning_rate": 3.60744569148197e-07,
"loss": 0.9523324966430664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25216734409332275,
"step": 1112,
"valid_targets_mean": 15027.6,
"valid_targets_min": 8215
},
{
"epoch": 4.73773987206823,
"grad_norm": 0.18397508985131536,
"learning_rate": 3.4959207555485873e-07,
"loss": 0.9814821481704712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24376820027828217,
"step": 1113,
"valid_targets_mean": 14313.0,
"valid_targets_min": 1735
},
{
"epoch": 4.742004264392325,
"grad_norm": 0.1911943136789838,
"learning_rate": 3.3861317048992317e-07,
"loss": 0.9813393950462341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24430274963378906,
"step": 1114,
"valid_targets_mean": 14462.2,
"valid_targets_min": 4500
},
{
"epoch": 4.746268656716418,
"grad_norm": 0.1873194680136805,
"learning_rate": 3.278079509392562e-07,
"loss": 0.9734345078468323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2578297555446625,
"step": 1115,
"valid_targets_mean": 15003.0,
"valid_targets_min": 1331
},
{
"epoch": 4.750533049040512,
"grad_norm": 0.1908834484987274,
"learning_rate": 3.171765123544224e-07,
"loss": 0.9803054332733154,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26190507411956787,
"step": 1116,
"valid_targets_mean": 14364.8,
"valid_targets_min": 1532
},
{
"epoch": 4.754797441364605,
"grad_norm": 0.19667911628591056,
"learning_rate": 3.06718948651834e-07,
"loss": 0.9418925046920776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24983903765678406,
"step": 1117,
"valid_targets_mean": 15741.3,
"valid_targets_min": 9257
},
{
"epoch": 4.759061833688699,
"grad_norm": 0.1857840585589905,
"learning_rate": 2.964353522119168e-07,
"loss": 0.9536681771278381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23696255683898926,
"step": 1118,
"valid_targets_mean": 14265.1,
"valid_targets_min": 3473
},
{
"epoch": 4.7633262260127935,
"grad_norm": 0.18211523776490568,
"learning_rate": 2.863258138783032e-07,
"loss": 0.9456994533538818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2278173565864563,
"step": 1119,
"valid_targets_mean": 15495.7,
"valid_targets_min": 7691
},
{
"epoch": 4.767590618336887,
"grad_norm": 0.18772012264434046,
"learning_rate": 2.7639042295702245e-07,
"loss": 0.990449070930481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.255142480134964,
"step": 1120,
"valid_targets_mean": 14624.2,
"valid_targets_min": 2602
},
{
"epoch": 4.771855010660981,
"grad_norm": 0.17794325021838514,
"learning_rate": 2.666292672157056e-07,
"loss": 1.004270076751709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2521766722202301,
"step": 1121,
"valid_targets_mean": 15172.8,
"valid_targets_min": 4005
},
{
"epoch": 4.776119402985074,
"grad_norm": 0.18852994886358498,
"learning_rate": 2.570424328828325e-07,
"loss": 1.0067514181137085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.261335551738739,
"step": 1122,
"valid_targets_mean": 14962.2,
"valid_targets_min": 6855
},
{
"epoch": 4.780383795309168,
"grad_norm": 0.18114120226091554,
"learning_rate": 2.4763000464694377e-07,
"loss": 0.9487168192863464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24179330468177795,
"step": 1123,
"valid_targets_mean": 15655.1,
"valid_targets_min": 7801
},
{
"epoch": 4.7846481876332625,
"grad_norm": 0.1910696091037759,
"learning_rate": 2.383920656559102e-07,
"loss": 0.946317195892334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22754478454589844,
"step": 1124,
"valid_targets_mean": 14220.1,
"valid_targets_min": 1366
},
{
"epoch": 4.788912579957356,
"grad_norm": 0.1740135498338563,
"learning_rate": 2.2932869751619568e-07,
"loss": 0.923102855682373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24337363243103027,
"step": 1125,
"valid_targets_mean": 14937.3,
"valid_targets_min": 4412
},
{
"epoch": 4.79317697228145,
"grad_norm": 0.19122995459784278,
"learning_rate": 2.2043998029212643e-07,
"loss": 0.9572536945343018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2463478446006775,
"step": 1126,
"valid_targets_mean": 14208.7,
"valid_targets_min": 3253
},
{
"epoch": 4.797441364605544,
"grad_norm": 0.182450314547117,
"learning_rate": 2.1172599250519398e-07,
"loss": 0.9736548662185669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.228672057390213,
"step": 1127,
"valid_targets_mean": 14014.3,
"valid_targets_min": 4944
},
{
"epoch": 4.801705756929637,
"grad_norm": 0.19054240714571696,
"learning_rate": 2.0318681113336013e-07,
"loss": 1.0034470558166504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28851258754730225,
"step": 1128,
"valid_targets_mean": 15012.4,
"valid_targets_min": 7985
},
{
"epoch": 4.8059701492537314,
"grad_norm": 0.1814757020638321,
"learning_rate": 1.9482251161037302e-07,
"loss": 0.9663975834846497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2275254726409912,
"step": 1129,
"valid_targets_mean": 15023.7,
"valid_targets_min": 4122
},
{
"epoch": 4.810234541577826,
"grad_norm": 0.18317281762468177,
"learning_rate": 1.866331678251032e-07,
"loss": 0.9956861734390259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2160797119140625,
"step": 1130,
"valid_targets_mean": 13762.8,
"valid_targets_min": 2047
},
{
"epoch": 4.814498933901919,
"grad_norm": 0.18636653048095259,
"learning_rate": 1.7861885212088869e-07,
"loss": 0.9476275444030762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2452719658613205,
"step": 1131,
"valid_targets_mean": 15130.3,
"valid_targets_min": 7622
},
{
"epoch": 4.818763326226013,
"grad_norm": 0.1870893164405279,
"learning_rate": 1.7077963529490204e-07,
"loss": 0.975817084312439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2513091564178467,
"step": 1132,
"valid_targets_mean": 14312.8,
"valid_targets_min": 5570
},
{
"epoch": 4.823027718550106,
"grad_norm": 0.18847022911078076,
"learning_rate": 1.6311558659751535e-07,
"loss": 0.9581156373023987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22693569958209991,
"step": 1133,
"valid_targets_mean": 14382.9,
"valid_targets_min": 1264
},
{
"epoch": 4.8272921108742,
"grad_norm": 0.18763662045198543,
"learning_rate": 1.5562677373169855e-07,
"loss": 0.9886241555213928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27293211221694946,
"step": 1134,
"valid_targets_mean": 14403.2,
"valid_targets_min": 2343
},
{
"epoch": 4.8315565031982945,
"grad_norm": 0.20077979460564824,
"learning_rate": 1.483132628524131e-07,
"loss": 1.0126842260360718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2457951009273529,
"step": 1135,
"valid_targets_mean": 13454.9,
"valid_targets_min": 1662
},
{
"epoch": 4.835820895522388,
"grad_norm": 0.18687809459757265,
"learning_rate": 1.4117511856603262e-07,
"loss": 0.964205801486969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24054095149040222,
"step": 1136,
"valid_targets_mean": 14659.2,
"valid_targets_min": 7141
},
{
"epoch": 4.840085287846482,
"grad_norm": 0.17951977462647573,
"learning_rate": 1.342124039297721e-07,
"loss": 0.9916234016418457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2584913671016693,
"step": 1137,
"valid_targets_mean": 14581.5,
"valid_targets_min": 3649
},
{
"epoch": 4.844349680170575,
"grad_norm": 0.1876770296121893,
"learning_rate": 1.2742518045112396e-07,
"loss": 0.9750006198883057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25414639711380005,
"step": 1138,
"valid_targets_mean": 14580.6,
"valid_targets_min": 4957
},
{
"epoch": 4.848614072494669,
"grad_norm": 0.1814102798805498,
"learning_rate": 1.2081350808732518e-07,
"loss": 0.919538676738739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25270211696624756,
"step": 1139,
"valid_targets_mean": 14550.2,
"valid_targets_min": 5470
},
{
"epoch": 4.8528784648187635,
"grad_norm": 0.1950808693960726,
"learning_rate": 1.143774452448243e-07,
"loss": 1.0680432319641113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2641150951385498,
"step": 1140,
"valid_targets_mean": 14606.9,
"valid_targets_min": 2001
},
{
"epoch": 4.857142857142857,
"grad_norm": 0.17672726951039464,
"learning_rate": 1.0811704877875528e-07,
"loss": 0.9682325124740601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22097748517990112,
"step": 1141,
"valid_targets_mean": 14835.0,
"valid_targets_min": 2783
},
{
"epoch": 4.861407249466951,
"grad_norm": 0.1833672772740864,
"learning_rate": 1.0203237399245336e-07,
"loss": 0.9909142851829529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2378884106874466,
"step": 1142,
"valid_targets_mean": 14631.3,
"valid_targets_min": 3338
},
{
"epoch": 4.865671641791045,
"grad_norm": 0.18592331736700526,
"learning_rate": 9.612347463694882e-08,
"loss": 0.9562100172042847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24233251810073853,
"step": 1143,
"valid_targets_mean": 15098.5,
"valid_targets_min": 3303
},
{
"epoch": 4.869936034115138,
"grad_norm": 0.18082248229448622,
"learning_rate": 9.039040291050738e-08,
"loss": 0.9645106792449951,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25318577885627747,
"step": 1144,
"valid_targets_mean": 15200.3,
"valid_targets_min": 3911
},
{
"epoch": 4.8742004264392325,
"grad_norm": 0.18353141910274656,
"learning_rate": 8.483320945815499e-08,
"loss": 0.9630197286605835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24674592912197113,
"step": 1145,
"valid_targets_mean": 15471.6,
"valid_targets_min": 2373
},
{
"epoch": 4.878464818763327,
"grad_norm": 0.18176404653949083,
"learning_rate": 7.945194337124262e-08,
"loss": 0.9523903131484985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24069157242774963,
"step": 1146,
"valid_targets_mean": 15206.4,
"valid_targets_min": 4402
},
{
"epoch": 4.88272921108742,
"grad_norm": 0.19201063070974406,
"learning_rate": 7.424665218700444e-08,
"loss": 1.0448331832885742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2784254550933838,
"step": 1147,
"valid_targets_mean": 15334.1,
"valid_targets_min": 9093
},
{
"epoch": 4.886993603411514,
"grad_norm": 0.1783617740470841,
"learning_rate": 6.921738188814254e-08,
"loss": 0.9594995379447937,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2339986264705658,
"step": 1148,
"valid_targets_mean": 14965.5,
"valid_targets_min": 7527
},
{
"epoch": 4.891257995735607,
"grad_norm": 0.18561765833213933,
"learning_rate": 6.436417690241614e-08,
"loss": 0.9661248922348022,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21553745865821838,
"step": 1149,
"valid_targets_mean": 13662.2,
"valid_targets_min": 1875
},
{
"epoch": 4.895522388059701,
"grad_norm": 0.18503637692193856,
"learning_rate": 5.968708010225532e-08,
"loss": 0.9736925363540649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.268303781747818,
"step": 1150,
"valid_targets_mean": 15144.8,
"valid_targets_min": 9892
},
{
"epoch": 4.899786780383796,
"grad_norm": 0.17701878705856514,
"learning_rate": 5.518613280437901e-08,
"loss": 0.9437923431396484,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2272908240556717,
"step": 1151,
"valid_targets_mean": 14869.4,
"valid_targets_min": 2024
},
{
"epoch": 4.904051172707889,
"grad_norm": 0.181032855927092,
"learning_rate": 5.0861374769426433e-08,
"loss": 0.9633027911186218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2316833883523941,
"step": 1152,
"valid_targets_mean": 13918.9,
"valid_targets_min": 4828
},
{
"epoch": 4.908315565031983,
"grad_norm": 0.18263654557472672,
"learning_rate": 4.671284420161071e-08,
"loss": 0.9642020463943481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23549358546733856,
"step": 1153,
"valid_targets_mean": 14218.9,
"valid_targets_min": 4878
},
{
"epoch": 4.912579957356077,
"grad_norm": 0.17852479124594317,
"learning_rate": 4.274057774838136e-08,
"loss": 0.9545692205429077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23123939335346222,
"step": 1154,
"valid_targets_mean": 14188.5,
"valid_targets_min": 1968
},
{
"epoch": 4.91684434968017,
"grad_norm": 0.1762245137912565,
"learning_rate": 3.894461050010012e-08,
"loss": 0.9212028384208679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19921639561653137,
"step": 1155,
"valid_targets_mean": 13616.8,
"valid_targets_min": 1976
},
{
"epoch": 4.9211087420042645,
"grad_norm": 0.1895851250116108,
"learning_rate": 3.5324975989725615e-08,
"loss": 0.9664217233657837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2327403724193573,
"step": 1156,
"valid_targets_mean": 14298.6,
"valid_targets_min": 1530
},
{
"epoch": 4.925373134328359,
"grad_norm": 0.17718130578936006,
"learning_rate": 3.188170619252473e-08,
"loss": 0.9604615569114685,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24611347913742065,
"step": 1157,
"valid_targets_mean": 15502.1,
"valid_targets_min": 10452
},
{
"epoch": 4.929637526652452,
"grad_norm": 0.17461171312398904,
"learning_rate": 2.8614831525786147e-08,
"loss": 0.9696751832962036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23947827517986298,
"step": 1158,
"valid_targets_mean": 15015.1,
"valid_targets_min": 8581
},
{
"epoch": 4.933901918976546,
"grad_norm": 0.1958379764084762,
"learning_rate": 2.552438084855613e-08,
"loss": 0.9708175659179688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23773235082626343,
"step": 1159,
"valid_targets_mean": 14752.8,
"valid_targets_min": 3297
},
{
"epoch": 4.938166311300639,
"grad_norm": 0.1879404170175882,
"learning_rate": 2.2610381461372068e-08,
"loss": 1.0133110284805298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2715913951396942,
"step": 1160,
"valid_targets_mean": 15038.8,
"valid_targets_min": 7635
},
{
"epoch": 4.9424307036247335,
"grad_norm": 0.1767207674354065,
"learning_rate": 1.987285910603598e-08,
"loss": 0.9827720522880554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24032725393772125,
"step": 1161,
"valid_targets_mean": 14799.3,
"valid_targets_min": 3960
},
{
"epoch": 4.946695095948828,
"grad_norm": 0.19329327315064376,
"learning_rate": 1.7311837965379164e-08,
"loss": 0.981905460357666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2515614926815033,
"step": 1162,
"valid_targets_mean": 15367.5,
"valid_targets_min": 7569
},
{
"epoch": 4.950959488272921,
"grad_norm": 0.1801223332568921,
"learning_rate": 1.4927340663046798e-08,
"loss": 0.9775525331497192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.251977801322937,
"step": 1163,
"valid_targets_mean": 14676.9,
"valid_targets_min": 2093
},
{
"epoch": 4.955223880597015,
"grad_norm": 0.18051575173508805,
"learning_rate": 1.2719388263300325e-08,
"loss": 0.9846411943435669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23140552639961243,
"step": 1164,
"valid_targets_mean": 13634.5,
"valid_targets_min": 2356
},
{
"epoch": 4.959488272921108,
"grad_norm": 0.17883061175729384,
"learning_rate": 1.0688000270839827e-08,
"loss": 1.003123164176941,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2499091625213623,
"step": 1165,
"valid_targets_mean": 14019.7,
"valid_targets_min": 1756
},
{
"epoch": 4.963752665245202,
"grad_norm": 0.20200364697082135,
"learning_rate": 8.833194630615271e-09,
"loss": 0.9979058504104614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2524784207344055,
"step": 1166,
"valid_targets_mean": 14548.3,
"valid_targets_min": 7809
},
{
"epoch": 4.968017057569297,
"grad_norm": 0.18544115356837396,
"learning_rate": 7.154987727682194e-09,
"loss": 0.9676626920700073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24133968353271484,
"step": 1167,
"valid_targets_mean": 14866.4,
"valid_targets_min": 4105
},
{
"epoch": 4.97228144989339,
"grad_norm": 0.17635779057501313,
"learning_rate": 5.6533943870462625e-09,
"loss": 0.9879981279373169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24625255167484283,
"step": 1168,
"valid_targets_mean": 14967.6,
"valid_targets_min": 3003
},
{
"epoch": 4.976545842217484,
"grad_norm": 0.1845314169679861,
"learning_rate": 4.328427873541152e-09,
"loss": 1.0250025987625122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2580295205116272,
"step": 1169,
"valid_targets_mean": 15066.1,
"valid_targets_min": 8759
},
{
"epoch": 4.980810234541578,
"grad_norm": 0.1861702009125637,
"learning_rate": 3.1800998917086432e-09,
"loss": 0.9731056094169617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2466808259487152,
"step": 1170,
"valid_targets_mean": 15083.5,
"valid_targets_min": 5835
},
{
"epoch": 4.985074626865671,
"grad_norm": 0.17734566957344822,
"learning_rate": 2.2084205856920393e-09,
"loss": 0.9649834632873535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2296658605337143,
"step": 1171,
"valid_targets_mean": 14655.2,
"valid_targets_min": 3045
},
{
"epoch": 4.9893390191897655,
"grad_norm": 0.1865006084861443,
"learning_rate": 1.4133985391473482e-09,
"loss": 1.0229482650756836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2613421678543091,
"step": 1172,
"valid_targets_mean": 14752.4,
"valid_targets_min": 3992
},
{
"epoch": 4.99360341151386,
"grad_norm": 0.18027432476731153,
"learning_rate": 7.950407751722288e-10,
"loss": 0.9098262786865234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22018353641033173,
"step": 1173,
"valid_targets_mean": 14614.0,
"valid_targets_min": 4385
},
{
"epoch": 4.997867803837953,
"grad_norm": 0.17741020957006148,
"learning_rate": 3.5335275624159835e-10,
"loss": 0.9602517485618591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24667122960090637,
"step": 1174,
"valid_targets_mean": 15355.7,
"valid_targets_min": 8230
},
{
"epoch": 5.0,
"grad_norm": 0.2558608122090385,
"learning_rate": 8.833838415212014e-11,
"loss": 1.0513684749603271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4691426753997803,
"step": 1175,
"valid_targets_mean": 13499.5,
"valid_targets_min": 3345
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4691426753997803,
"step": 1175,
"total_flos": 1658037051588608.0,
"train_loss": 1.0214215231449046,
"train_runtime": 6105.894,
"train_samples_per_second": 24.566,
"train_steps_per_second": 0.192,
"valid_targets_mean": 13499.5,
"valid_targets_min": 3345
}
],
"logging_steps": 1,
"max_steps": 1175,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1658037051588608.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}