Files
sft__ot30k_SmolLM2-1.7B-16k…/trainer_state.json
ModelHub XC 2fa421e8d0 初始化项目,由ModelHub XC社区提供模型
Model: open-sci/sft__ot30k_SmolLM2-1.7B-16k-SFT-Tulu3-decontaminated
Source: Original Platform
2026-04-28 08:43:03 +08:00

12973 lines
375 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1175,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0042643923240938165,
"grad_norm": 50.790045190656855,
"learning_rate": 0.0,
"loss": 2.170248031616211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5367974638938904,
"step": 1,
"valid_targets_mean": 14321.9,
"valid_targets_min": 2475
},
{
"epoch": 0.008528784648187633,
"grad_norm": 46.779308529450724,
"learning_rate": 3.3898305084745766e-07,
"loss": 2.188108205795288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5509403944015503,
"step": 2,
"valid_targets_mean": 14621.8,
"valid_targets_min": 3312
},
{
"epoch": 0.01279317697228145,
"grad_norm": 45.17243380366014,
"learning_rate": 6.779661016949153e-07,
"loss": 2.130862236022949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5246433019638062,
"step": 3,
"valid_targets_mean": 15019.3,
"valid_targets_min": 2408
},
{
"epoch": 0.017057569296375266,
"grad_norm": 47.185009501724274,
"learning_rate": 1.016949152542373e-06,
"loss": 2.1715633869171143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5440387725830078,
"step": 4,
"valid_targets_mean": 13981.1,
"valid_targets_min": 1353
},
{
"epoch": 0.021321961620469083,
"grad_norm": 44.57211946253561,
"learning_rate": 1.3559322033898307e-06,
"loss": 2.1357712745666504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5245351791381836,
"step": 5,
"valid_targets_mean": 14371.6,
"valid_targets_min": 4931
},
{
"epoch": 0.0255863539445629,
"grad_norm": 44.09480015186751,
"learning_rate": 1.6949152542372882e-06,
"loss": 2.0977840423583984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5290247201919556,
"step": 6,
"valid_targets_mean": 14774.8,
"valid_targets_min": 2091
},
{
"epoch": 0.029850746268656716,
"grad_norm": 44.95465500866499,
"learning_rate": 2.033898305084746e-06,
"loss": 2.1269590854644775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.540243923664093,
"step": 7,
"valid_targets_mean": 15021.7,
"valid_targets_min": 5734
},
{
"epoch": 0.03411513859275053,
"grad_norm": 45.532705317520396,
"learning_rate": 2.372881355932204e-06,
"loss": 2.130779981613159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5535479784011841,
"step": 8,
"valid_targets_mean": 15463.4,
"valid_targets_min": 5095
},
{
"epoch": 0.03837953091684435,
"grad_norm": 45.065532325499156,
"learning_rate": 2.7118644067796613e-06,
"loss": 2.1472434997558594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.562113881111145,
"step": 9,
"valid_targets_mean": 15351.3,
"valid_targets_min": 12169
},
{
"epoch": 0.042643923240938165,
"grad_norm": 44.571271806685175,
"learning_rate": 3.0508474576271192e-06,
"loss": 2.1082420349121094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5148895382881165,
"step": 10,
"valid_targets_mean": 15040.1,
"valid_targets_min": 3013
},
{
"epoch": 0.046908315565031986,
"grad_norm": 42.425308158522355,
"learning_rate": 3.3898305084745763e-06,
"loss": 2.1142380237579346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5395100116729736,
"step": 11,
"valid_targets_mean": 14724.2,
"valid_targets_min": 2001
},
{
"epoch": 0.0511727078891258,
"grad_norm": 39.31864273361584,
"learning_rate": 3.7288135593220342e-06,
"loss": 2.0478177070617676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5402687788009644,
"step": 12,
"valid_targets_mean": 15195.5,
"valid_targets_min": 1428
},
{
"epoch": 0.05543710021321962,
"grad_norm": 40.830049863509025,
"learning_rate": 4.067796610169492e-06,
"loss": 2.0825119018554688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5617735385894775,
"step": 13,
"valid_targets_mean": 15551.3,
"valid_targets_min": 11106
},
{
"epoch": 0.05970149253731343,
"grad_norm": 38.04846079305305,
"learning_rate": 4.40677966101695e-06,
"loss": 2.046898603439331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5019025802612305,
"step": 14,
"valid_targets_mean": 14848.9,
"valid_targets_min": 9958
},
{
"epoch": 0.06396588486140725,
"grad_norm": 34.95663903713111,
"learning_rate": 4.745762711864408e-06,
"loss": 2.0071847438812256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5296506285667419,
"step": 15,
"valid_targets_mean": 15191.9,
"valid_targets_min": 3766
},
{
"epoch": 0.06823027718550106,
"grad_norm": 27.054249981999543,
"learning_rate": 5.084745762711865e-06,
"loss": 1.8666037321090698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.45470499992370605,
"step": 16,
"valid_targets_mean": 15216.2,
"valid_targets_min": 6239
},
{
"epoch": 0.07249466950959488,
"grad_norm": 26.261466564870013,
"learning_rate": 5.423728813559323e-06,
"loss": 1.916226863861084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47518885135650635,
"step": 17,
"valid_targets_mean": 15284.0,
"valid_targets_min": 2914
},
{
"epoch": 0.0767590618336887,
"grad_norm": 27.232268863118826,
"learning_rate": 5.7627118644067805e-06,
"loss": 1.9307100772857666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5244933366775513,
"step": 18,
"valid_targets_mean": 15771.8,
"valid_targets_min": 12412
},
{
"epoch": 0.08102345415778252,
"grad_norm": 27.47901741613614,
"learning_rate": 6.1016949152542385e-06,
"loss": 1.8952523469924927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4579699635505676,
"step": 19,
"valid_targets_mean": 14159.2,
"valid_targets_min": 2017
},
{
"epoch": 0.08528784648187633,
"grad_norm": 26.829717100318437,
"learning_rate": 6.440677966101695e-06,
"loss": 1.9043647050857544,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47290629148483276,
"step": 20,
"valid_targets_mean": 14576.8,
"valid_targets_min": 1870
},
{
"epoch": 0.08955223880597014,
"grad_norm": 14.549811010244184,
"learning_rate": 6.779661016949153e-06,
"loss": 1.7417538166046143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42348814010620117,
"step": 21,
"valid_targets_mean": 14887.2,
"valid_targets_min": 2398
},
{
"epoch": 0.09381663113006397,
"grad_norm": 12.900341344592826,
"learning_rate": 7.1186440677966106e-06,
"loss": 1.6977145671844482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.41473180055618286,
"step": 22,
"valid_targets_mean": 14468.2,
"valid_targets_min": 5296
},
{
"epoch": 0.09808102345415778,
"grad_norm": 12.245233952586673,
"learning_rate": 7.4576271186440685e-06,
"loss": 1.72165846824646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4268421232700348,
"step": 23,
"valid_targets_mean": 15792.5,
"valid_targets_min": 8823
},
{
"epoch": 0.1023454157782516,
"grad_norm": 11.887975704497741,
"learning_rate": 7.796610169491526e-06,
"loss": 1.6677861213684082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4327390491962433,
"step": 24,
"valid_targets_mean": 15825.5,
"valid_targets_min": 8654
},
{
"epoch": 0.10660980810234541,
"grad_norm": 10.062587166710248,
"learning_rate": 8.135593220338983e-06,
"loss": 1.6380515098571777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37642329931259155,
"step": 25,
"valid_targets_mean": 13636.7,
"valid_targets_min": 2118
},
{
"epoch": 0.11087420042643924,
"grad_norm": 10.24381012683112,
"learning_rate": 8.47457627118644e-06,
"loss": 1.6145225763320923,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3890722990036011,
"step": 26,
"valid_targets_mean": 13839.9,
"valid_targets_min": 2406
},
{
"epoch": 0.11513859275053305,
"grad_norm": 8.757077786632445,
"learning_rate": 8.8135593220339e-06,
"loss": 1.551984190940857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38796466588974,
"step": 27,
"valid_targets_mean": 15686.6,
"valid_targets_min": 9771
},
{
"epoch": 0.11940298507462686,
"grad_norm": 6.502960108459334,
"learning_rate": 9.152542372881356e-06,
"loss": 1.5993372201919556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3797028660774231,
"step": 28,
"valid_targets_mean": 14795.3,
"valid_targets_min": 5847
},
{
"epoch": 0.12366737739872068,
"grad_norm": 4.686333165951496,
"learning_rate": 9.491525423728815e-06,
"loss": 1.499677300453186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3852105736732483,
"step": 29,
"valid_targets_mean": 14799.2,
"valid_targets_min": 3518
},
{
"epoch": 0.1279317697228145,
"grad_norm": 4.4376965727817215,
"learning_rate": 9.830508474576272e-06,
"loss": 1.5449557304382324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3900710344314575,
"step": 30,
"valid_targets_mean": 15237.7,
"valid_targets_min": 7796
},
{
"epoch": 0.13219616204690832,
"grad_norm": 3.7222824944675934,
"learning_rate": 1.016949152542373e-05,
"loss": 1.4495660066604614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36171507835388184,
"step": 31,
"valid_targets_mean": 14774.0,
"valid_targets_min": 1794
},
{
"epoch": 0.13646055437100213,
"grad_norm": 3.336876802962369,
"learning_rate": 1.0508474576271188e-05,
"loss": 1.4464428424835205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35169264674186707,
"step": 32,
"valid_targets_mean": 15008.9,
"valid_targets_min": 2413
},
{
"epoch": 0.14072494669509594,
"grad_norm": 3.3623869726867768,
"learning_rate": 1.0847457627118645e-05,
"loss": 1.509070873260498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.382954865694046,
"step": 33,
"valid_targets_mean": 15760.6,
"valid_targets_min": 9774
},
{
"epoch": 0.14498933901918976,
"grad_norm": 3.077441715939849,
"learning_rate": 1.1186440677966102e-05,
"loss": 1.4670354127883911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38647815585136414,
"step": 34,
"valid_targets_mean": 15682.1,
"valid_targets_min": 11789
},
{
"epoch": 0.14925373134328357,
"grad_norm": 2.7875707604785407,
"learning_rate": 1.1525423728813561e-05,
"loss": 1.439798355102539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.369956374168396,
"step": 35,
"valid_targets_mean": 15191.7,
"valid_targets_min": 5297
},
{
"epoch": 0.1535181236673774,
"grad_norm": 2.7217130683106054,
"learning_rate": 1.1864406779661018e-05,
"loss": 1.429795503616333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37528812885284424,
"step": 36,
"valid_targets_mean": 15571.7,
"valid_targets_min": 8792
},
{
"epoch": 0.15778251599147122,
"grad_norm": 2.452349939554238,
"learning_rate": 1.2203389830508477e-05,
"loss": 1.4219616651535034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3755497932434082,
"step": 37,
"valid_targets_mean": 15548.4,
"valid_targets_min": 5026
},
{
"epoch": 0.16204690831556504,
"grad_norm": 2.0406075011883225,
"learning_rate": 1.2542372881355932e-05,
"loss": 1.4063204526901245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36107003688812256,
"step": 38,
"valid_targets_mean": 15412.9,
"valid_targets_min": 7438
},
{
"epoch": 0.16631130063965885,
"grad_norm": 1.8192030106266306,
"learning_rate": 1.288135593220339e-05,
"loss": 1.4204202890396118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3498168885707855,
"step": 39,
"valid_targets_mean": 15181.0,
"valid_targets_min": 4903
},
{
"epoch": 0.17057569296375266,
"grad_norm": 1.502103623020904,
"learning_rate": 1.3220338983050848e-05,
"loss": 1.3523015975952148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35391467809677124,
"step": 40,
"valid_targets_mean": 14799.0,
"valid_targets_min": 5119
},
{
"epoch": 0.17484008528784648,
"grad_norm": 1.2704982545285404,
"learning_rate": 1.3559322033898305e-05,
"loss": 1.3674108982086182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3485657870769501,
"step": 41,
"valid_targets_mean": 15266.0,
"valid_targets_min": 7127
},
{
"epoch": 0.1791044776119403,
"grad_norm": 1.279513706830006,
"learning_rate": 1.3898305084745764e-05,
"loss": 1.3254966735839844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31960922479629517,
"step": 42,
"valid_targets_mean": 15077.7,
"valid_targets_min": 7788
},
{
"epoch": 0.18336886993603413,
"grad_norm": 1.1317311262001388,
"learning_rate": 1.4237288135593221e-05,
"loss": 1.3530699014663696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3286569118499756,
"step": 43,
"valid_targets_mean": 14599.4,
"valid_targets_min": 2746
},
{
"epoch": 0.18763326226012794,
"grad_norm": 1.0641683167005649,
"learning_rate": 1.4576271186440678e-05,
"loss": 1.3112722635269165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3280591368675232,
"step": 44,
"valid_targets_mean": 15166.9,
"valid_targets_min": 1286
},
{
"epoch": 0.19189765458422176,
"grad_norm": 0.9774596445143066,
"learning_rate": 1.4915254237288137e-05,
"loss": 1.3113558292388916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31965869665145874,
"step": 45,
"valid_targets_mean": 14425.3,
"valid_targets_min": 1628
},
{
"epoch": 0.19616204690831557,
"grad_norm": 0.8886761466812477,
"learning_rate": 1.5254237288135594e-05,
"loss": 1.3103699684143066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3260655701160431,
"step": 46,
"valid_targets_mean": 14651.3,
"valid_targets_min": 3805
},
{
"epoch": 0.20042643923240938,
"grad_norm": 0.867150505024027,
"learning_rate": 1.5593220338983053e-05,
"loss": 1.3560914993286133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.351958304643631,
"step": 47,
"valid_targets_mean": 15030.9,
"valid_targets_min": 2768
},
{
"epoch": 0.2046908315565032,
"grad_norm": 0.6978933550897056,
"learning_rate": 1.593220338983051e-05,
"loss": 1.2680150270462036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3324684500694275,
"step": 48,
"valid_targets_mean": 15424.2,
"valid_targets_min": 6409
},
{
"epoch": 0.208955223880597,
"grad_norm": 0.6586270729941489,
"learning_rate": 1.6271186440677967e-05,
"loss": 1.2598673105239868,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2970837652683258,
"step": 49,
"valid_targets_mean": 14532.2,
"valid_targets_min": 3775
},
{
"epoch": 0.21321961620469082,
"grad_norm": 0.6298545489956912,
"learning_rate": 1.6610169491525424e-05,
"loss": 1.2781181335449219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32271119952201843,
"step": 50,
"valid_targets_mean": 14875.0,
"valid_targets_min": 2632
},
{
"epoch": 0.21748400852878466,
"grad_norm": 0.611408985629821,
"learning_rate": 1.694915254237288e-05,
"loss": 1.2840487957000732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33150315284729004,
"step": 51,
"valid_targets_mean": 15284.7,
"valid_targets_min": 2340
},
{
"epoch": 0.22174840085287847,
"grad_norm": 0.5231312351893965,
"learning_rate": 1.728813559322034e-05,
"loss": 1.2553491592407227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32812148332595825,
"step": 52,
"valid_targets_mean": 14851.4,
"valid_targets_min": 3202
},
{
"epoch": 0.2260127931769723,
"grad_norm": 0.49883544363495685,
"learning_rate": 1.76271186440678e-05,
"loss": 1.2403368949890137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33563610911369324,
"step": 53,
"valid_targets_mean": 14730.1,
"valid_targets_min": 6607
},
{
"epoch": 0.2302771855010661,
"grad_norm": 0.5070577245361446,
"learning_rate": 1.7966101694915256e-05,
"loss": 1.2707104682922363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3156570792198181,
"step": 54,
"valid_targets_mean": 15551.0,
"valid_targets_min": 10611
},
{
"epoch": 0.2345415778251599,
"grad_norm": 0.45288562442399766,
"learning_rate": 1.8305084745762713e-05,
"loss": 1.279565453529358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3232952654361725,
"step": 55,
"valid_targets_mean": 15073.3,
"valid_targets_min": 6688
},
{
"epoch": 0.23880597014925373,
"grad_norm": 0.5290725780424694,
"learning_rate": 1.864406779661017e-05,
"loss": 1.2902295589447021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31880152225494385,
"step": 56,
"valid_targets_mean": 13424.9,
"valid_targets_min": 1585
},
{
"epoch": 0.24307036247334754,
"grad_norm": 0.4538847755630704,
"learning_rate": 1.898305084745763e-05,
"loss": 1.2620139122009277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.323620080947876,
"step": 57,
"valid_targets_mean": 14232.4,
"valid_targets_min": 4414
},
{
"epoch": 0.24733475479744135,
"grad_norm": 0.4305521236043809,
"learning_rate": 1.9322033898305087e-05,
"loss": 1.2902776002883911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3640017807483673,
"step": 58,
"valid_targets_mean": 14867.7,
"valid_targets_min": 5976
},
{
"epoch": 0.2515991471215352,
"grad_norm": 0.37477169557749734,
"learning_rate": 1.9661016949152545e-05,
"loss": 1.2305490970611572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32125768065452576,
"step": 59,
"valid_targets_mean": 15468.5,
"valid_targets_min": 6915
},
{
"epoch": 0.255863539445629,
"grad_norm": 0.3817273149939568,
"learning_rate": 2e-05,
"loss": 1.2429394721984863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2957238256931305,
"step": 60,
"valid_targets_mean": 14228.8,
"valid_targets_min": 2944
},
{
"epoch": 0.2601279317697228,
"grad_norm": 0.3570598150770374,
"learning_rate": 2.033898305084746e-05,
"loss": 1.261240005493164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32640916109085083,
"step": 61,
"valid_targets_mean": 15100.5,
"valid_targets_min": 1716
},
{
"epoch": 0.26439232409381663,
"grad_norm": 0.3645149378751711,
"learning_rate": 2.0677966101694916e-05,
"loss": 1.2335644960403442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30785736441612244,
"step": 62,
"valid_targets_mean": 14905.6,
"valid_targets_min": 2641
},
{
"epoch": 0.26865671641791045,
"grad_norm": 0.33892261474262636,
"learning_rate": 2.1016949152542376e-05,
"loss": 1.201655626296997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29098716378211975,
"step": 63,
"valid_targets_mean": 14317.0,
"valid_targets_min": 3219
},
{
"epoch": 0.27292110874200426,
"grad_norm": 0.37778201349086643,
"learning_rate": 2.1355932203389833e-05,
"loss": 1.2824424505233765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3140537142753601,
"step": 64,
"valid_targets_mean": 14770.4,
"valid_targets_min": 3917
},
{
"epoch": 0.2771855010660981,
"grad_norm": 0.3788695246680898,
"learning_rate": 2.169491525423729e-05,
"loss": 1.2558549642562866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33100056648254395,
"step": 65,
"valid_targets_mean": 15132.9,
"valid_targets_min": 3888
},
{
"epoch": 0.2814498933901919,
"grad_norm": 0.31284044068396766,
"learning_rate": 2.2033898305084748e-05,
"loss": 1.2463667392730713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32382726669311523,
"step": 66,
"valid_targets_mean": 14523.2,
"valid_targets_min": 4563
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.33675881541153835,
"learning_rate": 2.2372881355932205e-05,
"loss": 1.211066484451294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31038177013397217,
"step": 67,
"valid_targets_mean": 15063.5,
"valid_targets_min": 1522
},
{
"epoch": 0.2899786780383795,
"grad_norm": 0.30764419001783483,
"learning_rate": 2.2711864406779665e-05,
"loss": 1.2415111064910889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32569655776023865,
"step": 68,
"valid_targets_mean": 14883.2,
"valid_targets_min": 5211
},
{
"epoch": 0.2942430703624733,
"grad_norm": 0.31007462729839713,
"learning_rate": 2.3050847457627122e-05,
"loss": 1.2593109607696533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28648436069488525,
"step": 69,
"valid_targets_mean": 15228.7,
"valid_targets_min": 4369
},
{
"epoch": 0.29850746268656714,
"grad_norm": 0.29151985488501436,
"learning_rate": 2.338983050847458e-05,
"loss": 1.2496416568756104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3293963372707367,
"step": 70,
"valid_targets_mean": 14773.6,
"valid_targets_min": 2391
},
{
"epoch": 0.302771855010661,
"grad_norm": 0.26553622861690374,
"learning_rate": 2.3728813559322036e-05,
"loss": 1.1612809896469116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28408282995224,
"step": 71,
"valid_targets_mean": 15099.8,
"valid_targets_min": 6361
},
{
"epoch": 0.3070362473347548,
"grad_norm": 0.282168884696635,
"learning_rate": 2.406779661016949e-05,
"loss": 1.199310064315796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30075767636299133,
"step": 72,
"valid_targets_mean": 14710.6,
"valid_targets_min": 2834
},
{
"epoch": 0.31130063965884863,
"grad_norm": 0.26313781745202286,
"learning_rate": 2.4406779661016954e-05,
"loss": 1.2104355096817017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31199613213539124,
"step": 73,
"valid_targets_mean": 15397.6,
"valid_targets_min": 7400
},
{
"epoch": 0.31556503198294245,
"grad_norm": 0.2916206397471456,
"learning_rate": 2.474576271186441e-05,
"loss": 1.229551076889038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2988601326942444,
"step": 74,
"valid_targets_mean": 15627.9,
"valid_targets_min": 8606
},
{
"epoch": 0.31982942430703626,
"grad_norm": 0.29466291213180473,
"learning_rate": 2.5084745762711865e-05,
"loss": 1.227362871170044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30007031559944153,
"step": 75,
"valid_targets_mean": 14642.2,
"valid_targets_min": 3142
},
{
"epoch": 0.32409381663113007,
"grad_norm": 0.2549008942709039,
"learning_rate": 2.5423728813559322e-05,
"loss": 1.2207566499710083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30205586552619934,
"step": 76,
"valid_targets_mean": 14562.2,
"valid_targets_min": 3393
},
{
"epoch": 0.3283582089552239,
"grad_norm": 0.2576377148416891,
"learning_rate": 2.576271186440678e-05,
"loss": 1.1718388795852661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2792397141456604,
"step": 77,
"valid_targets_mean": 14332.6,
"valid_targets_min": 2705
},
{
"epoch": 0.3326226012793177,
"grad_norm": 0.26834680952288187,
"learning_rate": 2.610169491525424e-05,
"loss": 1.2270526885986328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3089393079280853,
"step": 78,
"valid_targets_mean": 14822.8,
"valid_targets_min": 4243
},
{
"epoch": 0.3368869936034115,
"grad_norm": 0.2592568670684148,
"learning_rate": 2.6440677966101696e-05,
"loss": 1.1853585243225098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29312610626220703,
"step": 79,
"valid_targets_mean": 14435.2,
"valid_targets_min": 4632
},
{
"epoch": 0.3411513859275053,
"grad_norm": 0.28595591728726166,
"learning_rate": 2.6779661016949153e-05,
"loss": 1.1915867328643799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2720337212085724,
"step": 80,
"valid_targets_mean": 13855.5,
"valid_targets_min": 2021
},
{
"epoch": 0.34541577825159914,
"grad_norm": 0.2573363604315349,
"learning_rate": 2.711864406779661e-05,
"loss": 1.211092233657837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31523624062538147,
"step": 81,
"valid_targets_mean": 14988.9,
"valid_targets_min": 5031
},
{
"epoch": 0.34968017057569295,
"grad_norm": 0.2342943139176118,
"learning_rate": 2.7457627118644068e-05,
"loss": 1.1675188541412354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29798319935798645,
"step": 82,
"valid_targets_mean": 15173.8,
"valid_targets_min": 8052
},
{
"epoch": 0.35394456289978676,
"grad_norm": 0.19877480713294712,
"learning_rate": 2.7796610169491528e-05,
"loss": 1.1393442153930664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28084176778793335,
"step": 83,
"valid_targets_mean": 15112.2,
"valid_targets_min": 4150
},
{
"epoch": 0.3582089552238806,
"grad_norm": 0.27179759318244323,
"learning_rate": 2.8135593220338985e-05,
"loss": 1.2124311923980713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32078754901885986,
"step": 84,
"valid_targets_mean": 14934.1,
"valid_targets_min": 2774
},
{
"epoch": 0.3624733475479744,
"grad_norm": 0.23832409069098376,
"learning_rate": 2.8474576271186442e-05,
"loss": 1.1817233562469482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2796356678009033,
"step": 85,
"valid_targets_mean": 14164.3,
"valid_targets_min": 714
},
{
"epoch": 0.36673773987206826,
"grad_norm": 0.2688215630024535,
"learning_rate": 2.88135593220339e-05,
"loss": 1.1816399097442627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2843422591686249,
"step": 86,
"valid_targets_mean": 14402.0,
"valid_targets_min": 2864
},
{
"epoch": 0.37100213219616207,
"grad_norm": 0.22649654551596493,
"learning_rate": 2.9152542372881356e-05,
"loss": 1.1729528903961182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28629714250564575,
"step": 87,
"valid_targets_mean": 14807.8,
"valid_targets_min": 5090
},
{
"epoch": 0.3752665245202559,
"grad_norm": 0.257460340358337,
"learning_rate": 2.9491525423728817e-05,
"loss": 1.2199232578277588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32469049096107483,
"step": 88,
"valid_targets_mean": 14527.9,
"valid_targets_min": 4227
},
{
"epoch": 0.3795309168443497,
"grad_norm": 0.21834766547678558,
"learning_rate": 2.9830508474576274e-05,
"loss": 1.1780121326446533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29766976833343506,
"step": 89,
"valid_targets_mean": 14080.5,
"valid_targets_min": 1811
},
{
"epoch": 0.3837953091684435,
"grad_norm": 0.22473034066650938,
"learning_rate": 3.016949152542373e-05,
"loss": 1.212519884109497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3092614412307739,
"step": 90,
"valid_targets_mean": 14296.3,
"valid_targets_min": 4603
},
{
"epoch": 0.3880597014925373,
"grad_norm": 0.22423131093306164,
"learning_rate": 3.0508474576271188e-05,
"loss": 1.2223443984985352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30183011293411255,
"step": 91,
"valid_targets_mean": 14442.9,
"valid_targets_min": 1387
},
{
"epoch": 0.39232409381663114,
"grad_norm": 0.18683420496979372,
"learning_rate": 3.084745762711865e-05,
"loss": 1.1356593370437622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2865544855594635,
"step": 92,
"valid_targets_mean": 15076.7,
"valid_targets_min": 9011
},
{
"epoch": 0.39658848614072495,
"grad_norm": 0.23936333466282408,
"learning_rate": 3.1186440677966106e-05,
"loss": 1.2264689207077026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3072168827056885,
"step": 93,
"valid_targets_mean": 14920.3,
"valid_targets_min": 5698
},
{
"epoch": 0.40085287846481876,
"grad_norm": 0.21590792040283036,
"learning_rate": 3.152542372881356e-05,
"loss": 1.1622889041900635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30374616384506226,
"step": 94,
"valid_targets_mean": 14341.4,
"valid_targets_min": 1469
},
{
"epoch": 0.4051172707889126,
"grad_norm": 0.2069156721423661,
"learning_rate": 3.186440677966102e-05,
"loss": 1.1822681427001953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31677788496017456,
"step": 95,
"valid_targets_mean": 14810.8,
"valid_targets_min": 7825
},
{
"epoch": 0.4093816631130064,
"grad_norm": 0.24683228222573494,
"learning_rate": 3.2203389830508473e-05,
"loss": 1.1557495594024658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2958802878856659,
"step": 96,
"valid_targets_mean": 14433.6,
"valid_targets_min": 7417
},
{
"epoch": 0.4136460554371002,
"grad_norm": 0.18561933156683028,
"learning_rate": 3.2542372881355934e-05,
"loss": 1.1642088890075684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30265098810195923,
"step": 97,
"valid_targets_mean": 15319.0,
"valid_targets_min": 3176
},
{
"epoch": 0.417910447761194,
"grad_norm": 0.17643190018249677,
"learning_rate": 3.2881355932203394e-05,
"loss": 1.1388523578643799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29216450452804565,
"step": 98,
"valid_targets_mean": 15300.7,
"valid_targets_min": 8249
},
{
"epoch": 0.42217484008528783,
"grad_norm": 0.2148851445465397,
"learning_rate": 3.322033898305085e-05,
"loss": 1.175142526626587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3050539493560791,
"step": 99,
"valid_targets_mean": 15150.8,
"valid_targets_min": 2770
},
{
"epoch": 0.42643923240938164,
"grad_norm": 0.22119889412054472,
"learning_rate": 3.355932203389831e-05,
"loss": 1.1640441417694092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2903299331665039,
"step": 100,
"valid_targets_mean": 15056.3,
"valid_targets_min": 7409
},
{
"epoch": 0.43070362473347545,
"grad_norm": 0.23698273121150892,
"learning_rate": 3.389830508474576e-05,
"loss": 1.1739205121994019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2950419783592224,
"step": 101,
"valid_targets_mean": 15414.8,
"valid_targets_min": 7871
},
{
"epoch": 0.4349680170575693,
"grad_norm": 0.24734898745279307,
"learning_rate": 3.423728813559322e-05,
"loss": 1.2048437595367432,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30449986457824707,
"step": 102,
"valid_targets_mean": 15500.8,
"valid_targets_min": 6371
},
{
"epoch": 0.43923240938166314,
"grad_norm": 0.22838037683126972,
"learning_rate": 3.457627118644068e-05,
"loss": 1.1827456951141357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30405759811401367,
"step": 103,
"valid_targets_mean": 15590.4,
"valid_targets_min": 9132
},
{
"epoch": 0.44349680170575695,
"grad_norm": 0.1899632820175523,
"learning_rate": 3.491525423728814e-05,
"loss": 1.1353181600570679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27248257398605347,
"step": 104,
"valid_targets_mean": 13723.0,
"valid_targets_min": 1629
},
{
"epoch": 0.44776119402985076,
"grad_norm": 0.1893495240420767,
"learning_rate": 3.52542372881356e-05,
"loss": 1.1199802160263062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.271077036857605,
"step": 105,
"valid_targets_mean": 15511.4,
"valid_targets_min": 7773
},
{
"epoch": 0.4520255863539446,
"grad_norm": 0.1870760141178231,
"learning_rate": 3.559322033898305e-05,
"loss": 1.1274532079696655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28740498423576355,
"step": 106,
"valid_targets_mean": 15417.5,
"valid_targets_min": 8403
},
{
"epoch": 0.4562899786780384,
"grad_norm": 0.18128773432383705,
"learning_rate": 3.593220338983051e-05,
"loss": 1.1023752689361572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2651640474796295,
"step": 107,
"valid_targets_mean": 15189.9,
"valid_targets_min": 2801
},
{
"epoch": 0.4605543710021322,
"grad_norm": 0.20722213303473414,
"learning_rate": 3.627118644067797e-05,
"loss": 1.1903117895126343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31424200534820557,
"step": 108,
"valid_targets_mean": 15165.4,
"valid_targets_min": 2289
},
{
"epoch": 0.464818763326226,
"grad_norm": 0.22007617972531787,
"learning_rate": 3.6610169491525426e-05,
"loss": 1.1847251653671265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29289525747299194,
"step": 109,
"valid_targets_mean": 14900.2,
"valid_targets_min": 3567
},
{
"epoch": 0.4690831556503198,
"grad_norm": 0.18609065046651505,
"learning_rate": 3.6949152542372886e-05,
"loss": 1.1395936012268066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2833573520183563,
"step": 110,
"valid_targets_mean": 14091.4,
"valid_targets_min": 1764
},
{
"epoch": 0.47334754797441364,
"grad_norm": 0.1896764397690769,
"learning_rate": 3.728813559322034e-05,
"loss": 1.1483747959136963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29909899830818176,
"step": 111,
"valid_targets_mean": 15633.7,
"valid_targets_min": 9677
},
{
"epoch": 0.47761194029850745,
"grad_norm": 0.21657203678307657,
"learning_rate": 3.76271186440678e-05,
"loss": 1.2104594707489014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.304294228553772,
"step": 112,
"valid_targets_mean": 15184.1,
"valid_targets_min": 3399
},
{
"epoch": 0.48187633262260127,
"grad_norm": 0.2318529429539571,
"learning_rate": 3.796610169491526e-05,
"loss": 1.1787766218185425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3219996690750122,
"step": 113,
"valid_targets_mean": 14875.3,
"valid_targets_min": 4874
},
{
"epoch": 0.4861407249466951,
"grad_norm": 0.23313336032796225,
"learning_rate": 3.8305084745762714e-05,
"loss": 1.1813091039657593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2842368185520172,
"step": 114,
"valid_targets_mean": 14792.7,
"valid_targets_min": 3378
},
{
"epoch": 0.4904051172707889,
"grad_norm": 0.2865459503023693,
"learning_rate": 3.8644067796610175e-05,
"loss": 1.1326351165771484,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29066264629364014,
"step": 115,
"valid_targets_mean": 15213.5,
"valid_targets_min": 2215
},
{
"epoch": 0.4946695095948827,
"grad_norm": 0.18733606326268779,
"learning_rate": 3.898305084745763e-05,
"loss": 1.1477142572402954,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2806292176246643,
"step": 116,
"valid_targets_mean": 14922.4,
"valid_targets_min": 5314
},
{
"epoch": 0.4989339019189765,
"grad_norm": 0.20824519757855833,
"learning_rate": 3.932203389830509e-05,
"loss": 1.1102975606918335,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27820122241973877,
"step": 117,
"valid_targets_mean": 15095.6,
"valid_targets_min": 1502
},
{
"epoch": 0.5031982942430704,
"grad_norm": 0.21111065222975572,
"learning_rate": 3.966101694915255e-05,
"loss": 1.1591339111328125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27559608221054077,
"step": 118,
"valid_targets_mean": 14135.6,
"valid_targets_min": 1385
},
{
"epoch": 0.5074626865671642,
"grad_norm": 0.2322836217059597,
"learning_rate": 4e-05,
"loss": 1.1544227600097656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2784320116043091,
"step": 119,
"valid_targets_mean": 14575.9,
"valid_targets_min": 3951
},
{
"epoch": 0.511727078891258,
"grad_norm": 0.2710200716654449,
"learning_rate": 3.999991166161585e-05,
"loss": 1.2046982049942017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2911280393600464,
"step": 120,
"valid_targets_mean": 14812.0,
"valid_targets_min": 1616
},
{
"epoch": 0.5159914712153518,
"grad_norm": 0.23275165068835507,
"learning_rate": 3.999964664724376e-05,
"loss": 1.149275541305542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2707687020301819,
"step": 121,
"valid_targets_mean": 14733.4,
"valid_targets_min": 2274
},
{
"epoch": 0.5202558635394456,
"grad_norm": 0.2188757010741002,
"learning_rate": 3.999920495922483e-05,
"loss": 1.1391420364379883,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2791445255279541,
"step": 122,
"valid_targets_mean": 14549.0,
"valid_targets_min": 5406
},
{
"epoch": 0.5245202558635395,
"grad_norm": 0.18189706908612469,
"learning_rate": 3.999858660146085e-05,
"loss": 1.1473329067230225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28349411487579346,
"step": 123,
"valid_targets_mean": 16022.8,
"valid_targets_min": 12461
},
{
"epoch": 0.5287846481876333,
"grad_norm": 0.18166911355855728,
"learning_rate": 3.999779157941431e-05,
"loss": 1.1291731595993042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29129359126091003,
"step": 124,
"valid_targets_mean": 15672.1,
"valid_targets_min": 8473
},
{
"epoch": 0.5330490405117271,
"grad_norm": 0.20528213770322606,
"learning_rate": 3.99968199001083e-05,
"loss": 1.152724027633667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27808108925819397,
"step": 125,
"valid_targets_mean": 14904.5,
"valid_targets_min": 3339
},
{
"epoch": 0.5373134328358209,
"grad_norm": 0.21969182243753804,
"learning_rate": 3.999567157212646e-05,
"loss": 1.111305832862854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28850311040878296,
"step": 126,
"valid_targets_mean": 15135.7,
"valid_targets_min": 2671
},
{
"epoch": 0.5415778251599147,
"grad_norm": 0.18648321802421775,
"learning_rate": 3.9994346605612955e-05,
"loss": 1.1140379905700684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28390681743621826,
"step": 127,
"valid_targets_mean": 15173.3,
"valid_targets_min": 3389
},
{
"epoch": 0.5458422174840085,
"grad_norm": 0.23530061155007936,
"learning_rate": 3.999284501227232e-05,
"loss": 1.1197749376296997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2651119828224182,
"step": 128,
"valid_targets_mean": 14356.0,
"valid_targets_min": 2255
},
{
"epoch": 0.5501066098081023,
"grad_norm": 0.20756437909189696,
"learning_rate": 3.9991166805369393e-05,
"loss": 1.1634900569915771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30470719933509827,
"step": 129,
"valid_targets_mean": 14560.9,
"valid_targets_min": 2591
},
{
"epoch": 0.5543710021321961,
"grad_norm": 0.17806553404432293,
"learning_rate": 3.9989311999729166e-05,
"loss": 1.1636724472045898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3105810880661011,
"step": 130,
"valid_targets_mean": 14886.5,
"valid_targets_min": 6638
},
{
"epoch": 0.55863539445629,
"grad_norm": 0.18548318006086834,
"learning_rate": 3.99872806117367e-05,
"loss": 1.1348915100097656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2712128162384033,
"step": 131,
"valid_targets_mean": 14285.8,
"valid_targets_min": 2962
},
{
"epoch": 0.5628997867803838,
"grad_norm": 0.18014829073575278,
"learning_rate": 3.998507265933696e-05,
"loss": 1.138692855834961,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2761489450931549,
"step": 132,
"valid_targets_mean": 14861.4,
"valid_targets_min": 4176
},
{
"epoch": 0.5671641791044776,
"grad_norm": 0.19763664170497103,
"learning_rate": 3.9982688162034624e-05,
"loss": 1.1620148420333862,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28988659381866455,
"step": 133,
"valid_targets_mean": 15048.1,
"valid_targets_min": 2960
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.19597942625944698,
"learning_rate": 3.998012714089397e-05,
"loss": 1.1668446063995361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29461246728897095,
"step": 134,
"valid_targets_mean": 14693.3,
"valid_targets_min": 5121
},
{
"epoch": 0.5756929637526652,
"grad_norm": 0.18817064634454994,
"learning_rate": 3.997738961853863e-05,
"loss": 1.1524537801742554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29617851972579956,
"step": 135,
"valid_targets_mean": 15131.8,
"valid_targets_min": 2618
},
{
"epoch": 0.579957356076759,
"grad_norm": 0.17696374148992294,
"learning_rate": 3.9974475619151445e-05,
"loss": 1.121335744857788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27633947134017944,
"step": 136,
"valid_targets_mean": 15332.3,
"valid_targets_min": 5632
},
{
"epoch": 0.5842217484008528,
"grad_norm": 0.18262162123023695,
"learning_rate": 3.997138516847422e-05,
"loss": 1.094153642654419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28466087579727173,
"step": 137,
"valid_targets_mean": 15506.9,
"valid_targets_min": 7124
},
{
"epoch": 0.5884861407249466,
"grad_norm": 0.17335247209101903,
"learning_rate": 3.9968118293807476e-05,
"loss": 1.1545405387878418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2700340747833252,
"step": 138,
"valid_targets_mean": 14411.7,
"valid_targets_min": 1794
},
{
"epoch": 0.5927505330490405,
"grad_norm": 0.18739288689825637,
"learning_rate": 3.996467502401028e-05,
"loss": 1.1366984844207764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2842878997325897,
"step": 139,
"valid_targets_mean": 15288.6,
"valid_targets_min": 3103
},
{
"epoch": 0.5970149253731343,
"grad_norm": 0.1694782461744407,
"learning_rate": 3.9961055389499904e-05,
"loss": 1.1021239757537842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27984392642974854,
"step": 140,
"valid_targets_mean": 15177.2,
"valid_targets_min": 6389
},
{
"epoch": 0.6012793176972282,
"grad_norm": 0.17506510062496997,
"learning_rate": 3.995725942225162e-05,
"loss": 1.1278494596481323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26832592487335205,
"step": 141,
"valid_targets_mean": 13890.7,
"valid_targets_min": 2127
},
{
"epoch": 0.605543710021322,
"grad_norm": 0.1978149550323814,
"learning_rate": 3.995328715579839e-05,
"loss": 1.13820219039917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2887899875640869,
"step": 142,
"valid_targets_mean": 15583.9,
"valid_targets_min": 7852
},
{
"epoch": 0.6098081023454158,
"grad_norm": 0.15715197732905914,
"learning_rate": 3.994913862523058e-05,
"loss": 1.103088140487671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29257047176361084,
"step": 143,
"valid_targets_mean": 15232.1,
"valid_targets_min": 3068
},
{
"epoch": 0.6140724946695096,
"grad_norm": 0.17529266877105765,
"learning_rate": 3.9944813867195624e-05,
"loss": 1.1433424949645996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24143539369106293,
"step": 144,
"valid_targets_mean": 13227.5,
"valid_targets_min": 976
},
{
"epoch": 0.6183368869936035,
"grad_norm": 0.2004398857873124,
"learning_rate": 3.9940312919897744e-05,
"loss": 1.1780171394348145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3126358687877655,
"step": 145,
"valid_targets_mean": 15729.8,
"valid_targets_min": 9974
},
{
"epoch": 0.6226012793176973,
"grad_norm": 0.14474520036548985,
"learning_rate": 3.993563582309759e-05,
"loss": 1.1073493957519531,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25966954231262207,
"step": 146,
"valid_targets_mean": 15278.3,
"valid_targets_min": 4366
},
{
"epoch": 0.6268656716417911,
"grad_norm": 0.18882046907257752,
"learning_rate": 3.993078261811186e-05,
"loss": 1.1332874298095703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28338098526000977,
"step": 147,
"valid_targets_mean": 15488.8,
"valid_targets_min": 1314
},
{
"epoch": 0.6311300639658849,
"grad_norm": 0.1673714307844799,
"learning_rate": 3.9925753347813e-05,
"loss": 1.179833173751831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29067516326904297,
"step": 148,
"valid_targets_mean": 14679.1,
"valid_targets_min": 2834
},
{
"epoch": 0.6353944562899787,
"grad_norm": 0.180023287639483,
"learning_rate": 3.992054805662876e-05,
"loss": 1.1655330657958984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2942131757736206,
"step": 149,
"valid_targets_mean": 14476.6,
"valid_targets_min": 2432
},
{
"epoch": 0.6396588486140725,
"grad_norm": 0.15842311678929336,
"learning_rate": 3.991516679054185e-05,
"loss": 1.1212868690490723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804386615753174,
"step": 150,
"valid_targets_mean": 14696.7,
"valid_targets_min": 5334
},
{
"epoch": 0.6439232409381663,
"grad_norm": 0.13966841128795948,
"learning_rate": 3.9909609597089496e-05,
"loss": 1.1268808841705322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27525758743286133,
"step": 151,
"valid_targets_mean": 14507.3,
"valid_targets_min": 4150
},
{
"epoch": 0.6481876332622601,
"grad_norm": 0.17531682600850934,
"learning_rate": 3.9903876525363055e-05,
"loss": 1.15196692943573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.285283625125885,
"step": 152,
"valid_targets_mean": 15145.8,
"valid_targets_min": 7847
},
{
"epoch": 0.652452025586354,
"grad_norm": 0.15831262867201742,
"learning_rate": 3.989796762600755e-05,
"loss": 1.120331048965454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26696890592575073,
"step": 153,
"valid_targets_mean": 14839.2,
"valid_targets_min": 5196
},
{
"epoch": 0.6567164179104478,
"grad_norm": 0.15356917733409717,
"learning_rate": 3.9891882951221246e-05,
"loss": 1.2072778940200806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2713392376899719,
"step": 154,
"valid_targets_mean": 14136.1,
"valid_targets_min": 2060
},
{
"epoch": 0.6609808102345416,
"grad_norm": 0.15946919062196696,
"learning_rate": 3.988562255475518e-05,
"loss": 1.1008577346801758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804443836212158,
"step": 155,
"valid_targets_mean": 15568.1,
"valid_targets_min": 7018
},
{
"epoch": 0.6652452025586354,
"grad_norm": 0.1473945663169557,
"learning_rate": 3.987918649191268e-05,
"loss": 1.142858624458313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28863316774368286,
"step": 156,
"valid_targets_mean": 14499.2,
"valid_targets_min": 6380
},
{
"epoch": 0.6695095948827292,
"grad_norm": 0.1464149921971081,
"learning_rate": 3.987257481954888e-05,
"loss": 1.1440634727478027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3091897964477539,
"step": 157,
"valid_targets_mean": 15097.0,
"valid_targets_min": 5838
},
{
"epoch": 0.673773987206823,
"grad_norm": 0.1559438697550048,
"learning_rate": 3.9865787596070236e-05,
"loss": 1.1320838928222656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28108370304107666,
"step": 158,
"valid_targets_mean": 14891.9,
"valid_targets_min": 5531
},
{
"epoch": 0.6780383795309168,
"grad_norm": 0.1463860512495006,
"learning_rate": 3.9858824881433975e-05,
"loss": 1.146373987197876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26698851585388184,
"step": 159,
"valid_targets_mean": 14650.7,
"valid_targets_min": 5075
},
{
"epoch": 0.6823027718550106,
"grad_norm": 0.14800624166047252,
"learning_rate": 3.9851686737147585e-05,
"loss": 1.1788896322250366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2629927694797516,
"step": 160,
"valid_targets_mean": 14034.7,
"valid_targets_min": 819
},
{
"epoch": 0.6865671641791045,
"grad_norm": 0.1484680361376596,
"learning_rate": 3.9844373226268305e-05,
"loss": 1.0999665260314941,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2718093991279602,
"step": 161,
"valid_targets_mean": 14443.7,
"valid_targets_min": 2480
},
{
"epoch": 0.6908315565031983,
"grad_norm": 0.1625380040504571,
"learning_rate": 3.983688441340249e-05,
"loss": 1.1586577892303467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3006063997745514,
"step": 162,
"valid_targets_mean": 14396.2,
"valid_targets_min": 2776
},
{
"epoch": 0.6950959488272921,
"grad_norm": 0.15551941405753084,
"learning_rate": 3.98292203647051e-05,
"loss": 1.1422550678253174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28483057022094727,
"step": 163,
"valid_targets_mean": 14615.3,
"valid_targets_min": 4080
},
{
"epoch": 0.6993603411513859,
"grad_norm": 0.1497683685070015,
"learning_rate": 3.982138114787912e-05,
"loss": 1.1422796249389648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28290224075317383,
"step": 164,
"valid_targets_mean": 15051.8,
"valid_targets_min": 6250
},
{
"epoch": 0.7036247334754797,
"grad_norm": 0.12406388675079885,
"learning_rate": 3.98133668321749e-05,
"loss": 1.10989248752594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2753506898880005,
"step": 165,
"valid_targets_mean": 14864.9,
"valid_targets_min": 3870
},
{
"epoch": 0.7078891257995735,
"grad_norm": 0.15908448688320512,
"learning_rate": 3.980517748838963e-05,
"loss": 1.2064687013626099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28861990571022034,
"step": 166,
"valid_targets_mean": 14201.4,
"valid_targets_min": 1111
},
{
"epoch": 0.7121535181236673,
"grad_norm": 0.13241862947179323,
"learning_rate": 3.979681318886664e-05,
"loss": 1.1194779872894287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28165143728256226,
"step": 167,
"valid_targets_mean": 14527.2,
"valid_targets_min": 2850
},
{
"epoch": 0.7164179104477612,
"grad_norm": 0.1526864709951152,
"learning_rate": 3.978827400749481e-05,
"loss": 1.1441411972045898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2705109715461731,
"step": 168,
"valid_targets_mean": 14438.6,
"valid_targets_min": 2028
},
{
"epoch": 0.720682302771855,
"grad_norm": 0.13713852184898634,
"learning_rate": 3.977956001970788e-05,
"loss": 1.1992135047912598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31088030338287354,
"step": 169,
"valid_targets_mean": 15543.1,
"valid_targets_min": 9188
},
{
"epoch": 0.7249466950959488,
"grad_norm": 0.14966552540143047,
"learning_rate": 3.977067130248381e-05,
"loss": 1.1361427307128906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28439095616340637,
"step": 170,
"valid_targets_mean": 15418.8,
"valid_targets_min": 5937
},
{
"epoch": 0.7292110874200426,
"grad_norm": 0.13928076177764379,
"learning_rate": 3.9761607934344095e-05,
"loss": 1.078423023223877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26448649168014526,
"step": 171,
"valid_targets_mean": 14226.8,
"valid_targets_min": 3232
},
{
"epoch": 0.7334754797441365,
"grad_norm": 0.13495845491612382,
"learning_rate": 3.975236999535306e-05,
"loss": 1.09339439868927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26359957456588745,
"step": 172,
"valid_targets_mean": 14790.3,
"valid_targets_min": 3333
},
{
"epoch": 0.7377398720682303,
"grad_norm": 0.12058657267096401,
"learning_rate": 3.974295756711717e-05,
"loss": 1.138909101486206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2752223014831543,
"step": 173,
"valid_targets_mean": 14813.6,
"valid_targets_min": 2999
},
{
"epoch": 0.7420042643923241,
"grad_norm": 0.14248053847759223,
"learning_rate": 3.9733370732784296e-05,
"loss": 1.15055251121521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29128724336624146,
"step": 174,
"valid_targets_mean": 14909.1,
"valid_targets_min": 2310
},
{
"epoch": 0.746268656716418,
"grad_norm": 0.15695848742650992,
"learning_rate": 3.972360957704298e-05,
"loss": 1.193650245666504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26371991634368896,
"step": 175,
"valid_targets_mean": 14879.4,
"valid_targets_min": 1882
},
{
"epoch": 0.7505330490405118,
"grad_norm": 0.13939373132104746,
"learning_rate": 3.97136741861217e-05,
"loss": 1.107863187789917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29141396284103394,
"step": 176,
"valid_targets_mean": 15880.4,
"valid_targets_min": 11548
},
{
"epoch": 0.7547974413646056,
"grad_norm": 0.143994364121272,
"learning_rate": 3.970356464778808e-05,
"loss": 1.1400837898254395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2879534661769867,
"step": 177,
"valid_targets_mean": 15005.6,
"valid_targets_min": 3066
},
{
"epoch": 0.7590618336886994,
"grad_norm": 0.13660346643495577,
"learning_rate": 3.969328105134817e-05,
"loss": 1.123429298400879,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2834112346172333,
"step": 178,
"valid_targets_mean": 15478.3,
"valid_targets_min": 1579
},
{
"epoch": 0.7633262260127932,
"grad_norm": 0.13491222978566783,
"learning_rate": 3.9682823487645584e-05,
"loss": 1.1422916650772095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2854680120944977,
"step": 179,
"valid_targets_mean": 14752.8,
"valid_targets_min": 5344
},
{
"epoch": 0.767590618336887,
"grad_norm": 0.1332118908758463,
"learning_rate": 3.9672192049060745e-05,
"loss": 1.1008455753326416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26276886463165283,
"step": 180,
"valid_targets_mean": 14767.6,
"valid_targets_min": 4143
},
{
"epoch": 0.7718550106609808,
"grad_norm": 0.1373151893452771,
"learning_rate": 3.966138682951008e-05,
"loss": 1.1371111869812012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.300091028213501,
"step": 181,
"valid_targets_mean": 15338.1,
"valid_targets_min": 3939
},
{
"epoch": 0.7761194029850746,
"grad_norm": 0.13795655344725627,
"learning_rate": 3.9650407924445147e-05,
"loss": 1.1715991497039795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2965927720069885,
"step": 182,
"valid_targets_mean": 14912.2,
"valid_targets_min": 1474
},
{
"epoch": 0.7803837953091685,
"grad_norm": 0.13539932831986434,
"learning_rate": 3.963925543085181e-05,
"loss": 1.130144476890564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2717178463935852,
"step": 183,
"valid_targets_mean": 14541.8,
"valid_targets_min": 4293
},
{
"epoch": 0.7846481876332623,
"grad_norm": 0.1283206245736198,
"learning_rate": 3.96279294472494e-05,
"loss": 1.054412841796875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2640993595123291,
"step": 184,
"valid_targets_mean": 14504.5,
"valid_targets_min": 1357
},
{
"epoch": 0.7889125799573561,
"grad_norm": 0.12619880431755937,
"learning_rate": 3.961643007368984e-05,
"loss": 1.1118097305297852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28860723972320557,
"step": 185,
"valid_targets_mean": 15696.8,
"valid_targets_min": 12143
},
{
"epoch": 0.7931769722814499,
"grad_norm": 0.14664943656055512,
"learning_rate": 3.960475741175671e-05,
"loss": 1.1636567115783691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2976000905036926,
"step": 186,
"valid_targets_mean": 14328.2,
"valid_targets_min": 1235
},
{
"epoch": 0.7974413646055437,
"grad_norm": 0.13587138316424627,
"learning_rate": 3.959291156456444e-05,
"loss": 1.1082062721252441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804204821586609,
"step": 187,
"valid_targets_mean": 14589.9,
"valid_targets_min": 1383
},
{
"epoch": 0.8017057569296375,
"grad_norm": 0.11576610260324276,
"learning_rate": 3.9580892636757334e-05,
"loss": 1.0934619903564453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2750215530395508,
"step": 188,
"valid_targets_mean": 14877.4,
"valid_targets_min": 1719
},
{
"epoch": 0.8059701492537313,
"grad_norm": 0.15045642773107953,
"learning_rate": 3.9568700734508645e-05,
"loss": 1.1125788688659668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27089452743530273,
"step": 189,
"valid_targets_mean": 14498.4,
"valid_targets_min": 1974
},
{
"epoch": 0.8102345415778252,
"grad_norm": 0.122886924452415,
"learning_rate": 3.955633596551967e-05,
"loss": 1.151255488395691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3037102520465851,
"step": 190,
"valid_targets_mean": 14792.8,
"valid_targets_min": 2469
},
{
"epoch": 0.814498933901919,
"grad_norm": 0.14899832983861802,
"learning_rate": 3.9543798439018776e-05,
"loss": 1.126554250717163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.278128445148468,
"step": 191,
"valid_targets_mean": 14862.5,
"valid_targets_min": 2464
},
{
"epoch": 0.8187633262260128,
"grad_norm": 0.13132235585574215,
"learning_rate": 3.953108826576046e-05,
"loss": 1.0974340438842773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3035191595554352,
"step": 192,
"valid_targets_mean": 15164.5,
"valid_targets_min": 4317
},
{
"epoch": 0.8230277185501066,
"grad_norm": 0.130521931104875,
"learning_rate": 3.9518205558024334e-05,
"loss": 1.1168773174285889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2616330683231354,
"step": 193,
"valid_targets_mean": 14815.7,
"valid_targets_min": 2603
},
{
"epoch": 0.8272921108742004,
"grad_norm": 0.1341247926241614,
"learning_rate": 3.9505150429614154e-05,
"loss": 1.1154228448867798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2859729528427124,
"step": 194,
"valid_targets_mean": 14534.9,
"valid_targets_min": 4658
},
{
"epoch": 0.8315565031982942,
"grad_norm": 0.12691633298520677,
"learning_rate": 3.949192299585681e-05,
"loss": 1.1170120239257812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25178277492523193,
"step": 195,
"valid_targets_mean": 14242.1,
"valid_targets_min": 3649
},
{
"epoch": 0.835820895522388,
"grad_norm": 0.12846012014419028,
"learning_rate": 3.9478523373601325e-05,
"loss": 1.112591028213501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2866249084472656,
"step": 196,
"valid_targets_mean": 15200.2,
"valid_targets_min": 6631
},
{
"epoch": 0.8400852878464818,
"grad_norm": 0.13725011956395136,
"learning_rate": 3.946495168121778e-05,
"loss": 1.106331467628479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29948368668556213,
"step": 197,
"valid_targets_mean": 14759.3,
"valid_targets_min": 4909
},
{
"epoch": 0.8443496801705757,
"grad_norm": 0.13034881754079844,
"learning_rate": 3.9451208038596325e-05,
"loss": 1.0999813079833984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26882147789001465,
"step": 198,
"valid_targets_mean": 14875.5,
"valid_targets_min": 4081
},
{
"epoch": 0.8486140724946695,
"grad_norm": 0.1225494722450067,
"learning_rate": 3.943729256714608e-05,
"loss": 1.1127738952636719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2914237380027771,
"step": 199,
"valid_targets_mean": 15551.2,
"valid_targets_min": 10433
},
{
"epoch": 0.8528784648187633,
"grad_norm": 0.1300877292503286,
"learning_rate": 3.942320538979408e-05,
"loss": 1.103322148323059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2825484573841095,
"step": 200,
"valid_targets_mean": 15506.2,
"valid_targets_min": 7338
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.14098802712240638,
"learning_rate": 3.9408946630984144e-05,
"loss": 1.0541939735412598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2697862982749939,
"step": 201,
"valid_targets_mean": 15275.3,
"valid_targets_min": 6483
},
{
"epoch": 0.8614072494669509,
"grad_norm": 0.14224101556395885,
"learning_rate": 3.939451641667587e-05,
"loss": 1.0901778936386108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2783012390136719,
"step": 202,
"valid_targets_mean": 15515.6,
"valid_targets_min": 5670
},
{
"epoch": 0.8656716417910447,
"grad_norm": 0.16125617750462215,
"learning_rate": 3.937991487434342e-05,
"loss": 1.105463981628418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2722461521625519,
"step": 203,
"valid_targets_mean": 14926.7,
"valid_targets_min": 6670
},
{
"epoch": 0.8699360341151386,
"grad_norm": 0.13209796282365102,
"learning_rate": 3.9365142132974484e-05,
"loss": 1.1652926206588745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3184700608253479,
"step": 204,
"valid_targets_mean": 15101.2,
"valid_targets_min": 4098
},
{
"epoch": 0.8742004264392325,
"grad_norm": 0.13427219796772571,
"learning_rate": 3.935019832306905e-05,
"loss": 1.1196999549865723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2864650785923004,
"step": 205,
"valid_targets_mean": 14890.8,
"valid_targets_min": 5785
},
{
"epoch": 0.8784648187633263,
"grad_norm": 0.12296597238206593,
"learning_rate": 3.933508357663832e-05,
"loss": 1.1197096109390259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28627169132232666,
"step": 206,
"valid_targets_mean": 14440.3,
"valid_targets_min": 4342
},
{
"epoch": 0.8827292110874201,
"grad_norm": 0.15038922539621744,
"learning_rate": 3.9319798027203544e-05,
"loss": 1.0915428400039673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28127023577690125,
"step": 207,
"valid_targets_mean": 14453.1,
"valid_targets_min": 583
},
{
"epoch": 0.8869936034115139,
"grad_norm": 0.148957816472187,
"learning_rate": 3.930434180979478e-05,
"loss": 1.110160231590271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2755948007106781,
"step": 208,
"valid_targets_mean": 14177.1,
"valid_targets_min": 2004
},
{
"epoch": 0.8912579957356077,
"grad_norm": 0.1506910772213843,
"learning_rate": 3.928871506094975e-05,
"loss": 1.1149358749389648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29100799560546875,
"step": 209,
"valid_targets_mean": 15418.4,
"valid_targets_min": 12817
},
{
"epoch": 0.8955223880597015,
"grad_norm": 0.1484641438960075,
"learning_rate": 3.927291791871264e-05,
"loss": 1.126612901687622,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26460331678390503,
"step": 210,
"valid_targets_mean": 14732.7,
"valid_targets_min": 3427
},
{
"epoch": 0.8997867803837953,
"grad_norm": 0.12901650114907645,
"learning_rate": 3.925695052263284e-05,
"loss": 1.1095227003097534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26923397183418274,
"step": 211,
"valid_targets_mean": 14885.7,
"valid_targets_min": 2366
},
{
"epoch": 0.9040511727078892,
"grad_norm": 0.13562134156105302,
"learning_rate": 3.924081301376375e-05,
"loss": 1.0953495502471924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2668260931968689,
"step": 212,
"valid_targets_mean": 14759.1,
"valid_targets_min": 1783
},
{
"epoch": 0.908315565031983,
"grad_norm": 0.14446206741417195,
"learning_rate": 3.9224505534661525e-05,
"loss": 1.1020824909210205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25572067499160767,
"step": 213,
"valid_targets_mean": 14785.4,
"valid_targets_min": 4867
},
{
"epoch": 0.9125799573560768,
"grad_norm": 0.12584964455269773,
"learning_rate": 3.92080282293838e-05,
"loss": 1.1246337890625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29202979803085327,
"step": 214,
"valid_targets_mean": 15084.0,
"valid_targets_min": 1758
},
{
"epoch": 0.9168443496801706,
"grad_norm": 0.14600926284692883,
"learning_rate": 3.9191381243488417e-05,
"loss": 1.0968735218048096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28475409746170044,
"step": 215,
"valid_targets_mean": 14981.6,
"valid_targets_min": 3745
},
{
"epoch": 0.9211087420042644,
"grad_norm": 0.13297033649676046,
"learning_rate": 3.9174564724032167e-05,
"loss": 1.118091106414795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25068044662475586,
"step": 216,
"valid_targets_mean": 14151.1,
"valid_targets_min": 950
},
{
"epoch": 0.9253731343283582,
"grad_norm": 0.13574354964287708,
"learning_rate": 3.9157578819569455e-05,
"loss": 1.0985863208770752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2713858187198639,
"step": 217,
"valid_targets_mean": 14968.6,
"valid_targets_min": 5942
},
{
"epoch": 0.929637526652452,
"grad_norm": 0.1304172223665982,
"learning_rate": 3.9140423680151036e-05,
"loss": 1.094936490058899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2801072895526886,
"step": 218,
"valid_targets_mean": 14687.3,
"valid_targets_min": 2303
},
{
"epoch": 0.9339019189765458,
"grad_norm": 0.14586104833661614,
"learning_rate": 3.9123099457322625e-05,
"loss": 1.1430864334106445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29844510555267334,
"step": 219,
"valid_targets_mean": 15042.7,
"valid_targets_min": 2496
},
{
"epoch": 0.9381663113006397,
"grad_norm": 0.11896042929231765,
"learning_rate": 3.9105606304123605e-05,
"loss": 1.1179921627044678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26598095893859863,
"step": 220,
"valid_targets_mean": 15458.4,
"valid_targets_min": 1422
},
{
"epoch": 0.9424307036247335,
"grad_norm": 0.13542883335058106,
"learning_rate": 3.908794437508567e-05,
"loss": 1.113782525062561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27418214082717896,
"step": 221,
"valid_targets_mean": 14993.8,
"valid_targets_min": 8475
},
{
"epoch": 0.9466950959488273,
"grad_norm": 0.12098801862055243,
"learning_rate": 3.907011382623145e-05,
"loss": 1.1109488010406494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27835947275161743,
"step": 222,
"valid_targets_mean": 14980.2,
"valid_targets_min": 2477
},
{
"epoch": 0.9509594882729211,
"grad_norm": 0.1446411585075237,
"learning_rate": 3.905211481507313e-05,
"loss": 1.1192302703857422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29389917850494385,
"step": 223,
"valid_targets_mean": 14959.2,
"valid_targets_min": 5807
},
{
"epoch": 0.9552238805970149,
"grad_norm": 0.13006340729995194,
"learning_rate": 3.903394750061106e-05,
"loss": 1.1109366416931152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.283753365278244,
"step": 224,
"valid_targets_mean": 14750.3,
"valid_targets_min": 3084
},
{
"epoch": 0.9594882729211087,
"grad_norm": 0.12444018866956234,
"learning_rate": 3.9015612043332375e-05,
"loss": 1.0865435600280762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2794000506401062,
"step": 225,
"valid_targets_mean": 15057.6,
"valid_targets_min": 1988
},
{
"epoch": 0.9637526652452025,
"grad_norm": 0.13100673247005967,
"learning_rate": 3.8997108605209535e-05,
"loss": 1.091223955154419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2665339708328247,
"step": 226,
"valid_targets_mean": 14488.4,
"valid_targets_min": 5745
},
{
"epoch": 0.9680170575692963,
"grad_norm": 0.14432664762257136,
"learning_rate": 3.897843734969891e-05,
"loss": 1.0613495111465454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25530362129211426,
"step": 227,
"valid_targets_mean": 15354.0,
"valid_targets_min": 9319
},
{
"epoch": 0.9722814498933902,
"grad_norm": 0.1295131341857359,
"learning_rate": 3.895959844173937e-05,
"loss": 1.1064207553863525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2939533293247223,
"step": 228,
"valid_targets_mean": 14773.6,
"valid_targets_min": 5422
},
{
"epoch": 0.976545842217484,
"grad_norm": 0.1266835881218933,
"learning_rate": 3.8940592047750774e-05,
"loss": 1.1071289777755737,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25003114342689514,
"step": 229,
"valid_targets_mean": 14048.9,
"valid_targets_min": 4287
},
{
"epoch": 0.9808102345415778,
"grad_norm": 0.1334513405570654,
"learning_rate": 3.892141833563255e-05,
"loss": 1.1168997287750244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2899852991104126,
"step": 230,
"valid_targets_mean": 15266.1,
"valid_targets_min": 7873
},
{
"epoch": 0.9850746268656716,
"grad_norm": 0.12948007424703104,
"learning_rate": 3.8902077474762155e-05,
"loss": 1.0858094692230225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2779843211174011,
"step": 231,
"valid_targets_mean": 14952.2,
"valid_targets_min": 2521
},
{
"epoch": 0.9893390191897654,
"grad_norm": 0.12821152626259633,
"learning_rate": 3.888256963599364e-05,
"loss": 1.100785493850708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2768602967262268,
"step": 232,
"valid_targets_mean": 14126.9,
"valid_targets_min": 2079
},
{
"epoch": 0.9936034115138592,
"grad_norm": 0.1388168756240748,
"learning_rate": 3.886289499165609e-05,
"loss": 1.0943260192871094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30819663405418396,
"step": 233,
"valid_targets_mean": 15548.9,
"valid_targets_min": 6662
},
{
"epoch": 0.997867803837953,
"grad_norm": 0.13013553730526728,
"learning_rate": 3.884305371555215e-05,
"loss": 1.1210851669311523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2844943404197693,
"step": 234,
"valid_targets_mean": 15039.9,
"valid_targets_min": 7550
},
{
"epoch": 1.0,
"grad_norm": 0.15419807521284662,
"learning_rate": 3.882304598295643e-05,
"loss": 1.1342179775238037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5554423332214355,
"step": 235,
"valid_targets_mean": 14957.4,
"valid_targets_min": 4497
},
{
"epoch": 1.004264392324094,
"grad_norm": 0.14832594302128513,
"learning_rate": 3.880287197061402e-05,
"loss": 1.1174304485321045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2663096785545349,
"step": 236,
"valid_targets_mean": 14268.1,
"valid_targets_min": 1579
},
{
"epoch": 1.0085287846481876,
"grad_norm": 0.11880863072507944,
"learning_rate": 3.878253185673888e-05,
"loss": 1.0903974771499634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27352577447891235,
"step": 237,
"valid_targets_mean": 15210.7,
"valid_targets_min": 5232
},
{
"epoch": 1.0127931769722816,
"grad_norm": 0.14804758953053165,
"learning_rate": 3.876202582101229e-05,
"loss": 1.1036317348480225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2706872224807739,
"step": 238,
"valid_targets_mean": 14478.7,
"valid_targets_min": 3156
},
{
"epoch": 1.0170575692963753,
"grad_norm": 0.13086327011528612,
"learning_rate": 3.874135404458125e-05,
"loss": 1.1062583923339844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2735649049282074,
"step": 239,
"valid_targets_mean": 14224.4,
"valid_targets_min": 2394
},
{
"epoch": 1.0213219616204692,
"grad_norm": 0.13332088768631248,
"learning_rate": 3.8720516710056905e-05,
"loss": 1.1267770528793335,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2770707905292511,
"step": 240,
"valid_targets_mean": 15087.3,
"valid_targets_min": 2365
},
{
"epoch": 1.0255863539445629,
"grad_norm": 0.12428623621161497,
"learning_rate": 3.8699514001512885e-05,
"loss": 1.098931908607483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27049383521080017,
"step": 241,
"valid_targets_mean": 15256.3,
"valid_targets_min": 2378
},
{
"epoch": 1.0298507462686568,
"grad_norm": 0.1439726319791604,
"learning_rate": 3.867834610448374e-05,
"loss": 1.0865485668182373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2768022418022156,
"step": 242,
"valid_targets_mean": 14890.1,
"valid_targets_min": 1821
},
{
"epoch": 1.0341151385927505,
"grad_norm": 0.11278081360532456,
"learning_rate": 3.865701320596324e-05,
"loss": 1.0894007682800293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24118563532829285,
"step": 243,
"valid_targets_mean": 13464.3,
"valid_targets_min": 2748
},
{
"epoch": 1.0383795309168444,
"grad_norm": 0.1449049037554444,
"learning_rate": 3.863551549440277e-05,
"loss": 1.0954735279083252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24541440606117249,
"step": 244,
"valid_targets_mean": 14150.5,
"valid_targets_min": 2252
},
{
"epoch": 1.0426439232409381,
"grad_norm": 0.13169050587302447,
"learning_rate": 3.861385315970964e-05,
"loss": 1.067154884338379,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26327186822891235,
"step": 245,
"valid_targets_mean": 14046.7,
"valid_targets_min": 2460
},
{
"epoch": 1.046908315565032,
"grad_norm": 0.13958573511012878,
"learning_rate": 3.859202639324542e-05,
"loss": 1.0889699459075928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2618789076805115,
"step": 246,
"valid_targets_mean": 13850.7,
"valid_targets_min": 2743
},
{
"epoch": 1.0511727078891258,
"grad_norm": 0.13766781074458856,
"learning_rate": 3.8570035387824214e-05,
"loss": 1.0873351097106934,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2589748799800873,
"step": 247,
"valid_targets_mean": 14669.0,
"valid_targets_min": 1422
},
{
"epoch": 1.0554371002132197,
"grad_norm": 0.14171428522498714,
"learning_rate": 3.8547880337711036e-05,
"loss": 1.089716911315918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25200155377388,
"step": 248,
"valid_targets_mean": 13806.2,
"valid_targets_min": 1686
},
{
"epoch": 1.0597014925373134,
"grad_norm": 0.14485898879525483,
"learning_rate": 3.8525561438620016e-05,
"loss": 1.1417646408081055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31864017248153687,
"step": 249,
"valid_targets_mean": 15282.7,
"valid_targets_min": 1942
},
{
"epoch": 1.0639658848614073,
"grad_norm": 0.135343252461091,
"learning_rate": 3.850307888771269e-05,
"loss": 1.0761914253234863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26799869537353516,
"step": 250,
"valid_targets_mean": 14998.7,
"valid_targets_min": 9633
},
{
"epoch": 1.068230277185501,
"grad_norm": 0.15849624526072872,
"learning_rate": 3.848043288359629e-05,
"loss": 1.08738374710083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25412940979003906,
"step": 251,
"valid_targets_mean": 15102.2,
"valid_targets_min": 918
},
{
"epoch": 1.072494669509595,
"grad_norm": 0.1305684357841081,
"learning_rate": 3.8457623626321944e-05,
"loss": 1.0818676948547363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2856181859970093,
"step": 252,
"valid_targets_mean": 15646.4,
"valid_targets_min": 10670
},
{
"epoch": 1.0767590618336886,
"grad_norm": 0.18617939840508396,
"learning_rate": 3.843465131738296e-05,
"loss": 1.1018372774124146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29034340381622314,
"step": 253,
"valid_targets_mean": 15954.2,
"valid_targets_min": 12329
},
{
"epoch": 1.0810234541577826,
"grad_norm": 0.1508586557401326,
"learning_rate": 3.8411516159713e-05,
"loss": 1.0827605724334717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27897909283638,
"step": 254,
"valid_targets_mean": 15004.8,
"valid_targets_min": 1469
},
{
"epoch": 1.0852878464818763,
"grad_norm": 0.14613227348168586,
"learning_rate": 3.838821835768431e-05,
"loss": 1.0547592639923096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.259225070476532,
"step": 255,
"valid_targets_mean": 14690.4,
"valid_targets_min": 2857
},
{
"epoch": 1.0895522388059702,
"grad_norm": 0.16145844052520353,
"learning_rate": 3.83647581171059e-05,
"loss": 1.0921217203140259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24286618828773499,
"step": 256,
"valid_targets_mean": 14299.9,
"valid_targets_min": 1769
},
{
"epoch": 1.0938166311300639,
"grad_norm": 0.12185397357763571,
"learning_rate": 3.8341135645221744e-05,
"loss": 1.075549602508545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2710611820220947,
"step": 257,
"valid_targets_mean": 15126.9,
"valid_targets_min": 4921
},
{
"epoch": 1.0980810234541578,
"grad_norm": 0.17100699897849678,
"learning_rate": 3.831735115070895e-05,
"loss": 1.0602529048919678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2660970091819763,
"step": 258,
"valid_targets_mean": 14821.5,
"valid_targets_min": 1540
},
{
"epoch": 1.1023454157782515,
"grad_norm": 0.10522306123354634,
"learning_rate": 3.8293404843675904e-05,
"loss": 1.075049877166748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2423074096441269,
"step": 259,
"valid_targets_mean": 15089.9,
"valid_targets_min": 7013
},
{
"epoch": 1.1066098081023454,
"grad_norm": 0.17747972680014706,
"learning_rate": 3.8269296935660395e-05,
"loss": 1.078465461730957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25448617339134216,
"step": 260,
"valid_targets_mean": 14341.2,
"valid_targets_min": 4757
},
{
"epoch": 1.1108742004264391,
"grad_norm": 0.1086132454278875,
"learning_rate": 3.82450276396278e-05,
"loss": 1.0865874290466309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.271104633808136,
"step": 261,
"valid_targets_mean": 14731.7,
"valid_targets_min": 5798
},
{
"epoch": 1.115138592750533,
"grad_norm": 0.1433739056767183,
"learning_rate": 3.822059716996916e-05,
"loss": 1.0663318634033203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2696720361709595,
"step": 262,
"valid_targets_mean": 14853.6,
"valid_targets_min": 3326
},
{
"epoch": 1.1194029850746268,
"grad_norm": 0.1299031072559473,
"learning_rate": 3.819600574249929e-05,
"loss": 1.0397834777832031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2643989324569702,
"step": 263,
"valid_targets_mean": 15993.3,
"valid_targets_min": 11892
},
{
"epoch": 1.1236673773987207,
"grad_norm": 0.14090883541170693,
"learning_rate": 3.817125357445489e-05,
"loss": 1.108483076095581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2813621163368225,
"step": 264,
"valid_targets_mean": 14684.8,
"valid_targets_min": 3168
},
{
"epoch": 1.1279317697228146,
"grad_norm": 0.12439320569250074,
"learning_rate": 3.814634088449261e-05,
"loss": 1.0688656568527222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24935731291770935,
"step": 265,
"valid_targets_mean": 15106.6,
"valid_targets_min": 5531
},
{
"epoch": 1.1321961620469083,
"grad_norm": 0.12057175511413992,
"learning_rate": 3.812126789268712e-05,
"loss": 1.0883033275604248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27489930391311646,
"step": 266,
"valid_targets_mean": 15746.8,
"valid_targets_min": 11441
},
{
"epoch": 1.136460554371002,
"grad_norm": 0.11827871544961882,
"learning_rate": 3.80960348205292e-05,
"loss": 1.132977843284607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26096606254577637,
"step": 267,
"valid_targets_mean": 13970.7,
"valid_targets_min": 2079
},
{
"epoch": 1.140724946695096,
"grad_norm": 0.1464166498076275,
"learning_rate": 3.807064189092372e-05,
"loss": 1.1327526569366455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2805986702442169,
"step": 268,
"valid_targets_mean": 14159.4,
"valid_targets_min": 3761
},
{
"epoch": 1.1449893390191899,
"grad_norm": 0.1269926664130488,
"learning_rate": 3.804508932818771e-05,
"loss": 1.0420353412628174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.257866233587265,
"step": 269,
"valid_targets_mean": 15314.1,
"valid_targets_min": 8727
},
{
"epoch": 1.1492537313432836,
"grad_norm": 0.11830814377555596,
"learning_rate": 3.801937735804838e-05,
"loss": 1.0889430046081543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26712143421173096,
"step": 270,
"valid_targets_mean": 14388.7,
"valid_targets_min": 3917
},
{
"epoch": 1.1535181236673775,
"grad_norm": 0.13757483749987642,
"learning_rate": 3.799350620764114e-05,
"loss": 1.056231141090393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26202642917633057,
"step": 271,
"valid_targets_mean": 15144.8,
"valid_targets_min": 5375
},
{
"epoch": 1.1577825159914712,
"grad_norm": 0.12052227300706919,
"learning_rate": 3.7967476105507535e-05,
"loss": 1.052736759185791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2620790898799896,
"step": 272,
"valid_targets_mean": 14506.4,
"valid_targets_min": 4050
},
{
"epoch": 1.1620469083155651,
"grad_norm": 0.1413035094399248,
"learning_rate": 3.7941287281593284e-05,
"loss": 1.063258409500122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29303738474845886,
"step": 273,
"valid_targets_mean": 15518.4,
"valid_targets_min": 2433
},
{
"epoch": 1.1663113006396588,
"grad_norm": 0.12648462313373993,
"learning_rate": 3.7914939967246227e-05,
"loss": 1.0919417142868042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26150012016296387,
"step": 274,
"valid_targets_mean": 14888.8,
"valid_targets_min": 5565
},
{
"epoch": 1.1705756929637527,
"grad_norm": 0.13614134627727864,
"learning_rate": 3.7888434395214285e-05,
"loss": 1.0795249938964844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2505633234977722,
"step": 275,
"valid_targets_mean": 13772.5,
"valid_targets_min": 3323
},
{
"epoch": 1.1748400852878464,
"grad_norm": 0.1335296993591303,
"learning_rate": 3.786177079964339e-05,
"loss": 1.0871176719665527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26828983426094055,
"step": 276,
"valid_targets_mean": 14900.1,
"valid_targets_min": 6668
},
{
"epoch": 1.1791044776119404,
"grad_norm": 0.11536802042295306,
"learning_rate": 3.783494941607544e-05,
"loss": 1.100544810295105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2667681872844696,
"step": 277,
"valid_targets_mean": 14130.2,
"valid_targets_min": 1357
},
{
"epoch": 1.183368869936034,
"grad_norm": 0.13476544317466366,
"learning_rate": 3.780797048144621e-05,
"loss": 1.0960979461669922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2654196321964264,
"step": 278,
"valid_targets_mean": 14693.0,
"valid_targets_min": 7570
},
{
"epoch": 1.187633262260128,
"grad_norm": 0.1206209090055457,
"learning_rate": 3.7780834234083236e-05,
"loss": 1.0933949947357178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28174376487731934,
"step": 279,
"valid_targets_mean": 15056.8,
"valid_targets_min": 3329
},
{
"epoch": 1.1918976545842217,
"grad_norm": 0.1371892681600439,
"learning_rate": 3.775354091370376e-05,
"loss": 1.06509268283844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26960083842277527,
"step": 280,
"valid_targets_mean": 15729.2,
"valid_targets_min": 10373
},
{
"epoch": 1.1961620469083156,
"grad_norm": 0.15002131655927678,
"learning_rate": 3.772609076141255e-05,
"loss": 1.0583593845367432,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2785288095474243,
"step": 281,
"valid_targets_mean": 15002.1,
"valid_targets_min": 6835
},
{
"epoch": 1.2004264392324093,
"grad_norm": 0.14303727644109057,
"learning_rate": 3.769848401969982e-05,
"loss": 1.084068775177002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27171340584754944,
"step": 282,
"valid_targets_mean": 15454.7,
"valid_targets_min": 10409
},
{
"epoch": 1.2046908315565032,
"grad_norm": 0.18103626349953328,
"learning_rate": 3.767072093243907e-05,
"loss": 1.1330029964447021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2785246968269348,
"step": 283,
"valid_targets_mean": 15136.9,
"valid_targets_min": 4653
},
{
"epoch": 1.208955223880597,
"grad_norm": 0.13298985920442377,
"learning_rate": 3.7642801744884915e-05,
"loss": 1.1546975374221802,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804200351238251,
"step": 284,
"valid_targets_mean": 14707.0,
"valid_targets_min": 2075
},
{
"epoch": 1.2132196162046909,
"grad_norm": 0.11773615532294529,
"learning_rate": 3.761472670367096e-05,
"loss": 1.0487704277038574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24607539176940918,
"step": 285,
"valid_targets_mean": 15109.0,
"valid_targets_min": 3679
},
{
"epoch": 1.2174840085287846,
"grad_norm": 0.15896186199143064,
"learning_rate": 3.758649605680758e-05,
"loss": 1.0542798042297363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26041775941848755,
"step": 286,
"valid_targets_mean": 14734.3,
"valid_targets_min": 7371
},
{
"epoch": 1.2217484008528785,
"grad_norm": 0.11629818493972385,
"learning_rate": 3.755811005367974e-05,
"loss": 1.0769155025482178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2768469750881195,
"step": 287,
"valid_targets_mean": 15378.3,
"valid_targets_min": 9901
},
{
"epoch": 1.2260127931769722,
"grad_norm": 0.15317536049975403,
"learning_rate": 3.752956894504481e-05,
"loss": 1.0881221294403076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2508203685283661,
"step": 288,
"valid_targets_mean": 13490.2,
"valid_targets_min": 4864
},
{
"epoch": 1.2302771855010661,
"grad_norm": 0.12954766231178697,
"learning_rate": 3.750087298303033e-05,
"loss": 1.0730881690979004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2726203203201294,
"step": 289,
"valid_targets_mean": 14460.5,
"valid_targets_min": 1257
},
{
"epoch": 1.2345415778251598,
"grad_norm": 0.12507778245593257,
"learning_rate": 3.7472022421131795e-05,
"loss": 1.0751492977142334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2857722043991089,
"step": 290,
"valid_targets_mean": 15260.0,
"valid_targets_min": 7189
},
{
"epoch": 1.2388059701492538,
"grad_norm": 0.15587805506906033,
"learning_rate": 3.7443017514210406e-05,
"loss": 1.099067211151123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2564665377140045,
"step": 291,
"valid_targets_mean": 14848.7,
"valid_targets_min": 4038
},
{
"epoch": 1.2430703624733475,
"grad_norm": 0.11756774331916273,
"learning_rate": 3.7413858518490825e-05,
"loss": 1.12516188621521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28718727827072144,
"step": 292,
"valid_targets_mean": 14068.1,
"valid_targets_min": 3639
},
{
"epoch": 1.2473347547974414,
"grad_norm": 0.12283437611787308,
"learning_rate": 3.7384545691558895e-05,
"loss": 1.0807175636291504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26683443784713745,
"step": 293,
"valid_targets_mean": 14908.2,
"valid_targets_min": 6526
},
{
"epoch": 1.251599147121535,
"grad_norm": 0.13281035030612356,
"learning_rate": 3.735507929235941e-05,
"loss": 1.0679543018341064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28069236874580383,
"step": 294,
"valid_targets_mean": 14810.7,
"valid_targets_min": 3027
},
{
"epoch": 1.255863539445629,
"grad_norm": 0.14243050458611214,
"learning_rate": 3.732545958119378e-05,
"loss": 1.068037748336792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2620224952697754,
"step": 295,
"valid_targets_mean": 15038.1,
"valid_targets_min": 6629
},
{
"epoch": 1.260127931769723,
"grad_norm": 0.12019922924167692,
"learning_rate": 3.729568681971774e-05,
"loss": 1.0990270376205444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25651150941848755,
"step": 296,
"valid_targets_mean": 14217.9,
"valid_targets_min": 4531
},
{
"epoch": 1.2643923240938166,
"grad_norm": 0.1491423573676665,
"learning_rate": 3.726576127093905e-05,
"loss": 1.1150866746902466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2706458568572998,
"step": 297,
"valid_targets_mean": 15299.6,
"valid_targets_min": 4706
},
{
"epoch": 1.2686567164179103,
"grad_norm": 0.10268285779318712,
"learning_rate": 3.7235683199215177e-05,
"loss": 1.0659804344177246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2478877604007721,
"step": 298,
"valid_targets_mean": 13945.4,
"valid_targets_min": 3752
},
{
"epoch": 1.2729211087420043,
"grad_norm": 0.18644928909399677,
"learning_rate": 3.7205452870250944e-05,
"loss": 1.0774941444396973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25649482011795044,
"step": 299,
"valid_targets_mean": 15283.8,
"valid_targets_min": 5485
},
{
"epoch": 1.2771855010660982,
"grad_norm": 0.10657869894944769,
"learning_rate": 3.7175070551096204e-05,
"loss": 1.0419113636016846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2594824433326721,
"step": 300,
"valid_targets_mean": 15005.8,
"valid_targets_min": 5446
},
{
"epoch": 1.2814498933901919,
"grad_norm": 0.1570629764202123,
"learning_rate": 3.7144536510143436e-05,
"loss": 1.0955400466918945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2824366092681885,
"step": 301,
"valid_targets_mean": 14508.9,
"valid_targets_min": 1732
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.12046527386393832,
"learning_rate": 3.711385101712544e-05,
"loss": 1.0840914249420166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2630878686904907,
"step": 302,
"valid_targets_mean": 14425.5,
"valid_targets_min": 714
},
{
"epoch": 1.2899786780383795,
"grad_norm": 0.16778554640063564,
"learning_rate": 3.708301434311289e-05,
"loss": 1.0854768753051758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22701036930084229,
"step": 303,
"valid_targets_mean": 13604.7,
"valid_targets_min": 1132
},
{
"epoch": 1.2942430703624734,
"grad_norm": 0.10936643631687112,
"learning_rate": 3.7052026760511996e-05,
"loss": 1.0484946966171265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28306519985198975,
"step": 304,
"valid_targets_mean": 14979.1,
"valid_targets_min": 4469
},
{
"epoch": 1.2985074626865671,
"grad_norm": 0.14558359920557873,
"learning_rate": 3.7020888543062046e-05,
"loss": 1.0519603490829468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.288472980260849,
"step": 305,
"valid_targets_mean": 15788.1,
"valid_targets_min": 7730
},
{
"epoch": 1.302771855010661,
"grad_norm": 0.11589645762962353,
"learning_rate": 3.6989599965833024e-05,
"loss": 1.1148842573165894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30141183733940125,
"step": 306,
"valid_targets_mean": 15547.2,
"valid_targets_min": 7453
},
{
"epoch": 1.3070362473347548,
"grad_norm": 0.12471176158773442,
"learning_rate": 3.695816130522317e-05,
"loss": 1.111924648284912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2694132924079895,
"step": 307,
"valid_targets_mean": 14122.2,
"valid_targets_min": 3615
},
{
"epoch": 1.3113006396588487,
"grad_norm": 0.11638807047738552,
"learning_rate": 3.692657283895651e-05,
"loss": 1.072528600692749,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28918564319610596,
"step": 308,
"valid_targets_mean": 14801.9,
"valid_targets_min": 2371
},
{
"epoch": 1.3155650319829424,
"grad_norm": 0.12111815195562205,
"learning_rate": 3.689483484608048e-05,
"loss": 1.1112918853759766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26899421215057373,
"step": 309,
"valid_targets_mean": 14900.5,
"valid_targets_min": 1828
},
{
"epoch": 1.3198294243070363,
"grad_norm": 0.1245083633247677,
"learning_rate": 3.6862947606963364e-05,
"loss": 1.1408207416534424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26744699478149414,
"step": 310,
"valid_targets_mean": 14643.7,
"valid_targets_min": 2167
},
{
"epoch": 1.32409381663113,
"grad_norm": 0.12496946153990728,
"learning_rate": 3.6830911403291885e-05,
"loss": 1.0849456787109375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26361972093582153,
"step": 311,
"valid_targets_mean": 13660.8,
"valid_targets_min": 3902
},
{
"epoch": 1.328358208955224,
"grad_norm": 0.12954559428204301,
"learning_rate": 3.679872651806869e-05,
"loss": 1.1030840873718262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2831391394138336,
"step": 312,
"valid_targets_mean": 14744.0,
"valid_targets_min": 3576
},
{
"epoch": 1.3326226012793176,
"grad_norm": 0.1312577209697291,
"learning_rate": 3.676639323560986e-05,
"loss": 1.0543076992034912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2637162208557129,
"step": 313,
"valid_targets_mean": 14281.3,
"valid_targets_min": 2552
},
{
"epoch": 1.3368869936034116,
"grad_norm": 0.12824651697136208,
"learning_rate": 3.6733911841542365e-05,
"loss": 1.071939468383789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25401657819747925,
"step": 314,
"valid_targets_mean": 14670.4,
"valid_targets_min": 5175
},
{
"epoch": 1.3411513859275053,
"grad_norm": 0.1446700029967673,
"learning_rate": 3.6701282622801626e-05,
"loss": 1.0685203075408936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2770332098007202,
"step": 315,
"valid_targets_mean": 14907.9,
"valid_targets_min": 6623
},
{
"epoch": 1.3454157782515992,
"grad_norm": 0.12940739382910618,
"learning_rate": 3.666850586762886e-05,
"loss": 1.0746192932128906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23956233263015747,
"step": 316,
"valid_targets_mean": 14811.0,
"valid_targets_min": 1628
},
{
"epoch": 1.349680170575693,
"grad_norm": 0.11413470379819687,
"learning_rate": 3.663558186556863e-05,
"loss": 1.071451187133789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28026705980300903,
"step": 317,
"valid_targets_mean": 14632.1,
"valid_targets_min": 4280
},
{
"epoch": 1.3539445628997868,
"grad_norm": 0.12106572305012814,
"learning_rate": 3.660251090746627e-05,
"loss": 1.0448592901229858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2789224088191986,
"step": 318,
"valid_targets_mean": 15106.6,
"valid_targets_min": 3644
},
{
"epoch": 1.3582089552238805,
"grad_norm": 0.10720335635524811,
"learning_rate": 3.656929328546526e-05,
"loss": 1.1154221296310425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29188746213912964,
"step": 319,
"valid_targets_mean": 15495.8,
"valid_targets_min": 7900
},
{
"epoch": 1.3624733475479744,
"grad_norm": 0.1031363776494928,
"learning_rate": 3.653592929300471e-05,
"loss": 1.0886147022247314,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28728219866752625,
"step": 320,
"valid_targets_mean": 15197.1,
"valid_targets_min": 4028
},
{
"epoch": 1.3667377398720681,
"grad_norm": 0.09877354984739944,
"learning_rate": 3.650241922481675e-05,
"loss": 1.1005971431732178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2516176700592041,
"step": 321,
"valid_targets_mean": 14793.1,
"valid_targets_min": 790
},
{
"epoch": 1.371002132196162,
"grad_norm": 0.12162139075657939,
"learning_rate": 3.6468763376923886e-05,
"loss": 1.0966756343841553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29920968413352966,
"step": 322,
"valid_targets_mean": 15084.0,
"valid_targets_min": 9424
},
{
"epoch": 1.375266524520256,
"grad_norm": 0.11338523434311469,
"learning_rate": 3.6434962046636464e-05,
"loss": 1.0764126777648926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26871368288993835,
"step": 323,
"valid_targets_mean": 14493.0,
"valid_targets_min": 6413
},
{
"epoch": 1.3795309168443497,
"grad_norm": 0.1021787802517544,
"learning_rate": 3.6401015532549957e-05,
"loss": 1.0411429405212402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2578125,
"step": 324,
"valid_targets_mean": 14568.0,
"valid_targets_min": 3212
},
{
"epoch": 1.3837953091684434,
"grad_norm": 0.11266268201860755,
"learning_rate": 3.6366924134542386e-05,
"loss": 1.1386549472808838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27760809659957886,
"step": 325,
"valid_targets_mean": 14527.2,
"valid_targets_min": 3462
},
{
"epoch": 1.3880597014925373,
"grad_norm": 0.1243854058332879,
"learning_rate": 3.633268815377166e-05,
"loss": 1.067612886428833,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2844510078430176,
"step": 326,
"valid_targets_mean": 15790.6,
"valid_targets_min": 12765
},
{
"epoch": 1.3923240938166312,
"grad_norm": 0.12457675042727309,
"learning_rate": 3.6298307892672895e-05,
"loss": 1.1009567975997925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25199833512306213,
"step": 327,
"valid_targets_mean": 13961.3,
"valid_targets_min": 1199
},
{
"epoch": 1.396588486140725,
"grad_norm": 0.12405821883090433,
"learning_rate": 3.626378365495577e-05,
"loss": 1.0614802837371826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26193326711654663,
"step": 328,
"valid_targets_mean": 14690.6,
"valid_targets_min": 3389
},
{
"epoch": 1.4008528784648187,
"grad_norm": 0.12792072364392368,
"learning_rate": 3.622911574560181e-05,
"loss": 1.0712215900421143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3001205325126648,
"step": 329,
"valid_targets_mean": 15184.8,
"valid_targets_min": 2477
},
{
"epoch": 1.4051172707889126,
"grad_norm": 0.11802506426659638,
"learning_rate": 3.6194304470861744e-05,
"loss": 1.0776985883712769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24896389245986938,
"step": 330,
"valid_targets_mean": 14307.6,
"valid_targets_min": 1576
},
{
"epoch": 1.4093816631130065,
"grad_norm": 0.14193456034629987,
"learning_rate": 3.615935013825272e-05,
"loss": 1.0828070640563965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24934512376785278,
"step": 331,
"valid_targets_mean": 14097.0,
"valid_targets_min": 2289
},
{
"epoch": 1.4136460554371002,
"grad_norm": 0.11454156731125559,
"learning_rate": 3.612425305655569e-05,
"loss": 1.0726284980773926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23363390564918518,
"step": 332,
"valid_targets_mean": 13896.2,
"valid_targets_min": 4213
},
{
"epoch": 1.417910447761194,
"grad_norm": 0.1119485811276927,
"learning_rate": 3.6089013535812593e-05,
"loss": 1.0638244152069092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27090781927108765,
"step": 333,
"valid_targets_mean": 14418.3,
"valid_targets_min": 2850
},
{
"epoch": 1.4221748400852878,
"grad_norm": 0.15578617722434687,
"learning_rate": 3.6053631887323656e-05,
"loss": 1.0885016918182373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2501731216907501,
"step": 334,
"valid_targets_mean": 13383.7,
"valid_targets_min": 1372
},
{
"epoch": 1.4264392324093818,
"grad_norm": 0.11178099610928291,
"learning_rate": 3.601810842364465e-05,
"loss": 1.0959135293960571,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2762734591960907,
"step": 335,
"valid_targets_mean": 14974.3,
"valid_targets_min": 2770
},
{
"epoch": 1.4307036247334755,
"grad_norm": 0.12735356011823407,
"learning_rate": 3.598244345858412e-05,
"loss": 1.0883452892303467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2735271751880646,
"step": 336,
"valid_targets_mean": 14451.0,
"valid_targets_min": 3723
},
{
"epoch": 1.4349680170575694,
"grad_norm": 0.14595604051059685,
"learning_rate": 3.594663730720059e-05,
"loss": 1.0951387882232666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2695275545120239,
"step": 337,
"valid_targets_mean": 14800.2,
"valid_targets_min": 5120
},
{
"epoch": 1.439232409381663,
"grad_norm": 0.12801498029917474,
"learning_rate": 3.591069028579982e-05,
"loss": 1.075453281402588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26048654317855835,
"step": 338,
"valid_targets_mean": 14641.3,
"valid_targets_min": 2632
},
{
"epoch": 1.443496801705757,
"grad_norm": 0.13185458746968054,
"learning_rate": 3.5874602711931994e-05,
"loss": 1.0665796995162964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24829693138599396,
"step": 339,
"valid_targets_mean": 14248.4,
"valid_targets_min": 1286
},
{
"epoch": 1.4477611940298507,
"grad_norm": 0.10891407912156989,
"learning_rate": 3.5838374904388904e-05,
"loss": 1.102658987045288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27470141649246216,
"step": 340,
"valid_targets_mean": 14699.8,
"valid_targets_min": 2641
},
{
"epoch": 1.4520255863539446,
"grad_norm": 0.13165961307673427,
"learning_rate": 3.580200718320115e-05,
"loss": 1.0745452642440796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544945478439331,
"step": 341,
"valid_targets_mean": 14787.8,
"valid_targets_min": 4107
},
{
"epoch": 1.4562899786780383,
"grad_norm": 0.13748001969414841,
"learning_rate": 3.576549986963531e-05,
"loss": 1.1194934844970703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2657977342605591,
"step": 342,
"valid_targets_mean": 14735.9,
"valid_targets_min": 2093
},
{
"epoch": 1.4605543710021323,
"grad_norm": 0.10477711935401506,
"learning_rate": 3.5728853286191075e-05,
"loss": 1.072913408279419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24339430034160614,
"step": 343,
"valid_targets_mean": 15050.7,
"valid_targets_min": 6088
},
{
"epoch": 1.464818763326226,
"grad_norm": 0.13206563549600187,
"learning_rate": 3.5692067756598465e-05,
"loss": 1.1285758018493652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2717724144458771,
"step": 344,
"valid_targets_mean": 14498.3,
"valid_targets_min": 1700
},
{
"epoch": 1.4690831556503199,
"grad_norm": 0.1035041729683242,
"learning_rate": 3.5655143605814885e-05,
"loss": 1.127623200416565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3060557544231415,
"step": 345,
"valid_targets_mean": 15268.4,
"valid_targets_min": 5483
},
{
"epoch": 1.4733475479744136,
"grad_norm": 0.1287861596639523,
"learning_rate": 3.561808116002232e-05,
"loss": 1.0981791019439697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27787846326828003,
"step": 346,
"valid_targets_mean": 14271.1,
"valid_targets_min": 4778
},
{
"epoch": 1.4776119402985075,
"grad_norm": 0.10400314698445817,
"learning_rate": 3.5580880746624444e-05,
"loss": 1.150883674621582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29765594005584717,
"step": 347,
"valid_targets_mean": 14915.9,
"valid_targets_min": 3438
},
{
"epoch": 1.4818763326226012,
"grad_norm": 0.11796248919050434,
"learning_rate": 3.5543542694243685e-05,
"loss": 1.0655814409255981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26165351271629333,
"step": 348,
"valid_targets_mean": 14939.1,
"valid_targets_min": 3812
},
{
"epoch": 1.4861407249466951,
"grad_norm": 0.12131810205592578,
"learning_rate": 3.5506067332718355e-05,
"loss": 1.0898313522338867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2613683342933655,
"step": 349,
"valid_targets_mean": 15062.3,
"valid_targets_min": 7177
},
{
"epoch": 1.4904051172707888,
"grad_norm": 0.11277146846030922,
"learning_rate": 3.546845499309976e-05,
"loss": 1.074210524559021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2859177887439728,
"step": 350,
"valid_targets_mean": 14775.0,
"valid_targets_min": 1162
},
{
"epoch": 1.4946695095948828,
"grad_norm": 0.1125743066693261,
"learning_rate": 3.5430706007649225e-05,
"loss": 1.057504653930664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2800464630126953,
"step": 351,
"valid_targets_mean": 15699.2,
"valid_targets_min": 12993
},
{
"epoch": 1.4989339019189765,
"grad_norm": 0.10877437021463807,
"learning_rate": 3.539282070983518e-05,
"loss": 1.037824273109436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2656164765357971,
"step": 352,
"valid_targets_mean": 15310.8,
"valid_targets_min": 8363
},
{
"epoch": 1.5031982942430704,
"grad_norm": 0.11534094943493284,
"learning_rate": 3.535479943433023e-05,
"loss": 1.1390454769134521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2967601418495178,
"step": 353,
"valid_targets_mean": 15405.8,
"valid_targets_min": 7774
},
{
"epoch": 1.5074626865671643,
"grad_norm": 0.10781391911043343,
"learning_rate": 3.5316642517008184e-05,
"loss": 1.0576155185699463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2640992999076843,
"step": 354,
"valid_targets_mean": 14781.1,
"valid_targets_min": 6106
},
{
"epoch": 1.511727078891258,
"grad_norm": 0.11597676190927564,
"learning_rate": 3.5278350294941074e-05,
"loss": 1.0749003887176514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2610117197036743,
"step": 355,
"valid_targets_mean": 15589.6,
"valid_targets_min": 10997
},
{
"epoch": 1.5159914712153517,
"grad_norm": 0.10241416092293668,
"learning_rate": 3.523992310639622e-05,
"loss": 1.1029225587844849,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2503296434879303,
"step": 356,
"valid_targets_mean": 14615.8,
"valid_targets_min": 3881
},
{
"epoch": 1.5202558635394456,
"grad_norm": 0.10442030403521325,
"learning_rate": 3.5201361290833165e-05,
"loss": 1.0899959802627563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2842419445514679,
"step": 357,
"valid_targets_mean": 15131.1,
"valid_targets_min": 6438
},
{
"epoch": 1.5245202558635396,
"grad_norm": 0.09987263919645031,
"learning_rate": 3.516266518890079e-05,
"loss": 1.0917596817016602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29235658049583435,
"step": 358,
"valid_targets_mean": 15751.2,
"valid_targets_min": 8085
},
{
"epoch": 1.5287846481876333,
"grad_norm": 0.10144690514214857,
"learning_rate": 3.512383514243419e-05,
"loss": 1.0388712882995605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2658079266548157,
"step": 359,
"valid_targets_mean": 15070.4,
"valid_targets_min": 5750
},
{
"epoch": 1.533049040511727,
"grad_norm": 0.10670074757602906,
"learning_rate": 3.5084871494451716e-05,
"loss": 1.0851870775222778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2761267423629761,
"step": 360,
"valid_targets_mean": 14674.6,
"valid_targets_min": 4970
},
{
"epoch": 1.537313432835821,
"grad_norm": 0.13550624376630838,
"learning_rate": 3.5045774589151955e-05,
"loss": 1.1175588369369507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2806294560432434,
"step": 361,
"valid_targets_mean": 14360.2,
"valid_targets_min": 5217
},
{
"epoch": 1.5415778251599148,
"grad_norm": 0.1160123616496653,
"learning_rate": 3.500654477191064e-05,
"loss": 1.0992441177368164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2676744759082794,
"step": 362,
"valid_targets_mean": 15394.1,
"valid_targets_min": 9383
},
{
"epoch": 1.5458422174840085,
"grad_norm": 0.1379411380537933,
"learning_rate": 3.496718238927764e-05,
"loss": 1.0704309940338135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28866302967071533,
"step": 363,
"valid_targets_mean": 14926.6,
"valid_targets_min": 5543
},
{
"epoch": 1.5501066098081022,
"grad_norm": 0.11459417748764718,
"learning_rate": 3.492768778897388e-05,
"loss": 1.0766518115997314,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26298147439956665,
"step": 364,
"valid_targets_mean": 14691.7,
"valid_targets_min": 5379
},
{
"epoch": 1.5543710021321961,
"grad_norm": 0.13962495731920585,
"learning_rate": 3.4888061319888276e-05,
"loss": 1.0948641300201416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2818235754966736,
"step": 365,
"valid_targets_mean": 14954.8,
"valid_targets_min": 6380
},
{
"epoch": 1.55863539445629,
"grad_norm": 0.14024882925786547,
"learning_rate": 3.484830333207466e-05,
"loss": 1.05232834815979,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24685053527355194,
"step": 366,
"valid_targets_mean": 14426.7,
"valid_targets_min": 1946
},
{
"epoch": 1.5628997867803838,
"grad_norm": 0.11749558563639338,
"learning_rate": 3.4808414176748666e-05,
"loss": 1.0615160465240479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26881107687950134,
"step": 367,
"valid_targets_mean": 14904.8,
"valid_targets_min": 3346
},
{
"epoch": 1.5671641791044775,
"grad_norm": 0.12682181363550105,
"learning_rate": 3.476839420628466e-05,
"loss": 1.1117736101150513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2780311405658722,
"step": 368,
"valid_targets_mean": 15296.2,
"valid_targets_min": 4781
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.13037371369646736,
"learning_rate": 3.472824377421257e-05,
"loss": 1.0968823432922363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.271740198135376,
"step": 369,
"valid_targets_mean": 14465.7,
"valid_targets_min": 2541
},
{
"epoch": 1.5756929637526653,
"grad_norm": 0.14933048671018942,
"learning_rate": 3.4687963235214845e-05,
"loss": 1.089181900024414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28124457597732544,
"step": 370,
"valid_targets_mean": 14964.9,
"valid_targets_min": 6430
},
{
"epoch": 1.579957356076759,
"grad_norm": 0.10907412780232184,
"learning_rate": 3.464755294512325e-05,
"loss": 1.0588440895080566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28056174516677856,
"step": 371,
"valid_targets_mean": 14473.8,
"valid_targets_min": 2297
},
{
"epoch": 1.5842217484008527,
"grad_norm": 0.14953753298333028,
"learning_rate": 3.4607013260915765e-05,
"loss": 1.1049847602844238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2830548584461212,
"step": 372,
"valid_targets_mean": 15298.8,
"valid_targets_min": 3393
},
{
"epoch": 1.5884861407249466,
"grad_norm": 0.10873163112780194,
"learning_rate": 3.4566344540713404e-05,
"loss": 1.0358188152313232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2706957459449768,
"step": 373,
"valid_targets_mean": 15668.7,
"valid_targets_min": 3920
},
{
"epoch": 1.5927505330490406,
"grad_norm": 0.11698205233230456,
"learning_rate": 3.452554714377706e-05,
"loss": 1.0651031732559204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28714728355407715,
"step": 374,
"valid_targets_mean": 15681.1,
"valid_targets_min": 8328
},
{
"epoch": 1.5970149253731343,
"grad_norm": 0.11071017387878183,
"learning_rate": 3.448462143050436e-05,
"loss": 1.0621004104614258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25595471262931824,
"step": 375,
"valid_targets_mean": 14501.1,
"valid_targets_min": 4215
},
{
"epoch": 1.6012793176972282,
"grad_norm": 0.14376534752385098,
"learning_rate": 3.4443567762426444e-05,
"loss": 1.0872830152511597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2558348476886749,
"step": 376,
"valid_targets_mean": 14658.4,
"valid_targets_min": 2487
},
{
"epoch": 1.6055437100213221,
"grad_norm": 0.12196747305937251,
"learning_rate": 3.440238650220477e-05,
"loss": 1.0826265811920166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2578622102737427,
"step": 377,
"valid_targets_mean": 14727.9,
"valid_targets_min": 2655
},
{
"epoch": 1.6098081023454158,
"grad_norm": 0.12896634830847412,
"learning_rate": 3.4361078013627945e-05,
"loss": 1.064319133758545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26188498735427856,
"step": 378,
"valid_targets_mean": 15258.2,
"valid_targets_min": 6548
},
{
"epoch": 1.6140724946695095,
"grad_norm": 0.12319401063348305,
"learning_rate": 3.4319642661608474e-05,
"loss": 1.0894337892532349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26469263434410095,
"step": 379,
"valid_targets_mean": 14385.2,
"valid_targets_min": 2132
},
{
"epoch": 1.6183368869936035,
"grad_norm": 0.11053513057906855,
"learning_rate": 3.427808081217957e-05,
"loss": 1.122054100036621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28404873609542847,
"step": 380,
"valid_targets_mean": 15179.6,
"valid_targets_min": 2591
},
{
"epoch": 1.6226012793176974,
"grad_norm": 0.1454053882522242,
"learning_rate": 3.423639283249189e-05,
"loss": 1.044264554977417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2711837887763977,
"step": 381,
"valid_targets_mean": 14525.2,
"valid_targets_min": 2255
},
{
"epoch": 1.626865671641791,
"grad_norm": 0.11551695691219174,
"learning_rate": 3.419457909081032e-05,
"loss": 1.0583226680755615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2718050181865692,
"step": 382,
"valid_targets_mean": 14423.9,
"valid_targets_min": 2865
},
{
"epoch": 1.6311300639658848,
"grad_norm": 0.14466450550667942,
"learning_rate": 3.415263995651069e-05,
"loss": 1.0639991760253906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2571932375431061,
"step": 383,
"valid_targets_mean": 14661.0,
"valid_targets_min": 3144
},
{
"epoch": 1.6353944562899787,
"grad_norm": 0.12475348154804808,
"learning_rate": 3.411057580007653e-05,
"loss": 1.0943620204925537,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27506619691848755,
"step": 384,
"valid_targets_mean": 14811.4,
"valid_targets_min": 3535
},
{
"epoch": 1.6396588486140726,
"grad_norm": 0.14302915071496577,
"learning_rate": 3.4068386993095806e-05,
"loss": 1.0778902769088745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2766057848930359,
"step": 385,
"valid_targets_mean": 14840.3,
"valid_targets_min": 4287
},
{
"epoch": 1.6439232409381663,
"grad_norm": 0.1180188877301216,
"learning_rate": 3.402607390825762e-05,
"loss": 1.0909117460250854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2802852392196655,
"step": 386,
"valid_targets_mean": 15010.8,
"valid_targets_min": 5771
},
{
"epoch": 1.64818763326226,
"grad_norm": 0.13089171211986675,
"learning_rate": 3.398363691934894e-05,
"loss": 1.0828299522399902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25012773275375366,
"step": 387,
"valid_targets_mean": 13943.8,
"valid_targets_min": 2088
},
{
"epoch": 1.652452025586354,
"grad_norm": 0.12237534327201931,
"learning_rate": 3.3941076401251244e-05,
"loss": 1.041419506072998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2453901618719101,
"step": 388,
"valid_targets_mean": 15080.3,
"valid_targets_min": 2731
},
{
"epoch": 1.6567164179104479,
"grad_norm": 0.13086318292233995,
"learning_rate": 3.3898392729937295e-05,
"loss": 1.0613362789154053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2675091624259949,
"step": 389,
"valid_targets_mean": 15001.9,
"valid_targets_min": 5555
},
{
"epoch": 1.6609808102345416,
"grad_norm": 0.11304032288068386,
"learning_rate": 3.385558628246774e-05,
"loss": 1.076442003250122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2726753354072571,
"step": 390,
"valid_targets_mean": 15597.4,
"valid_targets_min": 2622
},
{
"epoch": 1.6652452025586353,
"grad_norm": 0.1190897954944693,
"learning_rate": 3.381265743698781e-05,
"loss": 1.097648024559021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28474539518356323,
"step": 391,
"valid_targets_mean": 14942.6,
"valid_targets_min": 6294
},
{
"epoch": 1.6695095948827292,
"grad_norm": 0.12775310359227787,
"learning_rate": 3.3769606572724e-05,
"loss": 1.094165563583374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2992146909236908,
"step": 392,
"valid_targets_mean": 15014.3,
"valid_targets_min": 5741
},
{
"epoch": 1.6737739872068231,
"grad_norm": 0.14495496452762205,
"learning_rate": 3.3726434069980686e-05,
"loss": 1.067185878753662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2758198380470276,
"step": 393,
"valid_targets_mean": 14591.5,
"valid_targets_min": 7497
},
{
"epoch": 1.6780383795309168,
"grad_norm": 0.12361810148821586,
"learning_rate": 3.368314031013678e-05,
"loss": 1.0652775764465332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25097572803497314,
"step": 394,
"valid_targets_mean": 14844.7,
"valid_targets_min": 2469
},
{
"epoch": 1.6823027718550105,
"grad_norm": 0.12963110186975382,
"learning_rate": 3.363972567564236e-05,
"loss": 1.0334590673446655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26233670115470886,
"step": 395,
"valid_targets_mean": 15396.5,
"valid_targets_min": 4076
},
{
"epoch": 1.6865671641791045,
"grad_norm": 0.12723586535144157,
"learning_rate": 3.35961905500153e-05,
"loss": 1.0554689168930054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2530735731124878,
"step": 396,
"valid_targets_mean": 14052.4,
"valid_targets_min": 583
},
{
"epoch": 1.6908315565031984,
"grad_norm": 0.13385106046467465,
"learning_rate": 3.3552535317837855e-05,
"loss": 1.0330636501312256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26997625827789307,
"step": 397,
"valid_targets_mean": 15229.6,
"valid_targets_min": 7310
},
{
"epoch": 1.695095948827292,
"grad_norm": 0.14161022441301999,
"learning_rate": 3.35087603647533e-05,
"loss": 1.0561280250549316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27111226320266724,
"step": 398,
"valid_targets_mean": 14640.5,
"valid_targets_min": 5156
},
{
"epoch": 1.6993603411513858,
"grad_norm": 0.12540099350168007,
"learning_rate": 3.346486607746249e-05,
"loss": 1.0786890983581543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2515331506729126,
"step": 399,
"valid_targets_mean": 14456.3,
"valid_targets_min": 2706
},
{
"epoch": 1.7036247334754797,
"grad_norm": 0.13225597797368216,
"learning_rate": 3.342085284372047e-05,
"loss": 1.0656988620758057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2741518020629883,
"step": 400,
"valid_targets_mean": 14828.6,
"valid_targets_min": 7909
},
{
"epoch": 1.7078891257995736,
"grad_norm": 0.12420832917978644,
"learning_rate": 3.337672105233303e-05,
"loss": 1.0593976974487305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28430697321891785,
"step": 401,
"valid_targets_mean": 15189.5,
"valid_targets_min": 8192
},
{
"epoch": 1.7121535181236673,
"grad_norm": 0.09414277111755848,
"learning_rate": 3.3332471093153296e-05,
"loss": 1.0656559467315674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26005491614341736,
"step": 402,
"valid_targets_mean": 13691.6,
"valid_targets_min": 1692
},
{
"epoch": 1.716417910447761,
"grad_norm": 0.12643311373908572,
"learning_rate": 3.3288103357078244e-05,
"loss": 1.0774431228637695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27346837520599365,
"step": 403,
"valid_targets_mean": 14881.9,
"valid_targets_min": 2540
},
{
"epoch": 1.720682302771855,
"grad_norm": 0.10056017571548824,
"learning_rate": 3.324361823604529e-05,
"loss": 1.0104659795761108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24728821218013763,
"step": 404,
"valid_targets_mean": 14765.2,
"valid_targets_min": 5334
},
{
"epoch": 1.724946695095949,
"grad_norm": 0.12926179256069892,
"learning_rate": 3.319901612302881e-05,
"loss": 1.0784205198287964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2719265818595886,
"step": 405,
"valid_targets_mean": 14719.6,
"valid_targets_min": 1640
},
{
"epoch": 1.7292110874200426,
"grad_norm": 0.13405661708287817,
"learning_rate": 3.315429741203666e-05,
"loss": 1.1021925210952759,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2841549217700958,
"step": 406,
"valid_targets_mean": 14608.9,
"valid_targets_min": 5331
},
{
"epoch": 1.7334754797441365,
"grad_norm": 0.15464756892728024,
"learning_rate": 3.3109462498106705e-05,
"loss": 1.0628423690795898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2621581554412842,
"step": 407,
"valid_targets_mean": 14532.0,
"valid_targets_min": 2679
},
{
"epoch": 1.7377398720682304,
"grad_norm": 0.13342598132568445,
"learning_rate": 3.306451177730333e-05,
"loss": 1.083683729171753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25192922353744507,
"step": 408,
"valid_targets_mean": 14070.4,
"valid_targets_min": 1668
},
{
"epoch": 1.7420042643923241,
"grad_norm": 0.1269433236572954,
"learning_rate": 3.301944564671394e-05,
"loss": 1.0548983812332153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26055648922920227,
"step": 409,
"valid_targets_mean": 14576.8,
"valid_targets_min": 7139
},
{
"epoch": 1.7462686567164178,
"grad_norm": 0.14703547699476646,
"learning_rate": 3.297426450444546e-05,
"loss": 1.1159348487854004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27076256275177,
"step": 410,
"valid_targets_mean": 14292.3,
"valid_targets_min": 1475
},
{
"epoch": 1.7505330490405118,
"grad_norm": 0.10197192181409305,
"learning_rate": 3.292896874962078e-05,
"loss": 1.0458035469055176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25788283348083496,
"step": 411,
"valid_targets_mean": 15507.5,
"valid_targets_min": 2480
},
{
"epoch": 1.7547974413646057,
"grad_norm": 0.1489970784949312,
"learning_rate": 3.2883558782375294e-05,
"loss": 1.1026990413665771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29226580262184143,
"step": 412,
"valid_targets_mean": 15263.1,
"valid_targets_min": 4783
},
{
"epoch": 1.7590618336886994,
"grad_norm": 0.11466976398835169,
"learning_rate": 3.283803500385332e-05,
"loss": 1.0824158191680908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2684157192707062,
"step": 413,
"valid_targets_mean": 14599.2,
"valid_targets_min": 2408
},
{
"epoch": 1.763326226012793,
"grad_norm": 0.13130715112735317,
"learning_rate": 3.2792397816204546e-05,
"loss": 1.0827077627182007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2868741452693939,
"step": 414,
"valid_targets_mean": 14412.4,
"valid_targets_min": 1068
},
{
"epoch": 1.767590618336887,
"grad_norm": 0.140547167284465,
"learning_rate": 3.2746647622580524e-05,
"loss": 1.0429410934448242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24891036748886108,
"step": 415,
"valid_targets_mean": 15049.4,
"valid_targets_min": 2241
},
{
"epoch": 1.771855010660981,
"grad_norm": 0.12616450226001252,
"learning_rate": 3.270078482713106e-05,
"loss": 1.0499267578125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27030113339424133,
"step": 416,
"valid_targets_mean": 14881.8,
"valid_targets_min": 7545
},
{
"epoch": 1.7761194029850746,
"grad_norm": 0.11139463648724142,
"learning_rate": 3.265480983500069e-05,
"loss": 1.0741684436798096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2622406780719757,
"step": 417,
"valid_targets_mean": 14172.5,
"valid_targets_min": 1877
},
{
"epoch": 1.7803837953091683,
"grad_norm": 0.10357766938227037,
"learning_rate": 3.260872305232507e-05,
"loss": 1.0451233386993408,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25643688440322876,
"step": 418,
"valid_targets_mean": 14493.0,
"valid_targets_min": 1864
},
{
"epoch": 1.7846481876332623,
"grad_norm": 0.11323009649143977,
"learning_rate": 3.256252488622738e-05,
"loss": 1.058302879333496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26543742418289185,
"step": 419,
"valid_targets_mean": 15440.0,
"valid_targets_min": 10672
},
{
"epoch": 1.7889125799573562,
"grad_norm": 0.09500605448492094,
"learning_rate": 3.251621574481475e-05,
"loss": 1.1008851528167725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2919309139251709,
"step": 420,
"valid_targets_mean": 14934.4,
"valid_targets_min": 1411
},
{
"epoch": 1.79317697228145,
"grad_norm": 0.10026409809880978,
"learning_rate": 3.246979603717467e-05,
"loss": 1.0398553609848022,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24688240885734558,
"step": 421,
"valid_targets_mean": 15338.0,
"valid_targets_min": 6373
},
{
"epoch": 1.7974413646055436,
"grad_norm": 0.10637793824507388,
"learning_rate": 3.242326617337133e-05,
"loss": 1.0642235279083252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25144168734550476,
"step": 422,
"valid_targets_mean": 14103.1,
"valid_targets_min": 2156
},
{
"epoch": 1.8017057569296375,
"grad_norm": 0.1112609047665103,
"learning_rate": 3.2376626564442016e-05,
"loss": 1.0859400033950806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2681505084037781,
"step": 423,
"valid_targets_mean": 15312.2,
"valid_targets_min": 7312
},
{
"epoch": 1.8059701492537314,
"grad_norm": 0.09902903723295896,
"learning_rate": 3.2329877622393515e-05,
"loss": 1.0946044921875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2805511951446533,
"step": 424,
"valid_targets_mean": 14936.2,
"valid_targets_min": 7976
},
{
"epoch": 1.8102345415778252,
"grad_norm": 0.11097833926698866,
"learning_rate": 3.228301976019841e-05,
"loss": 1.0197291374206543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24550142884254456,
"step": 425,
"valid_targets_mean": 14873.5,
"valid_targets_min": 2002
},
{
"epoch": 1.8144989339019189,
"grad_norm": 0.11486538208500076,
"learning_rate": 3.22360533917915e-05,
"loss": 1.0315191745758057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2519758939743042,
"step": 426,
"valid_targets_mean": 14839.6,
"valid_targets_min": 1648
},
{
"epoch": 1.8187633262260128,
"grad_norm": 0.10629637636047039,
"learning_rate": 3.218897893206608e-05,
"loss": 1.1044940948486328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2893705368041992,
"step": 427,
"valid_targets_mean": 15546.2,
"valid_targets_min": 11310
},
{
"epoch": 1.8230277185501067,
"grad_norm": 0.10264882731900092,
"learning_rate": 3.2141796796870335e-05,
"loss": 1.0161839723587036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2706839442253113,
"step": 428,
"valid_targets_mean": 15591.0,
"valid_targets_min": 5277
},
{
"epoch": 1.8272921108742004,
"grad_norm": 0.10817961844103864,
"learning_rate": 3.2094507403003614e-05,
"loss": 1.0440422296524048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2556973993778229,
"step": 429,
"valid_targets_mean": 14453.8,
"valid_targets_min": 5891
},
{
"epoch": 1.831556503198294,
"grad_norm": 0.10738883155066847,
"learning_rate": 3.2047111168212785e-05,
"loss": 1.0380173921585083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24675682187080383,
"step": 430,
"valid_targets_mean": 14939.2,
"valid_targets_min": 4208
},
{
"epoch": 1.835820895522388,
"grad_norm": 0.12514250497768345,
"learning_rate": 3.1999608511188524e-05,
"loss": 1.0663530826568604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24724867939949036,
"step": 431,
"valid_targets_mean": 14757.4,
"valid_targets_min": 5356
},
{
"epoch": 1.840085287846482,
"grad_norm": 0.11608314296561673,
"learning_rate": 3.1951999851561625e-05,
"loss": 1.0794222354888916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2597395181655884,
"step": 432,
"valid_targets_mean": 14086.0,
"valid_targets_min": 3318
},
{
"epoch": 1.8443496801705757,
"grad_norm": 0.1075628756787644,
"learning_rate": 3.190428560989931e-05,
"loss": 1.0682449340820312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26385074853897095,
"step": 433,
"valid_targets_mean": 14313.8,
"valid_targets_min": 4171
},
{
"epoch": 1.8486140724946694,
"grad_norm": 0.13116117414410683,
"learning_rate": 3.185646620770146e-05,
"loss": 1.0830940008163452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.273065447807312,
"step": 434,
"valid_targets_mean": 14730.8,
"valid_targets_min": 5238
},
{
"epoch": 1.8528784648187633,
"grad_norm": 0.11804865936613032,
"learning_rate": 3.180854206739696e-05,
"loss": 1.082724928855896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2746577858924866,
"step": 435,
"valid_targets_mean": 14328.6,
"valid_targets_min": 2063
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.10949828846379563,
"learning_rate": 3.176051361233991e-05,
"loss": 1.0387179851531982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2705637514591217,
"step": 436,
"valid_targets_mean": 15597.6,
"valid_targets_min": 10814
},
{
"epoch": 1.861407249466951,
"grad_norm": 0.11561094279879677,
"learning_rate": 3.171238126680594e-05,
"loss": 1.0813112258911133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25433382391929626,
"step": 437,
"valid_targets_mean": 14030.3,
"valid_targets_min": 3190
},
{
"epoch": 1.8656716417910446,
"grad_norm": 0.12798901681396424,
"learning_rate": 3.166414545598839e-05,
"loss": 1.0915324687957764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26344460248947144,
"step": 438,
"valid_targets_mean": 15152.7,
"valid_targets_min": 7099
},
{
"epoch": 1.8699360341151388,
"grad_norm": 0.1271214513570374,
"learning_rate": 3.161580660599464e-05,
"loss": 1.094179630279541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2800491452217102,
"step": 439,
"valid_targets_mean": 15732.6,
"valid_targets_min": 10404
},
{
"epoch": 1.8742004264392325,
"grad_norm": 0.10646008681655,
"learning_rate": 3.1567365143842264e-05,
"loss": 1.0482121706008911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2668571472167969,
"step": 440,
"valid_targets_mean": 15047.2,
"valid_targets_min": 6021
},
{
"epoch": 1.8784648187633262,
"grad_norm": 0.10433435989144153,
"learning_rate": 3.1518821497455326e-05,
"loss": 1.0509119033813477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25341445207595825,
"step": 441,
"valid_targets_mean": 13925.6,
"valid_targets_min": 1433
},
{
"epoch": 1.88272921108742,
"grad_norm": 0.10174469704920425,
"learning_rate": 3.147017609566054e-05,
"loss": 1.0561842918395996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2720150351524353,
"step": 442,
"valid_targets_mean": 15477.6,
"valid_targets_min": 8415
},
{
"epoch": 1.886993603411514,
"grad_norm": 0.11118964172110678,
"learning_rate": 3.142142936818353e-05,
"loss": 1.0753270387649536,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.276233971118927,
"step": 443,
"valid_targets_mean": 14627.8,
"valid_targets_min": 1613
},
{
"epoch": 1.8912579957356077,
"grad_norm": 0.11814802413618256,
"learning_rate": 3.137258174564501e-05,
"loss": 1.042363166809082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25078055262565613,
"step": 444,
"valid_targets_mean": 14520.2,
"valid_targets_min": 2962
},
{
"epoch": 1.8955223880597014,
"grad_norm": 0.11466348515085173,
"learning_rate": 3.1323633659556986e-05,
"loss": 1.09202241897583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27767413854599,
"step": 445,
"valid_targets_mean": 15421.9,
"valid_targets_min": 2208
},
{
"epoch": 1.8997867803837953,
"grad_norm": 0.10291753255190214,
"learning_rate": 3.127458554231894e-05,
"loss": 1.0517168045043945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23889979720115662,
"step": 446,
"valid_targets_mean": 14309.4,
"valid_targets_min": 5753
},
{
"epoch": 1.9040511727078893,
"grad_norm": 0.12461980461067836,
"learning_rate": 3.122543782721402e-05,
"loss": 1.050453543663025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2615568935871124,
"step": 447,
"valid_targets_mean": 13918.3,
"valid_targets_min": 2475
},
{
"epoch": 1.908315565031983,
"grad_norm": 0.09428324002827486,
"learning_rate": 3.1176190948405194e-05,
"loss": 1.104711651802063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2562685012817383,
"step": 448,
"valid_targets_mean": 14324.2,
"valid_targets_min": 1440
},
{
"epoch": 1.9125799573560767,
"grad_norm": 0.16893958621958116,
"learning_rate": 3.112684534093142e-05,
"loss": 1.0504666566848755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2534116506576538,
"step": 449,
"valid_targets_mean": 15158.3,
"valid_targets_min": 7789
},
{
"epoch": 1.9168443496801706,
"grad_norm": 0.09952080562448751,
"learning_rate": 3.107740144070385e-05,
"loss": 1.0780071020126343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26799800992012024,
"step": 450,
"valid_targets_mean": 14461.8,
"valid_targets_min": 2774
},
{
"epoch": 1.9211087420042645,
"grad_norm": 0.12942635643583963,
"learning_rate": 3.102785968450188e-05,
"loss": 1.0668766498565674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.270796000957489,
"step": 451,
"valid_targets_mean": 15381.7,
"valid_targets_min": 4575
},
{
"epoch": 1.9253731343283582,
"grad_norm": 0.10894682530418832,
"learning_rate": 3.09782205099694e-05,
"loss": 1.0592687129974365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25496721267700195,
"step": 452,
"valid_targets_mean": 14738.8,
"valid_targets_min": 1851
},
{
"epoch": 1.929637526652452,
"grad_norm": 0.1314620891674012,
"learning_rate": 3.092848435561084e-05,
"loss": 1.0456840991973877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2802231013774872,
"step": 453,
"valid_targets_mean": 14959.6,
"valid_targets_min": 5240
},
{
"epoch": 1.9339019189765458,
"grad_norm": 0.1408044774027632,
"learning_rate": 3.0878651660787376e-05,
"loss": 1.0969831943511963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27857857942581177,
"step": 454,
"valid_targets_mean": 15442.2,
"valid_targets_min": 8079
},
{
"epoch": 1.9381663113006398,
"grad_norm": 0.12883710039602786,
"learning_rate": 3.082872286571295e-05,
"loss": 1.0829414129257202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28005450963974,
"step": 455,
"valid_targets_mean": 15453.0,
"valid_targets_min": 2824
},
{
"epoch": 1.9424307036247335,
"grad_norm": 0.13891804038641842,
"learning_rate": 3.077869841145049e-05,
"loss": 1.0985287427902222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2765417695045471,
"step": 456,
"valid_targets_mean": 15028.1,
"valid_targets_min": 6473
},
{
"epoch": 1.9466950959488272,
"grad_norm": 0.12140438513294068,
"learning_rate": 3.0728578739907934e-05,
"loss": 1.0545512437820435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2601082921028137,
"step": 457,
"valid_targets_mean": 14672.4,
"valid_targets_min": 4414
},
{
"epoch": 1.950959488272921,
"grad_norm": 0.12549491266772111,
"learning_rate": 3.067836429383437e-05,
"loss": 1.0950112342834473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.269310861825943,
"step": 458,
"valid_targets_mean": 14378.7,
"valid_targets_min": 2366
},
{
"epoch": 1.955223880597015,
"grad_norm": 0.14450297661194325,
"learning_rate": 3.062805551681609e-05,
"loss": 1.0445308685302734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.275421679019928,
"step": 459,
"valid_targets_mean": 15370.7,
"valid_targets_min": 8461
},
{
"epoch": 1.9594882729211087,
"grad_norm": 0.1336770981792758,
"learning_rate": 3.057765285327271e-05,
"loss": 1.063051462173462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2548394203186035,
"step": 460,
"valid_targets_mean": 15211.4,
"valid_targets_min": 6680
},
{
"epoch": 1.9637526652452024,
"grad_norm": 0.13802337184331176,
"learning_rate": 3.0527156748453214e-05,
"loss": 1.0926513671875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27013474702835083,
"step": 461,
"valid_targets_mean": 15154.7,
"valid_targets_min": 4077
},
{
"epoch": 1.9680170575692963,
"grad_norm": 0.1292011752261259,
"learning_rate": 3.047656764843203e-05,
"loss": 1.020573377609253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25721079111099243,
"step": 462,
"valid_targets_mean": 15334.2,
"valid_targets_min": 4963
},
{
"epoch": 1.9722814498933903,
"grad_norm": 0.12136773774009396,
"learning_rate": 3.0425886000105094e-05,
"loss": 1.068652868270874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2549925446510315,
"step": 463,
"valid_targets_mean": 14423.4,
"valid_targets_min": 2083
},
{
"epoch": 1.976545842217484,
"grad_norm": 0.13903637021737197,
"learning_rate": 3.0375112251185892e-05,
"loss": 1.0581138134002686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28126290440559387,
"step": 464,
"valid_targets_mean": 14793.0,
"valid_targets_min": 5216
},
{
"epoch": 1.9808102345415777,
"grad_norm": 0.11383420277981499,
"learning_rate": 3.0324246850201527e-05,
"loss": 1.0606683492660522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26790544390678406,
"step": 465,
"valid_targets_mean": 14773.3,
"valid_targets_min": 997
},
{
"epoch": 1.9850746268656716,
"grad_norm": 0.11989728522400829,
"learning_rate": 3.0273290246488732e-05,
"loss": 1.0910248756408691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27009302377700806,
"step": 466,
"valid_targets_mean": 14553.0,
"valid_targets_min": 3103
},
{
"epoch": 1.9893390191897655,
"grad_norm": 0.0960490749851528,
"learning_rate": 3.0222242890189904e-05,
"loss": 1.051931381225586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26700809597969055,
"step": 467,
"valid_targets_mean": 15334.7,
"valid_targets_min": 8463
},
{
"epoch": 1.9936034115138592,
"grad_norm": 0.138459359183186,
"learning_rate": 3.017110523224914e-05,
"loss": 1.0271093845367432,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26433560252189636,
"step": 468,
"valid_targets_mean": 15226.9,
"valid_targets_min": 4884
},
{
"epoch": 1.997867803837953,
"grad_norm": 0.11985609012079511,
"learning_rate": 3.011987772440825e-05,
"loss": 1.0503500699996948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2652145028114319,
"step": 469,
"valid_targets_mean": 14280.6,
"valid_targets_min": 1974
},
{
"epoch": 2.0,
"grad_norm": 0.13892504913177423,
"learning_rate": 3.006856081920277e-05,
"loss": 1.0645921230316162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5336452126502991,
"step": 470,
"valid_targets_mean": 15389.5,
"valid_targets_min": 10681
},
{
"epoch": 2.0042643923240937,
"grad_norm": 0.14654076190383902,
"learning_rate": 3.001715496995793e-05,
"loss": 1.0488369464874268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2612645626068115,
"step": 471,
"valid_targets_mean": 14465.4,
"valid_targets_min": 7416
},
{
"epoch": 2.008528784648188,
"grad_norm": 0.09765859901344034,
"learning_rate": 2.9965660630784715e-05,
"loss": 1.0690792798995972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27206361293792725,
"step": 472,
"valid_targets_mean": 15750.6,
"valid_targets_min": 11215
},
{
"epoch": 2.0127931769722816,
"grad_norm": 0.12410263632364689,
"learning_rate": 2.9914078256575782e-05,
"loss": 1.0798900127410889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.262641966342926,
"step": 473,
"valid_targets_mean": 15144.8,
"valid_targets_min": 6244
},
{
"epoch": 2.0170575692963753,
"grad_norm": 0.10765759057868828,
"learning_rate": 2.9862408303001492e-05,
"loss": 1.1243364810943604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3022628724575043,
"step": 474,
"valid_targets_mean": 14726.7,
"valid_targets_min": 2342
},
{
"epoch": 2.021321961620469,
"grad_norm": 0.11877832476358277,
"learning_rate": 2.9810651226505875e-05,
"loss": 1.0872790813446045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28186672925949097,
"step": 475,
"valid_targets_mean": 14472.4,
"valid_targets_min": 2395
},
{
"epoch": 2.025586353944563,
"grad_norm": 0.12106720736020389,
"learning_rate": 2.9758807484302566e-05,
"loss": 1.0768089294433594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25935864448547363,
"step": 476,
"valid_targets_mean": 14108.2,
"valid_targets_min": 2124
},
{
"epoch": 2.029850746268657,
"grad_norm": 0.10492971526449288,
"learning_rate": 2.9706877534370822e-05,
"loss": 1.0892443656921387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.264812707901001,
"step": 477,
"valid_targets_mean": 14513.5,
"valid_targets_min": 2124
},
{
"epoch": 2.0341151385927505,
"grad_norm": 0.10812163248652905,
"learning_rate": 2.965486183545142e-05,
"loss": 1.060435175895691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2632817029953003,
"step": 478,
"valid_targets_mean": 14508.8,
"valid_targets_min": 3080
},
{
"epoch": 2.038379530916844,
"grad_norm": 0.12476950594114133,
"learning_rate": 2.9602760847042645e-05,
"loss": 1.0474357604980469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2663882076740265,
"step": 479,
"valid_targets_mean": 15315.6,
"valid_targets_min": 5085
},
{
"epoch": 2.0426439232409384,
"grad_norm": 0.10331436161121431,
"learning_rate": 2.955057502939621e-05,
"loss": 1.0485198497772217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24855202436447144,
"step": 480,
"valid_targets_mean": 13571.3,
"valid_targets_min": 1473
},
{
"epoch": 2.046908315565032,
"grad_norm": 0.12705868998848419,
"learning_rate": 2.9498304843513193e-05,
"loss": 1.0671385526657104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.263543963432312,
"step": 481,
"valid_targets_mean": 14991.1,
"valid_targets_min": 2646
},
{
"epoch": 2.0511727078891258,
"grad_norm": 0.12583377208587335,
"learning_rate": 2.9445950751139957e-05,
"loss": 1.0316877365112305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2590932846069336,
"step": 482,
"valid_targets_mean": 15205.2,
"valid_targets_min": 8022
},
{
"epoch": 2.0554371002132195,
"grad_norm": 0.1033382884036626,
"learning_rate": 2.939351321476412e-05,
"loss": 1.0455005168914795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25957465171813965,
"step": 483,
"valid_targets_mean": 15534.1,
"valid_targets_min": 10502
},
{
"epoch": 2.0597014925373136,
"grad_norm": 0.11256282136959297,
"learning_rate": 2.9340992697610393e-05,
"loss": 1.0130512714385986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24798721075057983,
"step": 484,
"valid_targets_mean": 14460.0,
"valid_targets_min": 2138
},
{
"epoch": 2.0639658848614073,
"grad_norm": 0.10108481622924992,
"learning_rate": 2.9288389663636537e-05,
"loss": 1.0002269744873047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23295074701309204,
"step": 485,
"valid_targets_mean": 14413.2,
"valid_targets_min": 4603
},
{
"epoch": 2.068230277185501,
"grad_norm": 0.10947225638999011,
"learning_rate": 2.923570457752925e-05,
"loss": 1.0374996662139893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23345667123794556,
"step": 486,
"valid_targets_mean": 13273.5,
"valid_targets_min": 1864
},
{
"epoch": 2.0724946695095947,
"grad_norm": 0.10364319135942143,
"learning_rate": 2.9182937904700078e-05,
"loss": 1.0086736679077148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2536025643348694,
"step": 487,
"valid_targets_mean": 14879.2,
"valid_targets_min": 6175
},
{
"epoch": 2.076759061833689,
"grad_norm": 0.12504876779626076,
"learning_rate": 2.9130090111281278e-05,
"loss": 1.0883104801177979,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2776550054550171,
"step": 488,
"valid_targets_mean": 14863.7,
"valid_targets_min": 6154
},
{
"epoch": 2.0810234541577826,
"grad_norm": 0.09758465515459959,
"learning_rate": 2.9077161664121722e-05,
"loss": 1.0496957302093506,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2773936986923218,
"step": 489,
"valid_targets_mean": 15143.3,
"valid_targets_min": 8949
},
{
"epoch": 2.0852878464818763,
"grad_norm": 0.11514922029821574,
"learning_rate": 2.902415303078275e-05,
"loss": 1.0517609119415283,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25558456778526306,
"step": 490,
"valid_targets_mean": 14162.9,
"valid_targets_min": 2296
},
{
"epoch": 2.08955223880597,
"grad_norm": 0.09452937643372553,
"learning_rate": 2.8971064679534072e-05,
"loss": 1.0327926874160767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27184581756591797,
"step": 491,
"valid_targets_mean": 14765.9,
"valid_targets_min": 3752
},
{
"epoch": 2.093816631130064,
"grad_norm": 0.10052766633828619,
"learning_rate": 2.8917897079349604e-05,
"loss": 1.0536391735076904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2497003972530365,
"step": 492,
"valid_targets_mean": 14619.1,
"valid_targets_min": 1784
},
{
"epoch": 2.098081023454158,
"grad_norm": 0.10265245574534791,
"learning_rate": 2.8864650699903336e-05,
"loss": 1.036287784576416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2603163421154022,
"step": 493,
"valid_targets_mean": 14403.3,
"valid_targets_min": 6370
},
{
"epoch": 2.1023454157782515,
"grad_norm": 0.10298965529413852,
"learning_rate": 2.881132601156518e-05,
"loss": 1.0232374668121338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2467612475156784,
"step": 494,
"valid_targets_mean": 14734.8,
"valid_targets_min": 4443
},
{
"epoch": 2.106609808102345,
"grad_norm": 0.09547113669015726,
"learning_rate": 2.8757923485396805e-05,
"loss": 1.0421087741851807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.252534955739975,
"step": 495,
"valid_targets_mean": 14424.6,
"valid_targets_min": 3054
},
{
"epoch": 2.1108742004264394,
"grad_norm": 0.09405820960580409,
"learning_rate": 2.8704443593147517e-05,
"loss": 1.028683066368103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26103419065475464,
"step": 496,
"valid_targets_mean": 14415.9,
"valid_targets_min": 1783
},
{
"epoch": 2.115138592750533,
"grad_norm": 0.08798921543478255,
"learning_rate": 2.8650886807250024e-05,
"loss": 1.0675265789031982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25961798429489136,
"step": 497,
"valid_targets_mean": 15019.7,
"valid_targets_min": 790
},
{
"epoch": 2.1194029850746268,
"grad_norm": 0.12299984240915911,
"learning_rate": 2.8597253600816332e-05,
"loss": 1.0105128288269043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24885162711143494,
"step": 498,
"valid_targets_mean": 14422.4,
"valid_targets_min": 583
},
{
"epoch": 2.1236673773987205,
"grad_norm": 0.10708533151927112,
"learning_rate": 2.8543544447633517e-05,
"loss": 1.06392502784729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2735995650291443,
"step": 499,
"valid_targets_mean": 15586.0,
"valid_targets_min": 9093
},
{
"epoch": 2.1279317697228146,
"grad_norm": 0.1111465159995418,
"learning_rate": 2.8489759822159558e-05,
"loss": 1.0755326747894287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26666438579559326,
"step": 500,
"valid_targets_mean": 14653.4,
"valid_targets_min": 3780
},
{
"epoch": 2.1321961620469083,
"grad_norm": 0.10381862287254379,
"learning_rate": 2.843590019951914e-05,
"loss": 1.0193350315093994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2575969099998474,
"step": 501,
"valid_targets_mean": 15342.2,
"valid_targets_min": 6106
},
{
"epoch": 2.136460554371002,
"grad_norm": 0.15702765736050914,
"learning_rate": 2.838196605549948e-05,
"loss": 1.0695016384124756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27278703451156616,
"step": 502,
"valid_targets_mean": 14913.7,
"valid_targets_min": 4352
},
{
"epoch": 2.140724946695096,
"grad_norm": 0.09524830986021622,
"learning_rate": 2.8327957866546082e-05,
"loss": 1.0777015686035156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2562635540962219,
"step": 503,
"valid_targets_mean": 15117.6,
"valid_targets_min": 4562
},
{
"epoch": 2.14498933901919,
"grad_norm": 0.1257807444880808,
"learning_rate": 2.8273876109758568e-05,
"loss": 1.0276715755462646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2588934302330017,
"step": 504,
"valid_targets_mean": 14904.7,
"valid_targets_min": 3462
},
{
"epoch": 2.1492537313432836,
"grad_norm": 0.11853517800630066,
"learning_rate": 2.8219721262886427e-05,
"loss": 1.0456304550170898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26853859424591064,
"step": 505,
"valid_targets_mean": 15175.2,
"valid_targets_min": 7027
},
{
"epoch": 2.1535181236673773,
"grad_norm": 0.12490590365139391,
"learning_rate": 2.816549380432483e-05,
"loss": 1.0226656198501587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2514593005180359,
"step": 506,
"valid_targets_mean": 15446.9,
"valid_targets_min": 5452
},
{
"epoch": 2.1577825159914714,
"grad_norm": 0.10570630724959912,
"learning_rate": 2.8111194213110386e-05,
"loss": 1.0459332466125488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.263741672039032,
"step": 507,
"valid_targets_mean": 15202.8,
"valid_targets_min": 7273
},
{
"epoch": 2.162046908315565,
"grad_norm": 0.1244887321697419,
"learning_rate": 2.805682296891691e-05,
"loss": 1.08240807056427,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2581389248371124,
"step": 508,
"valid_targets_mean": 14719.4,
"valid_targets_min": 2806
},
{
"epoch": 2.166311300639659,
"grad_norm": 0.10604438243953074,
"learning_rate": 2.8002380552051186e-05,
"loss": 1.0506298542022705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25669777393341064,
"step": 509,
"valid_targets_mean": 14633.6,
"valid_targets_min": 5078
},
{
"epoch": 2.1705756929637525,
"grad_norm": 0.09916450205483587,
"learning_rate": 2.7947867443448728e-05,
"loss": 1.0945768356323242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.273698627948761,
"step": 510,
"valid_targets_mean": 15665.9,
"valid_targets_min": 7606
},
{
"epoch": 2.1748400852878467,
"grad_norm": 0.1451666319315318,
"learning_rate": 2.789328412466953e-05,
"loss": 1.0555355548858643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25316354632377625,
"step": 511,
"valid_targets_mean": 14995.8,
"valid_targets_min": 3326
},
{
"epoch": 2.1791044776119404,
"grad_norm": 0.10069828710900193,
"learning_rate": 2.7838631077893813e-05,
"loss": 1.069288730621338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25883394479751587,
"step": 512,
"valid_targets_mean": 14451.3,
"valid_targets_min": 2320
},
{
"epoch": 2.183368869936034,
"grad_norm": 0.12195550659367309,
"learning_rate": 2.7783908785917753e-05,
"loss": 1.0519754886627197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2789909243583679,
"step": 513,
"valid_targets_mean": 14679.6,
"valid_targets_min": 4470
},
{
"epoch": 2.1876332622601278,
"grad_norm": 0.09827810793873502,
"learning_rate": 2.7729117732149244e-05,
"loss": 1.0605522394180298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26741981506347656,
"step": 514,
"valid_targets_mean": 15088.2,
"valid_targets_min": 2524
},
{
"epoch": 2.191897654584222,
"grad_norm": 0.148086971370313,
"learning_rate": 2.7674258400603587e-05,
"loss": 1.0701409578323364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29160070419311523,
"step": 515,
"valid_targets_mean": 15567.1,
"valid_targets_min": 3615
},
{
"epoch": 2.1961620469083156,
"grad_norm": 0.11372463082623945,
"learning_rate": 2.761933127589927e-05,
"loss": 1.040367603302002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2650470435619354,
"step": 516,
"valid_targets_mean": 15260.5,
"valid_targets_min": 8467
},
{
"epoch": 2.2004264392324093,
"grad_norm": 0.11489693774950514,
"learning_rate": 2.7564336843253633e-05,
"loss": 1.0579705238342285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2514723539352417,
"step": 517,
"valid_targets_mean": 14808.2,
"valid_targets_min": 4287
},
{
"epoch": 2.204690831556503,
"grad_norm": 0.13092540532321323,
"learning_rate": 2.7509275588478606e-05,
"loss": 1.0046842098236084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24405673146247864,
"step": 518,
"valid_targets_mean": 14465.2,
"valid_targets_min": 2088
},
{
"epoch": 2.208955223880597,
"grad_norm": 0.1400019168183909,
"learning_rate": 2.7454147997976404e-05,
"loss": 1.0589232444763184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26718807220458984,
"step": 519,
"valid_targets_mean": 15228.6,
"valid_targets_min": 8280
},
{
"epoch": 2.213219616204691,
"grad_norm": 0.11124391098691529,
"learning_rate": 2.7398954558735272e-05,
"loss": 1.0676054954528809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2782477140426636,
"step": 520,
"valid_targets_mean": 14580.4,
"valid_targets_min": 4572
},
{
"epoch": 2.2174840085287846,
"grad_norm": 0.1412550977287731,
"learning_rate": 2.7343695758325125e-05,
"loss": 1.0848462581634521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26414549350738525,
"step": 521,
"valid_targets_mean": 14224.1,
"valid_targets_min": 2748
},
{
"epoch": 2.2217484008528783,
"grad_norm": 0.1230055981160842,
"learning_rate": 2.7288372084893282e-05,
"loss": 1.0164406299591064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2596801519393921,
"step": 522,
"valid_targets_mean": 15459.9,
"valid_targets_min": 1540
},
{
"epoch": 2.2260127931769724,
"grad_norm": 0.12744240689331374,
"learning_rate": 2.7232984027160126e-05,
"loss": 1.0437984466552734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2318439483642578,
"step": 523,
"valid_targets_mean": 15135.6,
"valid_targets_min": 2091
},
{
"epoch": 2.230277185501066,
"grad_norm": 0.1612875067947998,
"learning_rate": 2.7177532074414822e-05,
"loss": 1.0547361373901367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2623888850212097,
"step": 524,
"valid_targets_mean": 13888.8,
"valid_targets_min": 1240
},
{
"epoch": 2.23454157782516,
"grad_norm": 0.12041519813523655,
"learning_rate": 2.712201671651094e-05,
"loss": 1.059849500656128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2720355987548828,
"step": 525,
"valid_targets_mean": 14897.0,
"valid_targets_min": 3212
},
{
"epoch": 2.2388059701492535,
"grad_norm": 0.14067840789590605,
"learning_rate": 2.7066438443862205e-05,
"loss": 1.0363714694976807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24360054731369019,
"step": 526,
"valid_targets_mean": 14457.7,
"valid_targets_min": 3828
},
{
"epoch": 2.2430703624733477,
"grad_norm": 0.14427320836445826,
"learning_rate": 2.701079774743808e-05,
"loss": 1.0071572065353394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2501201629638672,
"step": 527,
"valid_targets_mean": 15279.3,
"valid_targets_min": 6688
},
{
"epoch": 2.2473347547974414,
"grad_norm": 0.10563699141677124,
"learning_rate": 2.6955095118759496e-05,
"loss": 1.0626296997070312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2656599283218384,
"step": 528,
"valid_targets_mean": 14363.7,
"valid_targets_min": 3823
},
{
"epoch": 2.251599147121535,
"grad_norm": 0.16100690940471393,
"learning_rate": 2.689933104989447e-05,
"loss": 1.0518825054168701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25958943367004395,
"step": 529,
"valid_targets_mean": 15011.1,
"valid_targets_min": 2193
},
{
"epoch": 2.2558635394456292,
"grad_norm": 0.11964514918135785,
"learning_rate": 2.6843506033453777e-05,
"loss": 1.0230783224105835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24286603927612305,
"step": 530,
"valid_targets_mean": 15347.9,
"valid_targets_min": 6526
},
{
"epoch": 2.260127931769723,
"grad_norm": 0.13360952002492774,
"learning_rate": 2.6787620562586587e-05,
"loss": 1.0393277406692505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2762875258922577,
"step": 531,
"valid_targets_mean": 15673.6,
"valid_targets_min": 9246
},
{
"epoch": 2.2643923240938166,
"grad_norm": 0.13524679242007398,
"learning_rate": 2.673167513097613e-05,
"loss": 1.031490683555603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25706005096435547,
"step": 532,
"valid_targets_mean": 14525.1,
"valid_targets_min": 2960
},
{
"epoch": 2.2686567164179103,
"grad_norm": 0.10535102533517332,
"learning_rate": 2.6675670232835297e-05,
"loss": 1.0020201206207275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23740331828594208,
"step": 533,
"valid_targets_mean": 14035.9,
"valid_targets_min": 1882
},
{
"epoch": 2.272921108742004,
"grad_norm": 0.1458520051591525,
"learning_rate": 2.661960636290231e-05,
"loss": 1.051685094833374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2684166729450226,
"step": 534,
"valid_targets_mean": 14944.8,
"valid_targets_min": 3425
},
{
"epoch": 2.277185501066098,
"grad_norm": 0.12900710771871615,
"learning_rate": 2.6563484016436346e-05,
"loss": 1.0715006589889526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26723259687423706,
"step": 535,
"valid_targets_mean": 14942.6,
"valid_targets_min": 5829
},
{
"epoch": 2.281449893390192,
"grad_norm": 0.12004783567705744,
"learning_rate": 2.6507303689213143e-05,
"loss": 1.0671104192733765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3115587532520294,
"step": 536,
"valid_targets_mean": 15606.5,
"valid_targets_min": 11520
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.10282452687968437,
"learning_rate": 2.6451065877520634e-05,
"loss": 1.0426161289215088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26459482312202454,
"step": 537,
"valid_targets_mean": 15469.4,
"valid_targets_min": 8077
},
{
"epoch": 2.2899786780383797,
"grad_norm": 0.12209134268555552,
"learning_rate": 2.639477107815455e-05,
"loss": 1.000340223312378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2603060007095337,
"step": 538,
"valid_targets_mean": 15048.8,
"valid_targets_min": 7278
},
{
"epoch": 2.2942430703624734,
"grad_norm": 0.12311188310647632,
"learning_rate": 2.633841978841406e-05,
"loss": 1.0291199684143066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2517496943473816,
"step": 539,
"valid_targets_mean": 14726.7,
"valid_targets_min": 8625
},
{
"epoch": 2.298507462686567,
"grad_norm": 0.11903138455692189,
"learning_rate": 2.6282012506097347e-05,
"loss": 1.0493464469909668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2605173587799072,
"step": 540,
"valid_targets_mean": 14719.3,
"valid_targets_min": 1522
},
{
"epoch": 2.302771855010661,
"grad_norm": 0.10950615934598747,
"learning_rate": 2.622554972949724e-05,
"loss": 1.0875517129898071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.258272647857666,
"step": 541,
"valid_targets_mean": 13960.9,
"valid_targets_min": 1921
},
{
"epoch": 2.307036247334755,
"grad_norm": 0.12626593813951068,
"learning_rate": 2.6169031957396778e-05,
"loss": 1.0525660514831543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2733648419380188,
"step": 542,
"valid_targets_mean": 14309.1,
"valid_targets_min": 2133
},
{
"epoch": 2.3113006396588487,
"grad_norm": 0.10880109845764573,
"learning_rate": 2.611245968906482e-05,
"loss": 1.0387375354766846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2732793092727661,
"step": 543,
"valid_targets_mean": 14985.1,
"valid_targets_min": 3590
},
{
"epoch": 2.3155650319829424,
"grad_norm": 0.11600835728660512,
"learning_rate": 2.605583342425165e-05,
"loss": 1.0523663759231567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24638208746910095,
"step": 544,
"valid_targets_mean": 14012.8,
"valid_targets_min": 1893
},
{
"epoch": 2.319829424307036,
"grad_norm": 0.12730261852879615,
"learning_rate": 2.5999153663184546e-05,
"loss": 1.1032425165176392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2970106601715088,
"step": 545,
"valid_targets_mean": 14787.3,
"valid_targets_min": 4073
},
{
"epoch": 2.3240938166311302,
"grad_norm": 0.09408360904652946,
"learning_rate": 2.594242090656335e-05,
"loss": 1.0487980842590332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28500109910964966,
"step": 546,
"valid_targets_mean": 15352.6,
"valid_targets_min": 1613
},
{
"epoch": 2.328358208955224,
"grad_norm": 0.12953695636281873,
"learning_rate": 2.5885635655556075e-05,
"loss": 1.0353131294250488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2830761671066284,
"step": 547,
"valid_targets_mean": 15987.8,
"valid_targets_min": 12612
},
{
"epoch": 2.3326226012793176,
"grad_norm": 0.11541132531604792,
"learning_rate": 2.5828798411794443e-05,
"loss": 1.0664570331573486,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.238357812166214,
"step": 548,
"valid_targets_mean": 14100.9,
"valid_targets_min": 2760
},
{
"epoch": 2.3368869936034113,
"grad_norm": 0.11172920727522387,
"learning_rate": 2.5771909677369484e-05,
"loss": 1.026410698890686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2699667513370514,
"step": 549,
"valid_targets_mean": 15412.9,
"valid_targets_min": 9229
},
{
"epoch": 2.3411513859275055,
"grad_norm": 0.11690823967802624,
"learning_rate": 2.571496995482709e-05,
"loss": 1.0822011232376099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2635524868965149,
"step": 550,
"valid_targets_mean": 14972.8,
"valid_targets_min": 2851
},
{
"epoch": 2.345415778251599,
"grad_norm": 0.12618407659970443,
"learning_rate": 2.565797974716357e-05,
"loss": 1.0665310621261597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24963496625423431,
"step": 551,
"valid_targets_mean": 14868.7,
"valid_targets_min": 5691
},
{
"epoch": 2.349680170575693,
"grad_norm": 0.10116100775051301,
"learning_rate": 2.5600939557821205e-05,
"loss": 1.021169900894165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2607884407043457,
"step": 552,
"valid_targets_mean": 14750.9,
"valid_targets_min": 2857
},
{
"epoch": 2.3539445628997866,
"grad_norm": 0.10624136092421245,
"learning_rate": 2.5543849890683813e-05,
"loss": 1.0231151580810547,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26667192578315735,
"step": 553,
"valid_targets_mean": 15451.8,
"valid_targets_min": 8293
},
{
"epoch": 2.3582089552238807,
"grad_norm": 0.11314597081105966,
"learning_rate": 2.548671125007229e-05,
"loss": 1.025337815284729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2604908347129822,
"step": 554,
"valid_targets_mean": 13587.4,
"valid_targets_min": 2477
},
{
"epoch": 2.3624733475479744,
"grad_norm": 0.09734334725076549,
"learning_rate": 2.5429524140740155e-05,
"loss": 1.0691876411437988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27501150965690613,
"step": 555,
"valid_targets_mean": 14751.3,
"valid_targets_min": 4350
},
{
"epoch": 2.366737739872068,
"grad_norm": 0.12089441342104176,
"learning_rate": 2.537228906786908e-05,
"loss": 1.054142951965332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26418209075927734,
"step": 556,
"valid_targets_mean": 15161.1,
"valid_targets_min": 8012
},
{
"epoch": 2.3710021321961623,
"grad_norm": 0.10876736149695052,
"learning_rate": 2.5315006537064473e-05,
"loss": 1.0506845712661743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24916253983974457,
"step": 557,
"valid_targets_mean": 14328.4,
"valid_targets_min": 5090
},
{
"epoch": 2.375266524520256,
"grad_norm": 0.11063627553501197,
"learning_rate": 2.5257677054350927e-05,
"loss": 1.029079556465149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.246652752161026,
"step": 558,
"valid_targets_mean": 14815.7,
"valid_targets_min": 2327
},
{
"epoch": 2.3795309168443497,
"grad_norm": 0.11486869153400572,
"learning_rate": 2.5200301126167857e-05,
"loss": 1.0258636474609375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2491309493780136,
"step": 559,
"valid_targets_mean": 14963.6,
"valid_targets_min": 7539
},
{
"epoch": 2.3837953091684434,
"grad_norm": 0.09927324377094049,
"learning_rate": 2.514287925936492e-05,
"loss": 1.0641462802886963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27500349283218384,
"step": 560,
"valid_targets_mean": 15140.9,
"valid_targets_min": 4632
},
{
"epoch": 2.388059701492537,
"grad_norm": 0.10288750250199045,
"learning_rate": 2.5085411961197626e-05,
"loss": 1.0823527574539185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25461655855178833,
"step": 561,
"valid_targets_mean": 14990.3,
"valid_targets_min": 5484
},
{
"epoch": 2.3923240938166312,
"grad_norm": 0.13256053021297226,
"learning_rate": 2.502789973932278e-05,
"loss": 1.0496501922607422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2626480460166931,
"step": 562,
"valid_targets_mean": 14573.9,
"valid_targets_min": 1669
},
{
"epoch": 2.396588486140725,
"grad_norm": 0.10248061933901362,
"learning_rate": 2.4970343101794073e-05,
"loss": 1.05353844165802,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25262895226478577,
"step": 563,
"valid_targets_mean": 14119.2,
"valid_targets_min": 2611
},
{
"epoch": 2.4008528784648187,
"grad_norm": 0.107275484222771,
"learning_rate": 2.4912742557057538e-05,
"loss": 1.0425084829330444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25184744596481323,
"step": 564,
"valid_targets_mean": 14551.3,
"valid_targets_min": 1199
},
{
"epoch": 2.405117270788913,
"grad_norm": 0.11233400291999039,
"learning_rate": 2.485509861394708e-05,
"loss": 1.0836342573165894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27323925495147705,
"step": 565,
"valid_targets_mean": 14915.9,
"valid_targets_min": 2938
},
{
"epoch": 2.4093816631130065,
"grad_norm": 0.10118669997629698,
"learning_rate": 2.4797411781679975e-05,
"loss": 0.997891902923584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2830112874507904,
"step": 566,
"valid_targets_mean": 14926.4,
"valid_targets_min": 3260
},
{
"epoch": 2.4136460554371,
"grad_norm": 0.14380065598081848,
"learning_rate": 2.473968256985238e-05,
"loss": 1.0502943992614746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2830333709716797,
"step": 567,
"valid_targets_mean": 15068.0,
"valid_targets_min": 6241
},
{
"epoch": 2.417910447761194,
"grad_norm": 0.10008203837515717,
"learning_rate": 2.4681911488434825e-05,
"loss": 1.0760055780410767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24836665391921997,
"step": 568,
"valid_targets_mean": 14463.9,
"valid_targets_min": 3930
},
{
"epoch": 2.4221748400852876,
"grad_norm": 0.09836327892263215,
"learning_rate": 2.4624099047767702e-05,
"loss": 1.0567803382873535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2802816927433014,
"step": 569,
"valid_targets_mean": 15400.0,
"valid_targets_min": 7478
},
{
"epoch": 2.4264392324093818,
"grad_norm": 0.09992321153620136,
"learning_rate": 2.4566245758556787e-05,
"loss": 1.0168672800064087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24785345792770386,
"step": 570,
"valid_targets_mean": 14920.9,
"valid_targets_min": 2776
},
{
"epoch": 2.4307036247334755,
"grad_norm": 0.10037597222951485,
"learning_rate": 2.4508352131868664e-05,
"loss": 1.0484085083007812,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27643099427223206,
"step": 571,
"valid_targets_mean": 15005.2,
"valid_targets_min": 1389
},
{
"epoch": 2.434968017057569,
"grad_norm": 0.12136623582803767,
"learning_rate": 2.445041867912629e-05,
"loss": 1.0116479396820068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26011645793914795,
"step": 572,
"valid_targets_mean": 14960.0,
"valid_targets_min": 4520
},
{
"epoch": 2.4392324093816633,
"grad_norm": 0.11241936997999565,
"learning_rate": 2.439244591210443e-05,
"loss": 1.0097274780273438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2608654499053955,
"step": 573,
"valid_targets_mean": 15309.7,
"valid_targets_min": 7542
},
{
"epoch": 2.443496801705757,
"grad_norm": 0.11445438301863396,
"learning_rate": 2.4334434342925133e-05,
"loss": 1.03176748752594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2606462240219116,
"step": 574,
"valid_targets_mean": 14829.2,
"valid_targets_min": 4136
},
{
"epoch": 2.4477611940298507,
"grad_norm": 0.10495839525238645,
"learning_rate": 2.4276384484053227e-05,
"loss": 1.017263650894165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2691393494606018,
"step": 575,
"valid_targets_mean": 15787.1,
"valid_targets_min": 5599
},
{
"epoch": 2.4520255863539444,
"grad_norm": 0.13339390506906976,
"learning_rate": 2.4218296848291795e-05,
"loss": 1.0785963535308838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2539163827896118,
"step": 576,
"valid_targets_mean": 14497.7,
"valid_targets_min": 7315
},
{
"epoch": 2.4562899786780386,
"grad_norm": 0.12957093965260985,
"learning_rate": 2.4160171948777603e-05,
"loss": 1.033412218093872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27271324396133423,
"step": 577,
"valid_targets_mean": 15432.2,
"valid_targets_min": 6030
},
{
"epoch": 2.4605543710021323,
"grad_norm": 0.12244323692075133,
"learning_rate": 2.410201029897665e-05,
"loss": 1.0976747274398804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28120583295822144,
"step": 578,
"valid_targets_mean": 15840.6,
"valid_targets_min": 12270
},
{
"epoch": 2.464818763326226,
"grad_norm": 0.12582676034955095,
"learning_rate": 2.4043812412679532e-05,
"loss": 1.0250262022018433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2601637840270996,
"step": 579,
"valid_targets_mean": 15205.8,
"valid_targets_min": 6494
},
{
"epoch": 2.4690831556503197,
"grad_norm": 0.12580469890835344,
"learning_rate": 2.3985578803996985e-05,
"loss": 1.078566551208496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2532144784927368,
"step": 580,
"valid_targets_mean": 14033.4,
"valid_targets_min": 1579
},
{
"epoch": 2.473347547974414,
"grad_norm": 0.11174049402117477,
"learning_rate": 2.392730998735529e-05,
"loss": 1.076801061630249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26035818457603455,
"step": 581,
"valid_targets_mean": 15582.1,
"valid_targets_min": 3901
},
{
"epoch": 2.4776119402985075,
"grad_norm": 0.13591649752794477,
"learning_rate": 2.3869006477491755e-05,
"loss": 1.0493996143341064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25792649388313293,
"step": 582,
"valid_targets_mean": 15696.8,
"valid_targets_min": 4585
},
{
"epoch": 2.481876332622601,
"grad_norm": 0.0950522497644923,
"learning_rate": 2.381066878945017e-05,
"loss": 1.0378646850585938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26439088582992554,
"step": 583,
"valid_targets_mean": 15172.5,
"valid_targets_min": 2999
},
{
"epoch": 2.486140724946695,
"grad_norm": 0.1354084961137629,
"learning_rate": 2.3752297438576257e-05,
"loss": 1.030313491821289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2559170424938202,
"step": 584,
"valid_targets_mean": 15268.1,
"valid_targets_min": 5344
},
{
"epoch": 2.490405117270789,
"grad_norm": 0.10265331295285009,
"learning_rate": 2.3693892940513074e-05,
"loss": 1.0901963710784912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.267939031124115,
"step": 585,
"valid_targets_mean": 14409.6,
"valid_targets_min": 4600
},
{
"epoch": 2.4946695095948828,
"grad_norm": 0.12583451372831064,
"learning_rate": 2.3635455811196536e-05,
"loss": 1.06803560256958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27083370089530945,
"step": 586,
"valid_targets_mean": 14663.4,
"valid_targets_min": 2241
},
{
"epoch": 2.4989339019189765,
"grad_norm": 0.11239371132679929,
"learning_rate": 2.3576986566850796e-05,
"loss": 1.0789930820465088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23372890055179596,
"step": 587,
"valid_targets_mean": 13293.2,
"valid_targets_min": 3881
},
{
"epoch": 2.50319829424307,
"grad_norm": 0.10871538748917994,
"learning_rate": 2.351848572398371e-05,
"loss": 1.018543004989624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24593549966812134,
"step": 588,
"valid_targets_mean": 13829.2,
"valid_targets_min": 1634
},
{
"epoch": 2.5074626865671643,
"grad_norm": 0.12245068189358427,
"learning_rate": 2.3459953799382276e-05,
"loss": 1.0276107788085938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2681216597557068,
"step": 589,
"valid_targets_mean": 14876.3,
"valid_targets_min": 1179
},
{
"epoch": 2.511727078891258,
"grad_norm": 0.12396468143561087,
"learning_rate": 2.3401391310108054e-05,
"loss": 1.0470104217529297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2828315794467926,
"step": 590,
"valid_targets_mean": 15137.2,
"valid_targets_min": 2132
},
{
"epoch": 2.5159914712153517,
"grad_norm": 0.10200845616471743,
"learning_rate": 2.3342798773492602e-05,
"loss": 1.0102992057800293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2586694359779358,
"step": 591,
"valid_targets_mean": 14308.5,
"valid_targets_min": 6489
},
{
"epoch": 2.520255863539446,
"grad_norm": 0.10909927341005053,
"learning_rate": 2.328417670713294e-05,
"loss": 1.0673726797103882,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2538418769836426,
"step": 592,
"valid_targets_mean": 13910.1,
"valid_targets_min": 1629
},
{
"epoch": 2.5245202558635396,
"grad_norm": 0.12108120500001548,
"learning_rate": 2.3225525628886918e-05,
"loss": 1.0630145072937012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24985308945178986,
"step": 593,
"valid_targets_mean": 14432.2,
"valid_targets_min": 1452
},
{
"epoch": 2.5287846481876333,
"grad_norm": 0.10183242128325715,
"learning_rate": 2.3166846056868687e-05,
"loss": 1.1043426990509033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26531025767326355,
"step": 594,
"valid_targets_mean": 14467.5,
"valid_targets_min": 2025
},
{
"epoch": 2.533049040511727,
"grad_norm": 0.1215922302860279,
"learning_rate": 2.31081385094441e-05,
"loss": 1.0936028957366943,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2828601598739624,
"step": 595,
"valid_targets_mean": 14950.9,
"valid_targets_min": 1791
},
{
"epoch": 2.5373134328358207,
"grad_norm": 0.11486160173354906,
"learning_rate": 2.304940350522615e-05,
"loss": 1.0198354721069336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2564467489719391,
"step": 596,
"valid_targets_mean": 15492.4,
"valid_targets_min": 5938
},
{
"epoch": 2.541577825159915,
"grad_norm": 0.12209475506650519,
"learning_rate": 2.299064156307037e-05,
"loss": 1.013113260269165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2631484866142273,
"step": 597,
"valid_targets_mean": 15406.1,
"valid_targets_min": 7409
},
{
"epoch": 2.5458422174840085,
"grad_norm": 0.1203561337180622,
"learning_rate": 2.2931853202070275e-05,
"loss": 1.063555121421814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26198774576187134,
"step": 598,
"valid_targets_mean": 14713.0,
"valid_targets_min": 5438
},
{
"epoch": 2.550106609808102,
"grad_norm": 0.1052190176714238,
"learning_rate": 2.2873038941552724e-05,
"loss": 1.0354630947113037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2526911497116089,
"step": 599,
"valid_targets_mean": 15203.8,
"valid_targets_min": 5398
},
{
"epoch": 2.5543710021321964,
"grad_norm": 0.11305662624693819,
"learning_rate": 2.2814199301073412e-05,
"loss": 1.0255736112594604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24174556136131287,
"step": 600,
"valid_targets_mean": 14636.3,
"valid_targets_min": 2864
},
{
"epoch": 2.55863539445629,
"grad_norm": 0.10992216917702095,
"learning_rate": 2.27553348004122e-05,
"loss": 1.0456774234771729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26446160674095154,
"step": 601,
"valid_targets_mean": 14595.3,
"valid_targets_min": 2438
},
{
"epoch": 2.5628997867803838,
"grad_norm": 0.12243937686717783,
"learning_rate": 2.2696445959568577e-05,
"loss": 1.034912109375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2511717975139618,
"step": 602,
"valid_targets_mean": 14794.9,
"valid_targets_min": 4607
},
{
"epoch": 2.5671641791044775,
"grad_norm": 0.1142548129129476,
"learning_rate": 2.2637533298757064e-05,
"loss": 1.0757339000701904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26432859897613525,
"step": 603,
"valid_targets_mean": 14861.0,
"valid_targets_min": 6411
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.13637563685961449,
"learning_rate": 2.2578597338402567e-05,
"loss": 1.06773042678833,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2747154235839844,
"step": 604,
"valid_targets_mean": 14348.2,
"valid_targets_min": 1464
},
{
"epoch": 2.5756929637526653,
"grad_norm": 0.10516547564188432,
"learning_rate": 2.2519638599135844e-05,
"loss": 1.0419844388961792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2577703297138214,
"step": 605,
"valid_targets_mean": 14575.1,
"valid_targets_min": 1985
},
{
"epoch": 2.579957356076759,
"grad_norm": 0.1380551588227109,
"learning_rate": 2.2460657601788875e-05,
"loss": 1.0456421375274658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26205676794052124,
"step": 606,
"valid_targets_mean": 14975.0,
"valid_targets_min": 5379
},
{
"epoch": 2.5842217484008527,
"grad_norm": 0.09776367679894095,
"learning_rate": 2.2401654867390256e-05,
"loss": 1.0459275245666504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2624329924583435,
"step": 607,
"valid_targets_mean": 15002.1,
"valid_targets_min": 3892
},
{
"epoch": 2.588486140724947,
"grad_norm": 0.11100803708478867,
"learning_rate": 2.2342630917160605e-05,
"loss": 1.0408620834350586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2793847322463989,
"step": 608,
"valid_targets_mean": 15514.0,
"valid_targets_min": 11398
},
{
"epoch": 2.5927505330490406,
"grad_norm": 0.1069956325660906,
"learning_rate": 2.2283586272507975e-05,
"loss": 1.0688080787658691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2711299657821655,
"step": 609,
"valid_targets_mean": 15076.6,
"valid_targets_min": 4028
},
{
"epoch": 2.5970149253731343,
"grad_norm": 0.10430185525731406,
"learning_rate": 2.2224521455023193e-05,
"loss": 1.0676207542419434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2783691883087158,
"step": 610,
"valid_targets_mean": 15037.4,
"valid_targets_min": 5704
},
{
"epoch": 2.6012793176972284,
"grad_norm": 0.12130993394649214,
"learning_rate": 2.216543698647534e-05,
"loss": 1.0438411235809326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25039076805114746,
"step": 611,
"valid_targets_mean": 15198.2,
"valid_targets_min": 7861
},
{
"epoch": 2.605543710021322,
"grad_norm": 0.09933572538572656,
"learning_rate": 2.210633338880704e-05,
"loss": 1.0441901683807373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2598780393600464,
"step": 612,
"valid_targets_mean": 14514.8,
"valid_targets_min": 2095
},
{
"epoch": 2.609808102345416,
"grad_norm": 0.10510212088154962,
"learning_rate": 2.204721118412994e-05,
"loss": 1.007887840270996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24712875485420227,
"step": 613,
"valid_targets_mean": 15719.6,
"valid_targets_min": 7064
},
{
"epoch": 2.6140724946695095,
"grad_norm": 0.12157250964609065,
"learning_rate": 2.1988070894720037e-05,
"loss": 1.0408642292022705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27903807163238525,
"step": 614,
"valid_targets_mean": 14801.3,
"valid_targets_min": 3424
},
{
"epoch": 2.6183368869936032,
"grad_norm": 0.10948158452482029,
"learning_rate": 2.192891304301309e-05,
"loss": 1.059133768081665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2628347873687744,
"step": 615,
"valid_targets_mean": 15173.8,
"valid_targets_min": 6350
},
{
"epoch": 2.6226012793176974,
"grad_norm": 0.09943412757871808,
"learning_rate": 2.18697381516e-05,
"loss": 1.0339713096618652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25985080003738403,
"step": 616,
"valid_targets_mean": 15364.6,
"valid_targets_min": 2549
},
{
"epoch": 2.626865671641791,
"grad_norm": 0.14626355180109923,
"learning_rate": 2.181054674322221e-05,
"loss": 1.0592225790023804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2634222209453583,
"step": 617,
"valid_targets_mean": 15133.7,
"valid_targets_min": 3155
},
{
"epoch": 2.631130063965885,
"grad_norm": 0.12943488698610622,
"learning_rate": 2.1751339340767043e-05,
"loss": 1.0118565559387207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2643774151802063,
"step": 618,
"valid_targets_mean": 14694.1,
"valid_targets_min": 5175
},
{
"epoch": 2.635394456289979,
"grad_norm": 0.12678721384266212,
"learning_rate": 2.169211646726313e-05,
"loss": 1.061182975769043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25831732153892517,
"step": 619,
"valid_targets_mean": 14516.6,
"valid_targets_min": 1424
},
{
"epoch": 2.6396588486140726,
"grad_norm": 0.13879656324942302,
"learning_rate": 2.163287864587576e-05,
"loss": 1.0580122470855713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2813308835029602,
"step": 620,
"valid_targets_mean": 15262.5,
"valid_targets_min": 2999
},
{
"epoch": 2.6439232409381663,
"grad_norm": 0.10883361846948157,
"learning_rate": 2.157362639990229e-05,
"loss": 1.0226809978485107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26501500606536865,
"step": 621,
"valid_targets_mean": 15207.4,
"valid_targets_min": 4205
},
{
"epoch": 2.64818763326226,
"grad_norm": 0.13565539309033717,
"learning_rate": 2.151436025276747e-05,
"loss": 1.065739393234253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.270394891500473,
"step": 622,
"valid_targets_mean": 14962.2,
"valid_targets_min": 6660
},
{
"epoch": 2.6524520255863537,
"grad_norm": 0.09854028333086322,
"learning_rate": 2.145508072801888e-05,
"loss": 1.0685371160507202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29033178091049194,
"step": 623,
"valid_targets_mean": 15455.7,
"valid_targets_min": 9705
},
{
"epoch": 2.656716417910448,
"grad_norm": 0.11318088658698934,
"learning_rate": 2.1395788349322256e-05,
"loss": 1.082135796546936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27790725231170654,
"step": 624,
"valid_targets_mean": 14922.1,
"valid_targets_min": 1106
},
{
"epoch": 2.6609808102345416,
"grad_norm": 0.10243948174873596,
"learning_rate": 2.133648364045689e-05,
"loss": 1.019806146621704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22797921299934387,
"step": 625,
"valid_targets_mean": 13488.0,
"valid_targets_min": 1715
},
{
"epoch": 2.6652452025586353,
"grad_norm": 0.10047404613950953,
"learning_rate": 2.1277167125310996e-05,
"loss": 1.0143678188323975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2520124018192291,
"step": 626,
"valid_targets_mean": 14776.7,
"valid_targets_min": 2289
},
{
"epoch": 2.6695095948827294,
"grad_norm": 0.09145231866156223,
"learning_rate": 2.1217839327877098e-05,
"loss": 1.040644645690918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28392672538757324,
"step": 627,
"valid_targets_mean": 15072.8,
"valid_targets_min": 4011
},
{
"epoch": 2.673773987206823,
"grad_norm": 0.11401894390880575,
"learning_rate": 2.1158500772247352e-05,
"loss": 1.0729954242706299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2513941824436188,
"step": 628,
"valid_targets_mean": 15610.4,
"valid_targets_min": 10526
},
{
"epoch": 2.678038379530917,
"grad_norm": 0.10828405956368052,
"learning_rate": 2.1099151982608985e-05,
"loss": 1.0410047769546509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24934270977973938,
"step": 629,
"valid_targets_mean": 13862.6,
"valid_targets_min": 2641
},
{
"epoch": 2.6823027718550105,
"grad_norm": 0.09448126661051112,
"learning_rate": 2.1039793483239607e-05,
"loss": 1.0613449811935425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2732912600040436,
"step": 630,
"valid_targets_mean": 15320.3,
"valid_targets_min": 4702
},
{
"epoch": 2.6865671641791042,
"grad_norm": 0.10889944780585988,
"learning_rate": 2.0980425798502616e-05,
"loss": 1.043823480606079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27058395743370056,
"step": 631,
"valid_targets_mean": 14775.0,
"valid_targets_min": 5934
},
{
"epoch": 2.6908315565031984,
"grad_norm": 0.10598838076405855,
"learning_rate": 2.092104945284255e-05,
"loss": 1.0220303535461426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26301729679107666,
"step": 632,
"valid_targets_mean": 14825.2,
"valid_targets_min": 3761
},
{
"epoch": 2.695095948827292,
"grad_norm": 0.096649254343725,
"learning_rate": 2.0861664970780434e-05,
"loss": 1.0587990283966064,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2864176630973816,
"step": 633,
"valid_targets_mean": 14869.1,
"valid_targets_min": 2366
},
{
"epoch": 2.699360341151386,
"grad_norm": 0.12492744041403084,
"learning_rate": 2.08022728769092e-05,
"loss": 1.0611028671264648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2796041965484619,
"step": 634,
"valid_targets_mean": 15264.3,
"valid_targets_min": 9411
},
{
"epoch": 2.70362473347548,
"grad_norm": 0.08815457331053626,
"learning_rate": 2.0742873695889005e-05,
"loss": 1.029858112335205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22491423785686493,
"step": 635,
"valid_targets_mean": 14082.1,
"valid_targets_min": 2521
},
{
"epoch": 2.7078891257995736,
"grad_norm": 0.11401596610202915,
"learning_rate": 2.0683467952442626e-05,
"loss": 1.0549201965332031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26610907912254333,
"step": 636,
"valid_targets_mean": 14397.4,
"valid_targets_min": 1580
},
{
"epoch": 2.7121535181236673,
"grad_norm": 0.09170930694937314,
"learning_rate": 2.0624056171350785e-05,
"loss": 1.0417375564575195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26557955145835876,
"step": 637,
"valid_targets_mean": 15208.9,
"valid_targets_min": 1582
},
{
"epoch": 2.716417910447761,
"grad_norm": 0.09373484805402636,
"learning_rate": 2.0564638877447566e-05,
"loss": 1.0557622909545898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2590869665145874,
"step": 638,
"valid_targets_mean": 14894.8,
"valid_targets_min": 1261
},
{
"epoch": 2.7206823027718547,
"grad_norm": 0.11086311830705485,
"learning_rate": 2.0505216595615742e-05,
"loss": 1.0656099319458008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2670261859893799,
"step": 639,
"valid_targets_mean": 15368.2,
"valid_targets_min": 8104
},
{
"epoch": 2.724946695095949,
"grad_norm": 0.099693187192522,
"learning_rate": 2.044578985078215e-05,
"loss": 1.066105842590332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25466716289520264,
"step": 640,
"valid_targets_mean": 13972.0,
"valid_targets_min": 2953
},
{
"epoch": 2.7292110874200426,
"grad_norm": 0.09144786981608774,
"learning_rate": 2.0386359167913046e-05,
"loss": 1.005486011505127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2582522928714752,
"step": 641,
"valid_targets_mean": 15838.5,
"valid_targets_min": 13199
},
{
"epoch": 2.7334754797441363,
"grad_norm": 0.09290876021103756,
"learning_rate": 2.0326925072009485e-05,
"loss": 1.00834321975708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24156692624092102,
"step": 642,
"valid_targets_mean": 14860.7,
"valid_targets_min": 3311
},
{
"epoch": 2.7377398720682304,
"grad_norm": 0.08683659207009448,
"learning_rate": 2.0267488088102657e-05,
"loss": 1.0450940132141113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24575310945510864,
"step": 643,
"valid_targets_mean": 15412.2,
"valid_targets_min": 6629
},
{
"epoch": 2.742004264392324,
"grad_norm": 0.11853027309215325,
"learning_rate": 2.0208048741249288e-05,
"loss": 1.0088589191436768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2650904655456543,
"step": 644,
"valid_targets_mean": 15457.6,
"valid_targets_min": 9740
},
{
"epoch": 2.746268656716418,
"grad_norm": 0.09888933405924138,
"learning_rate": 2.014860755652695e-05,
"loss": 1.0865031480789185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2950332760810852,
"step": 645,
"valid_targets_mean": 15230.5,
"valid_targets_min": 7258
},
{
"epoch": 2.750533049040512,
"grad_norm": 0.09077056957188934,
"learning_rate": 2.0089165059029477e-05,
"loss": 1.0576354265213013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27440112829208374,
"step": 646,
"valid_targets_mean": 15387.4,
"valid_targets_min": 5120
},
{
"epoch": 2.7547974413646057,
"grad_norm": 0.12727069338504046,
"learning_rate": 2.0029721773862277e-05,
"loss": 1.040118932723999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25948765873908997,
"step": 647,
"valid_targets_mean": 13869.9,
"valid_targets_min": 1854
},
{
"epoch": 2.7590618336886994,
"grad_norm": 0.09414659738141111,
"learning_rate": 1.997027822613773e-05,
"loss": 1.0343791246414185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23842459917068481,
"step": 648,
"valid_targets_mean": 13911.2,
"valid_targets_min": 5753
},
{
"epoch": 2.763326226012793,
"grad_norm": 0.09760780079744967,
"learning_rate": 1.9910834940970533e-05,
"loss": 1.0626685619354248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26802858710289,
"step": 649,
"valid_targets_mean": 14167.7,
"valid_targets_min": 1974
},
{
"epoch": 2.767590618336887,
"grad_norm": 0.10697492490503085,
"learning_rate": 1.985139244347305e-05,
"loss": 1.0661125183105469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26134780049324036,
"step": 650,
"valid_targets_mean": 14691.3,
"valid_targets_min": 1668
},
{
"epoch": 2.771855010660981,
"grad_norm": 0.10702606984974812,
"learning_rate": 1.979195125875072e-05,
"loss": 1.0712954998016357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28778448700904846,
"step": 651,
"valid_targets_mean": 15400.3,
"valid_targets_min": 7004
},
{
"epoch": 2.7761194029850746,
"grad_norm": 0.11922889580109083,
"learning_rate": 1.9732511911897353e-05,
"loss": 1.0359854698181152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.259382426738739,
"step": 652,
"valid_targets_mean": 14325.9,
"valid_targets_min": 2297
},
{
"epoch": 2.7803837953091683,
"grad_norm": 0.0908638612768627,
"learning_rate": 1.9673074927990525e-05,
"loss": 1.029666543006897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22730061411857605,
"step": 653,
"valid_targets_mean": 14054.5,
"valid_targets_min": 2204
},
{
"epoch": 2.7846481876332625,
"grad_norm": 0.0981224678390093,
"learning_rate": 1.9613640832086957e-05,
"loss": 1.0567508935928345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2554134428501129,
"step": 654,
"valid_targets_mean": 14729.9,
"valid_targets_min": 2303
},
{
"epoch": 2.788912579957356,
"grad_norm": 0.09557671725625344,
"learning_rate": 1.9554210149217855e-05,
"loss": 0.9999919533729553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24661889672279358,
"step": 655,
"valid_targets_mean": 14907.3,
"valid_targets_min": 1247
},
{
"epoch": 2.79317697228145,
"grad_norm": 0.1014944267267325,
"learning_rate": 1.9494783404384265e-05,
"loss": 1.0318797826766968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2632007598876953,
"step": 656,
"valid_targets_mean": 13960.8,
"valid_targets_min": 2671
},
{
"epoch": 2.7974413646055436,
"grad_norm": 0.08523674515848591,
"learning_rate": 1.9435361122552437e-05,
"loss": 1.0454580783843994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2793329358100891,
"step": 657,
"valid_targets_mean": 15929.2,
"valid_targets_min": 12378
},
{
"epoch": 2.8017057569296373,
"grad_norm": 0.13838290480866708,
"learning_rate": 1.9375943828649215e-05,
"loss": 1.056179404258728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25556135177612305,
"step": 658,
"valid_targets_mean": 14254.8,
"valid_targets_min": 4890
},
{
"epoch": 2.8059701492537314,
"grad_norm": 0.0958833509051413,
"learning_rate": 1.9316532047557378e-05,
"loss": 1.0423905849456787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2633678913116455,
"step": 659,
"valid_targets_mean": 14146.8,
"valid_targets_min": 3569
},
{
"epoch": 2.810234541577825,
"grad_norm": 0.10588928996243993,
"learning_rate": 1.9257126304110998e-05,
"loss": 1.0271477699279785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26135048270225525,
"step": 660,
"valid_targets_mean": 15311.8,
"valid_targets_min": 7238
},
{
"epoch": 2.814498933901919,
"grad_norm": 0.09116876800147194,
"learning_rate": 1.919772712309081e-05,
"loss": 1.0520766973495483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24236315488815308,
"step": 661,
"valid_targets_mean": 15055.7,
"valid_targets_min": 1422
},
{
"epoch": 2.818763326226013,
"grad_norm": 0.08819172131027095,
"learning_rate": 1.9138335029219572e-05,
"loss": 1.0467054843902588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2655482888221741,
"step": 662,
"valid_targets_mean": 15517.8,
"valid_targets_min": 5075
},
{
"epoch": 2.8230277185501067,
"grad_norm": 0.09693945905139087,
"learning_rate": 1.9078950547157458e-05,
"loss": 1.0835331678390503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2648315727710724,
"step": 663,
"valid_targets_mean": 14957.7,
"valid_targets_min": 5183
},
{
"epoch": 2.8272921108742004,
"grad_norm": 0.1007305055166871,
"learning_rate": 1.9019574201497387e-05,
"loss": 1.0432779788970947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.268159955739975,
"step": 664,
"valid_targets_mean": 14936.0,
"valid_targets_min": 5933
},
{
"epoch": 2.831556503198294,
"grad_norm": 0.10633071458684165,
"learning_rate": 1.8960206516760396e-05,
"loss": 1.0779266357421875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2926626205444336,
"step": 665,
"valid_targets_mean": 15472.3,
"valid_targets_min": 10243
},
{
"epoch": 2.835820895522388,
"grad_norm": 0.10396166205798389,
"learning_rate": 1.890084801739102e-05,
"loss": 1.0034468173980713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22967484593391418,
"step": 666,
"valid_targets_mean": 14684.1,
"valid_targets_min": 1769
},
{
"epoch": 2.840085287846482,
"grad_norm": 0.09638580119199787,
"learning_rate": 1.884149922775265e-05,
"loss": 1.0276405811309814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25226348638534546,
"step": 667,
"valid_targets_mean": 14988.4,
"valid_targets_min": 7267
},
{
"epoch": 2.8443496801705757,
"grad_norm": 0.1084264848469337,
"learning_rate": 1.878216067212291e-05,
"loss": 1.047234058380127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24949602782726288,
"step": 668,
"valid_targets_mean": 14842.2,
"valid_targets_min": 5609
},
{
"epoch": 2.8486140724946694,
"grad_norm": 0.1086383727198765,
"learning_rate": 1.8722832874689007e-05,
"loss": 1.0727641582489014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27868297696113586,
"step": 669,
"valid_targets_mean": 15820.2,
"valid_targets_min": 12116
},
{
"epoch": 2.8528784648187635,
"grad_norm": 0.10255326339360858,
"learning_rate": 1.8663516359543123e-05,
"loss": 1.0397084951400757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2564503848552704,
"step": 670,
"valid_targets_mean": 14142.6,
"valid_targets_min": 2427
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.10417892551941761,
"learning_rate": 1.860421165067775e-05,
"loss": 1.0493979454040527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26024430990219116,
"step": 671,
"valid_targets_mean": 15493.0,
"valid_targets_min": 3446
},
{
"epoch": 2.861407249466951,
"grad_norm": 0.11404069874671212,
"learning_rate": 1.8544919271981125e-05,
"loss": 1.031550645828247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2422025054693222,
"step": 672,
"valid_targets_mean": 14255.4,
"valid_targets_min": 2365
},
{
"epoch": 2.8656716417910446,
"grad_norm": 0.09200020440410975,
"learning_rate": 1.8485639747232535e-05,
"loss": 1.0501012802124023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26862165331840515,
"step": 673,
"valid_targets_mean": 15296.3,
"valid_targets_min": 9904
},
{
"epoch": 2.8699360341151388,
"grad_norm": 0.10187007735608053,
"learning_rate": 1.8426373600097723e-05,
"loss": 1.0608158111572266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26771557331085205,
"step": 674,
"valid_targets_mean": 15466.8,
"valid_targets_min": 8984
},
{
"epoch": 2.8742004264392325,
"grad_norm": 0.10036687917718154,
"learning_rate": 1.836712135412424e-05,
"loss": 1.0379321575164795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2642658054828644,
"step": 675,
"valid_targets_mean": 15383.2,
"valid_targets_min": 8605
},
{
"epoch": 2.878464818763326,
"grad_norm": 0.10553277141761286,
"learning_rate": 1.8307883532736878e-05,
"loss": 1.0531381368637085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2562502324581146,
"step": 676,
"valid_targets_mean": 15152.2,
"valid_targets_min": 4101
},
{
"epoch": 2.88272921108742,
"grad_norm": 0.11831413276872216,
"learning_rate": 1.8248660659232964e-05,
"loss": 1.0127050876617432,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24492374062538147,
"step": 677,
"valid_targets_mean": 14975.7,
"valid_targets_min": 5802
},
{
"epoch": 2.886993603411514,
"grad_norm": 0.10569822396333257,
"learning_rate": 1.8189453256777798e-05,
"loss": 1.0561635494232178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27034202218055725,
"step": 678,
"valid_targets_mean": 14902.4,
"valid_targets_min": 4824
},
{
"epoch": 2.8912579957356077,
"grad_norm": 0.09405934992777647,
"learning_rate": 1.8130261848399996e-05,
"loss": 1.0346713066101074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2444111555814743,
"step": 679,
"valid_targets_mean": 14605.8,
"valid_targets_min": 3224
},
{
"epoch": 2.8955223880597014,
"grad_norm": 0.09224547225200644,
"learning_rate": 1.8071086956986916e-05,
"loss": 1.0024101734161377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2338026911020279,
"step": 680,
"valid_targets_mean": 14041.8,
"valid_targets_min": 2001
},
{
"epoch": 2.8997867803837956,
"grad_norm": 0.09486187094848662,
"learning_rate": 1.8011929105279967e-05,
"loss": 1.0379791259765625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24782249331474304,
"step": 681,
"valid_targets_mean": 14689.2,
"valid_targets_min": 5201
},
{
"epoch": 2.9040511727078893,
"grad_norm": 0.09941225169123405,
"learning_rate": 1.795278881587007e-05,
"loss": 1.0000572204589844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25220823287963867,
"step": 682,
"valid_targets_mean": 15114.9,
"valid_targets_min": 1974
},
{
"epoch": 2.908315565031983,
"grad_norm": 0.10310676050628857,
"learning_rate": 1.7893666611192962e-05,
"loss": 1.0490434169769287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27309685945510864,
"step": 683,
"valid_targets_mean": 14711.1,
"valid_targets_min": 6430
},
{
"epoch": 2.9125799573560767,
"grad_norm": 0.08271677365501538,
"learning_rate": 1.783456301352467e-05,
"loss": 1.0730600357055664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25433292984962463,
"step": 684,
"valid_targets_mean": 14433.8,
"valid_targets_min": 2865
},
{
"epoch": 2.9168443496801704,
"grad_norm": 0.09299727132373173,
"learning_rate": 1.7775478544976813e-05,
"loss": 1.0093896389007568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25548744201660156,
"step": 685,
"valid_targets_mean": 14471.1,
"valid_targets_min": 1903
},
{
"epoch": 2.9211087420042645,
"grad_norm": 0.10065084732629,
"learning_rate": 1.7716413727492035e-05,
"loss": 1.0522160530090332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27982860803604126,
"step": 686,
"valid_targets_mean": 14811.4,
"valid_targets_min": 4757
},
{
"epoch": 2.925373134328358,
"grad_norm": 0.11894198761188435,
"learning_rate": 1.7657369082839392e-05,
"loss": 1.0661197900772095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26254287362098694,
"step": 687,
"valid_targets_mean": 14819.1,
"valid_targets_min": 6555
},
{
"epoch": 2.929637526652452,
"grad_norm": 0.08160775336225609,
"learning_rate": 1.7598345132609747e-05,
"loss": 1.0675498247146606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2637835144996643,
"step": 688,
"valid_targets_mean": 14699.1,
"valid_targets_min": 2986
},
{
"epoch": 2.933901918976546,
"grad_norm": 0.09459313499815636,
"learning_rate": 1.7539342398211132e-05,
"loss": 1.0492291450500488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28006109595298767,
"step": 689,
"valid_targets_mean": 14990.8,
"valid_targets_min": 2220
},
{
"epoch": 2.9381663113006398,
"grad_norm": 0.09449307185356025,
"learning_rate": 1.748036140086416e-05,
"loss": 1.0591932535171509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2770892381668091,
"step": 690,
"valid_targets_mean": 15341.5,
"valid_targets_min": 2657
},
{
"epoch": 2.9424307036247335,
"grad_norm": 0.09296315101594112,
"learning_rate": 1.742140266159744e-05,
"loss": 1.0429664850234985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2597864270210266,
"step": 691,
"valid_targets_mean": 14504.1,
"valid_targets_min": 1811
},
{
"epoch": 2.946695095948827,
"grad_norm": 0.08475856682483765,
"learning_rate": 1.7362466701242943e-05,
"loss": 1.0481195449829102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24358433485031128,
"step": 692,
"valid_targets_mean": 13964.4,
"valid_targets_min": 1469
},
{
"epoch": 2.950959488272921,
"grad_norm": 0.09027294943267872,
"learning_rate": 1.7303554040431426e-05,
"loss": 1.0421435832977295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24681347608566284,
"step": 693,
"valid_targets_mean": 14935.4,
"valid_targets_min": 8591
},
{
"epoch": 2.955223880597015,
"grad_norm": 0.0839739016581147,
"learning_rate": 1.7244665199587812e-05,
"loss": 1.033682107925415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27021023631095886,
"step": 694,
"valid_targets_mean": 15466.5,
"valid_targets_min": 8408
},
{
"epoch": 2.9594882729211087,
"grad_norm": 0.08173455945996605,
"learning_rate": 1.7185800698926594e-05,
"loss": 1.0710523128509521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2841835618019104,
"step": 695,
"valid_targets_mean": 15041.3,
"valid_targets_min": 11444
},
{
"epoch": 2.9637526652452024,
"grad_norm": 0.08848434516434239,
"learning_rate": 1.7126961058447276e-05,
"loss": 1.0324208736419678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24441203474998474,
"step": 696,
"valid_targets_mean": 15568.1,
"valid_targets_min": 7312
},
{
"epoch": 2.9680170575692966,
"grad_norm": 0.10044429365981941,
"learning_rate": 1.706814679792973e-05,
"loss": 1.0719600915908813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26880744099617004,
"step": 697,
"valid_targets_mean": 14781.2,
"valid_targets_min": 1017
},
{
"epoch": 2.9722814498933903,
"grad_norm": 0.09076415272551094,
"learning_rate": 1.7009358436929632e-05,
"loss": 1.0307958126068115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24411264061927795,
"step": 698,
"valid_targets_mean": 14271.6,
"valid_targets_min": 2049
},
{
"epoch": 2.976545842217484,
"grad_norm": 0.09210955856047405,
"learning_rate": 1.6950596494773855e-05,
"loss": 1.0544676780700684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27991074323654175,
"step": 699,
"valid_targets_mean": 15393.5,
"valid_targets_min": 3346
},
{
"epoch": 2.9808102345415777,
"grad_norm": 0.10288934723753675,
"learning_rate": 1.6891861490555906e-05,
"loss": 1.087593913078308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2660730481147766,
"step": 700,
"valid_targets_mean": 13862.9,
"valid_targets_min": 4574
},
{
"epoch": 2.9850746268656714,
"grad_norm": 0.10865555318770762,
"learning_rate": 1.683315394313132e-05,
"loss": 1.0225476026535034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2520694434642792,
"step": 701,
"valid_targets_mean": 15160.9,
"valid_targets_min": 1387
},
{
"epoch": 2.9893390191897655,
"grad_norm": 0.09438237715753746,
"learning_rate": 1.677447437111309e-05,
"loss": 1.0214576721191406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.256600022315979,
"step": 702,
"valid_targets_mean": 14271.9,
"valid_targets_min": 3723
},
{
"epoch": 2.9936034115138592,
"grad_norm": 0.08834727879820044,
"learning_rate": 1.671582329286707e-05,
"loss": 1.066161870956421,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28262799978256226,
"step": 703,
"valid_targets_mean": 14603.6,
"valid_targets_min": 1158
},
{
"epoch": 2.997867803837953,
"grad_norm": 0.08907937443691644,
"learning_rate": 1.66572012265074e-05,
"loss": 1.0634660720825195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2691338062286377,
"step": 704,
"valid_targets_mean": 15085.3,
"valid_targets_min": 9587
},
{
"epoch": 3.0,
"grad_norm": 0.12800644975192477,
"learning_rate": 1.6598608689891953e-05,
"loss": 0.9655362963676453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42965245246887207,
"step": 705,
"valid_targets_mean": 14399.2,
"valid_targets_min": 714
},
{
"epoch": 3.0042643923240937,
"grad_norm": 0.0966075643245703,
"learning_rate": 1.654004620061773e-05,
"loss": 1.0344445705413818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2689897119998932,
"step": 706,
"valid_targets_mean": 15146.1,
"valid_targets_min": 2204
},
{
"epoch": 3.008528784648188,
"grad_norm": 0.1079883798589847,
"learning_rate": 1.6481514276016297e-05,
"loss": 1.016173005104065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24318349361419678,
"step": 707,
"valid_targets_mean": 13645.3,
"valid_targets_min": 1860
},
{
"epoch": 3.0127931769722816,
"grad_norm": 0.09787193381476755,
"learning_rate": 1.6423013433149207e-05,
"loss": 1.0453088283538818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.250877320766449,
"step": 708,
"valid_targets_mean": 14240.2,
"valid_targets_min": 1828
},
{
"epoch": 3.0170575692963753,
"grad_norm": 0.09282614762286959,
"learning_rate": 1.636454418880347e-05,
"loss": 1.045073390007019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2581164240837097,
"step": 709,
"valid_targets_mean": 14470.3,
"valid_targets_min": 1433
},
{
"epoch": 3.021321961620469,
"grad_norm": 0.09449004440800723,
"learning_rate": 1.630610705948693e-05,
"loss": 1.0387874841690063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25838711857795715,
"step": 710,
"valid_targets_mean": 15462.9,
"valid_targets_min": 1378
},
{
"epoch": 3.025586353944563,
"grad_norm": 0.10389798980629657,
"learning_rate": 1.6247702561423753e-05,
"loss": 1.076232671737671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2638155221939087,
"step": 711,
"valid_targets_mean": 15069.9,
"valid_targets_min": 2655
},
{
"epoch": 3.029850746268657,
"grad_norm": 0.08297534210077734,
"learning_rate": 1.6189331210549828e-05,
"loss": 1.03501296043396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27074500918388367,
"step": 712,
"valid_targets_mean": 14851.6,
"valid_targets_min": 6865
},
{
"epoch": 3.0341151385927505,
"grad_norm": 0.1211232230567513,
"learning_rate": 1.613099352250825e-05,
"loss": 1.0407111644744873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2587260603904724,
"step": 713,
"valid_targets_mean": 14498.7,
"valid_targets_min": 1794
},
{
"epoch": 3.038379530916844,
"grad_norm": 0.09095746118093166,
"learning_rate": 1.6072690012644717e-05,
"loss": 1.0345542430877686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2534143328666687,
"step": 714,
"valid_targets_mean": 15436.5,
"valid_targets_min": 8776
},
{
"epoch": 3.0426439232409384,
"grad_norm": 0.08902095291749587,
"learning_rate": 1.6014421196003022e-05,
"loss": 1.0071500539779663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2860822081565857,
"step": 715,
"valid_targets_mean": 15381.8,
"valid_targets_min": 7119
},
{
"epoch": 3.046908315565032,
"grad_norm": 0.0998280174233871,
"learning_rate": 1.5956187587320468e-05,
"loss": 1.0196133852005005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.257158488035202,
"step": 716,
"valid_targets_mean": 15293.0,
"valid_targets_min": 6810
},
{
"epoch": 3.0511727078891258,
"grad_norm": 0.08682173954913219,
"learning_rate": 1.5897989701023355e-05,
"loss": 1.0229237079620361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25756093859672546,
"step": 717,
"valid_targets_mean": 14520.8,
"valid_targets_min": 2888
},
{
"epoch": 3.0554371002132195,
"grad_norm": 0.08737913315661264,
"learning_rate": 1.58398280512224e-05,
"loss": 1.0397634506225586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2673379182815552,
"step": 718,
"valid_targets_mean": 14851.3,
"valid_targets_min": 3462
},
{
"epoch": 3.0597014925373136,
"grad_norm": 0.11209766886967511,
"learning_rate": 1.5781703151708215e-05,
"loss": 1.0350569486618042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24551504850387573,
"step": 719,
"valid_targets_mean": 14170.4,
"valid_targets_min": 2259
},
{
"epoch": 3.0639658848614073,
"grad_norm": 0.0873560626410917,
"learning_rate": 1.5723615515946773e-05,
"loss": 1.0226738452911377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24808737635612488,
"step": 720,
"valid_targets_mean": 14073.4,
"valid_targets_min": 2808
},
{
"epoch": 3.068230277185501,
"grad_norm": 0.0818518394347687,
"learning_rate": 1.5665565657074874e-05,
"loss": 1.0064092874526978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26570916175842285,
"step": 721,
"valid_targets_mean": 15137.1,
"valid_targets_min": 4471
},
{
"epoch": 3.0724946695095947,
"grad_norm": 0.08924836568132075,
"learning_rate": 1.560755408789558e-05,
"loss": 1.0179588794708252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23607303202152252,
"step": 722,
"valid_targets_mean": 13179.7,
"valid_targets_min": 2366
},
{
"epoch": 3.076759061833689,
"grad_norm": 0.09797054455620641,
"learning_rate": 1.5549581320873715e-05,
"loss": 1.0557875633239746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2701112627983093,
"step": 723,
"valid_targets_mean": 15342.3,
"valid_targets_min": 8112
},
{
"epoch": 3.0810234541577826,
"grad_norm": 0.09576198787365933,
"learning_rate": 1.5491647868131343e-05,
"loss": 1.074175238609314,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27521955966949463,
"step": 724,
"valid_targets_mean": 15699.3,
"valid_targets_min": 7970
},
{
"epoch": 3.0852878464818763,
"grad_norm": 0.09888061179924362,
"learning_rate": 1.5433754241443223e-05,
"loss": 1.00786554813385,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2511468827724457,
"step": 725,
"valid_targets_mean": 15114.1,
"valid_targets_min": 6528
},
{
"epoch": 3.08955223880597,
"grad_norm": 0.09793442627923624,
"learning_rate": 1.53759009522323e-05,
"loss": 1.066227674484253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2882387042045593,
"step": 726,
"valid_targets_mean": 15416.6,
"valid_targets_min": 9705
},
{
"epoch": 3.093816631130064,
"grad_norm": 0.10874989018487148,
"learning_rate": 1.5318088511565185e-05,
"loss": 1.0540597438812256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2334098219871521,
"step": 727,
"valid_targets_mean": 14811.9,
"valid_targets_min": 4603
},
{
"epoch": 3.098081023454158,
"grad_norm": 0.10887111228658802,
"learning_rate": 1.5260317430147627e-05,
"loss": 1.0411701202392578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26907092332839966,
"step": 728,
"valid_targets_mean": 15423.6,
"valid_targets_min": 7996
},
{
"epoch": 3.1023454157782515,
"grad_norm": 0.09474672934125986,
"learning_rate": 1.5202588218320024e-05,
"loss": 1.062959909439087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27440589666366577,
"step": 729,
"valid_targets_mean": 15051.2,
"valid_targets_min": 10266
},
{
"epoch": 3.106609808102345,
"grad_norm": 0.12479139765419613,
"learning_rate": 1.5144901386052924e-05,
"loss": 1.0482529401779175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2644003629684448,
"step": 730,
"valid_targets_mean": 14408.5,
"valid_targets_min": 1170
},
{
"epoch": 3.1108742004264394,
"grad_norm": 0.1190939558336681,
"learning_rate": 1.5087257442942467e-05,
"loss": 1.0438718795776367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2560356557369232,
"step": 731,
"valid_targets_mean": 14297.5,
"valid_targets_min": 2063
},
{
"epoch": 3.115138592750533,
"grad_norm": 0.09211139347692805,
"learning_rate": 1.502965689820593e-05,
"loss": 1.0472971200942993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2688388228416443,
"step": 732,
"valid_targets_mean": 15084.3,
"valid_targets_min": 6095
},
{
"epoch": 3.1194029850746268,
"grad_norm": 0.10217519839154926,
"learning_rate": 1.4972100260677222e-05,
"loss": 1.0276672840118408,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2611340284347534,
"step": 733,
"valid_targets_mean": 15733.6,
"valid_targets_min": 10570
},
{
"epoch": 3.1236673773987205,
"grad_norm": 0.10100137254304324,
"learning_rate": 1.4914588038802383e-05,
"loss": 1.013986587524414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2715274691581726,
"step": 734,
"valid_targets_mean": 15451.9,
"valid_targets_min": 8164
},
{
"epoch": 3.1279317697228146,
"grad_norm": 0.09590402587002252,
"learning_rate": 1.4857120740635084e-05,
"loss": 1.0150028467178345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22995221614837646,
"step": 735,
"valid_targets_mean": 14393.3,
"valid_targets_min": 3224
},
{
"epoch": 3.1321961620469083,
"grad_norm": 0.0931608093286139,
"learning_rate": 1.4799698873832153e-05,
"loss": 1.0242356061935425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570406496524811,
"step": 736,
"valid_targets_mean": 14658.0,
"valid_targets_min": 4752
},
{
"epoch": 3.136460554371002,
"grad_norm": 0.12882920488629043,
"learning_rate": 1.4742322945649073e-05,
"loss": 1.020544171333313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2499198317527771,
"step": 737,
"valid_targets_mean": 15270.8,
"valid_targets_min": 3754
},
{
"epoch": 3.140724946695096,
"grad_norm": 0.09152776153119625,
"learning_rate": 1.4684993462935532e-05,
"loss": 1.0674588680267334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25742030143737793,
"step": 738,
"valid_targets_mean": 14184.7,
"valid_targets_min": 2159
},
{
"epoch": 3.14498933901919,
"grad_norm": 0.08868227195162083,
"learning_rate": 1.462771093213092e-05,
"loss": 1.0410614013671875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26647597551345825,
"step": 739,
"valid_targets_mean": 14999.5,
"valid_targets_min": 4763
},
{
"epoch": 3.1492537313432836,
"grad_norm": 0.09805915284911056,
"learning_rate": 1.4570475859259856e-05,
"loss": 1.0700287818908691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2671729326248169,
"step": 740,
"valid_targets_mean": 14543.1,
"valid_targets_min": 4241
},
{
"epoch": 3.1535181236673773,
"grad_norm": 0.09831970751982794,
"learning_rate": 1.4513288749927714e-05,
"loss": 1.014609932899475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2614646553993225,
"step": 741,
"valid_targets_mean": 14626.1,
"valid_targets_min": 7544
},
{
"epoch": 3.1577825159914714,
"grad_norm": 0.08775283241054559,
"learning_rate": 1.4456150109316192e-05,
"loss": 1.0609573125839233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2644979953765869,
"step": 742,
"valid_targets_mean": 15274.3,
"valid_targets_min": 7076
},
{
"epoch": 3.162046908315565,
"grad_norm": 0.08815796513892146,
"learning_rate": 1.4399060442178798e-05,
"loss": 1.0535483360290527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26069730520248413,
"step": 743,
"valid_targets_mean": 14454.4,
"valid_targets_min": 2747
},
{
"epoch": 3.166311300639659,
"grad_norm": 0.07945435266847604,
"learning_rate": 1.4342020252836437e-05,
"loss": 1.0029892921447754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24885860085487366,
"step": 744,
"valid_targets_mean": 15031.0,
"valid_targets_min": 5119
},
{
"epoch": 3.1705756929637525,
"grad_norm": 0.10653046057731785,
"learning_rate": 1.4285030045172913e-05,
"loss": 1.0199320316314697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26799827814102173,
"step": 745,
"valid_targets_mean": 15330.1,
"valid_targets_min": 7525
},
{
"epoch": 3.1748400852878467,
"grad_norm": 0.0842329636878113,
"learning_rate": 1.422809032263052e-05,
"loss": 1.026256799697876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2702501118183136,
"step": 746,
"valid_targets_mean": 15265.4,
"valid_targets_min": 3922
},
{
"epoch": 3.1791044776119404,
"grad_norm": 0.081964176490656,
"learning_rate": 1.4171201588205566e-05,
"loss": 1.0050396919250488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25963956117630005,
"step": 747,
"valid_targets_mean": 14697.8,
"valid_targets_min": 3662
},
{
"epoch": 3.183368869936034,
"grad_norm": 0.09545709255137813,
"learning_rate": 1.4114364344443935e-05,
"loss": 1.0147085189819336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24517302215099335,
"step": 748,
"valid_targets_mean": 15314.1,
"valid_targets_min": 6045
},
{
"epoch": 3.1876332622601278,
"grad_norm": 0.07898969208815516,
"learning_rate": 1.4057579093436653e-05,
"loss": 1.0305474996566772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2477126270532608,
"step": 749,
"valid_targets_mean": 14982.3,
"valid_targets_min": 5448
},
{
"epoch": 3.191897654584222,
"grad_norm": 0.08357378212331808,
"learning_rate": 1.400084633681546e-05,
"loss": 1.0168681144714355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22804000973701477,
"step": 750,
"valid_targets_mean": 15144.0,
"valid_targets_min": 6307
},
{
"epoch": 3.1961620469083156,
"grad_norm": 0.08707590690686529,
"learning_rate": 1.3944166575748355e-05,
"loss": 1.0593773126602173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2545916438102722,
"step": 751,
"valid_targets_mean": 15302.6,
"valid_targets_min": 6494
},
{
"epoch": 3.2004264392324093,
"grad_norm": 0.09822240582339954,
"learning_rate": 1.3887540310935187e-05,
"loss": 1.003342866897583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2594917118549347,
"step": 752,
"valid_targets_mean": 14751.0,
"valid_targets_min": 2259
},
{
"epoch": 3.204690831556503,
"grad_norm": 0.08709540834030992,
"learning_rate": 1.3830968042603226e-05,
"loss": 1.0506478548049927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2556101679801941,
"step": 753,
"valid_targets_mean": 14109.2,
"valid_targets_min": 1870
},
{
"epoch": 3.208955223880597,
"grad_norm": 0.09010887681364946,
"learning_rate": 1.3774450270502762e-05,
"loss": 0.995093047618866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2390972077846527,
"step": 754,
"valid_targets_mean": 14549.6,
"valid_targets_min": 1385
},
{
"epoch": 3.213219616204691,
"grad_norm": 0.09392396378094517,
"learning_rate": 1.3717987493902656e-05,
"loss": 1.0172679424285889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24836307764053345,
"step": 755,
"valid_targets_mean": 15020.7,
"valid_targets_min": 2162
},
{
"epoch": 3.2174840085287846,
"grad_norm": 0.0934883283220574,
"learning_rate": 1.3661580211585947e-05,
"loss": 1.048058032989502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25592565536499023,
"step": 756,
"valid_targets_mean": 15018.7,
"valid_targets_min": 2052
},
{
"epoch": 3.2217484008528783,
"grad_norm": 0.09792396868886702,
"learning_rate": 1.3605228921845457e-05,
"loss": 1.021390676498413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2654317021369934,
"step": 757,
"valid_targets_mean": 15263.4,
"valid_targets_min": 1769
},
{
"epoch": 3.2260127931769724,
"grad_norm": 0.09309475608287478,
"learning_rate": 1.3548934122479373e-05,
"loss": 1.0489925146102905,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26911330223083496,
"step": 758,
"valid_targets_mean": 14592.7,
"valid_targets_min": 5192
},
{
"epoch": 3.230277185501066,
"grad_norm": 0.08393644496387626,
"learning_rate": 1.349269631078686e-05,
"loss": 1.0268635749816895,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26067325472831726,
"step": 759,
"valid_targets_mean": 15398.3,
"valid_targets_min": 6104
},
{
"epoch": 3.23454157782516,
"grad_norm": 0.10672128903203676,
"learning_rate": 1.3436515983563659e-05,
"loss": 1.0773091316223145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25594526529312134,
"step": 760,
"valid_targets_mean": 14472.5,
"valid_targets_min": 2071
},
{
"epoch": 3.2388059701492535,
"grad_norm": 0.08602383455711411,
"learning_rate": 1.3380393637097692e-05,
"loss": 0.9852066040039062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24024848639965057,
"step": 761,
"valid_targets_mean": 14740.8,
"valid_targets_min": 4599
},
{
"epoch": 3.2430703624733477,
"grad_norm": 0.08711459473212163,
"learning_rate": 1.3324329767164708e-05,
"loss": 1.0126616954803467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2438962608575821,
"step": 762,
"valid_targets_mean": 14971.6,
"valid_targets_min": 7000
},
{
"epoch": 3.2473347547974414,
"grad_norm": 0.10700771034341505,
"learning_rate": 1.3268324869023878e-05,
"loss": 1.0273852348327637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23463326692581177,
"step": 763,
"valid_targets_mean": 14168.8,
"valid_targets_min": 1791
},
{
"epoch": 3.251599147121535,
"grad_norm": 0.0908818140117839,
"learning_rate": 1.3212379437413421e-05,
"loss": 1.0689035654067993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26748207211494446,
"step": 764,
"valid_targets_mean": 13935.3,
"valid_targets_min": 3111
},
{
"epoch": 3.2558635394456292,
"grad_norm": 0.08904191404840264,
"learning_rate": 1.3156493966546236e-05,
"loss": 1.017478108406067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25639939308166504,
"step": 765,
"valid_targets_mean": 15500.0,
"valid_targets_min": 11246
},
{
"epoch": 3.260127931769723,
"grad_norm": 0.08990591936584785,
"learning_rate": 1.3100668950105534e-05,
"loss": 1.0317257642745972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2547551393508911,
"step": 766,
"valid_targets_mean": 14275.1,
"valid_targets_min": 2124
},
{
"epoch": 3.2643923240938166,
"grad_norm": 0.07967233196602748,
"learning_rate": 1.3044904881240507e-05,
"loss": 0.9703269004821777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23579402267932892,
"step": 767,
"valid_targets_mean": 14848.2,
"valid_targets_min": 1183
},
{
"epoch": 3.2686567164179103,
"grad_norm": 0.09888289059073466,
"learning_rate": 1.2989202252561926e-05,
"loss": 1.0312223434448242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27673935890197754,
"step": 768,
"valid_targets_mean": 15244.9,
"valid_targets_min": 10098
},
{
"epoch": 3.272921108742004,
"grad_norm": 0.10656929671281667,
"learning_rate": 1.2933561556137806e-05,
"loss": 1.0420056581497192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26682281494140625,
"step": 769,
"valid_targets_mean": 15391.3,
"valid_targets_min": 8556
},
{
"epoch": 3.277185501066098,
"grad_norm": 0.09441416382065476,
"learning_rate": 1.2877983283489062e-05,
"loss": 1.0019264221191406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2664748430252075,
"step": 770,
"valid_targets_mean": 15493.1,
"valid_targets_min": 8459
},
{
"epoch": 3.281449893390192,
"grad_norm": 0.08990039393288893,
"learning_rate": 1.2822467925585186e-05,
"loss": 1.0493959188461304,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2742432951927185,
"step": 771,
"valid_targets_mean": 15133.0,
"valid_targets_min": 4809
},
{
"epoch": 3.2857142857142856,
"grad_norm": 0.08358419050812697,
"learning_rate": 1.2767015972839879e-05,
"loss": 0.9922876358032227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2552100419998169,
"step": 772,
"valid_targets_mean": 14905.7,
"valid_targets_min": 2055
},
{
"epoch": 3.2899786780383797,
"grad_norm": 0.09834313288741205,
"learning_rate": 1.2711627915106728e-05,
"loss": 1.0197315216064453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22460338473320007,
"step": 773,
"valid_targets_mean": 14333.1,
"valid_targets_min": 2091
},
{
"epoch": 3.2942430703624734,
"grad_norm": 0.08287782605828424,
"learning_rate": 1.2656304241674877e-05,
"loss": 1.0270302295684814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22859780490398407,
"step": 774,
"valid_targets_mean": 13977.2,
"valid_targets_min": 1616
},
{
"epoch": 3.298507462686567,
"grad_norm": 0.08852464909783203,
"learning_rate": 1.2601045441264734e-05,
"loss": 1.0251355171203613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25335028767585754,
"step": 775,
"valid_targets_mean": 14786.3,
"valid_targets_min": 1440
},
{
"epoch": 3.302771855010661,
"grad_norm": 0.08893171919055332,
"learning_rate": 1.2545852002023599e-05,
"loss": 1.035538673400879,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23715808987617493,
"step": 776,
"valid_targets_mean": 14105.4,
"valid_targets_min": 1469
},
{
"epoch": 3.307036247334755,
"grad_norm": 0.08968860057598359,
"learning_rate": 1.2490724411521406e-05,
"loss": 1.1000900268554688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26642662286758423,
"step": 777,
"valid_targets_mean": 15000.0,
"valid_targets_min": 3080
},
{
"epoch": 3.3113006396588487,
"grad_norm": 0.08646055380898798,
"learning_rate": 1.243566315674637e-05,
"loss": 1.022867202758789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26644641160964966,
"step": 778,
"valid_targets_mean": 15140.9,
"valid_targets_min": 4136
},
{
"epoch": 3.3155650319829424,
"grad_norm": 0.09771195624050971,
"learning_rate": 1.238066872410073e-05,
"loss": 1.1028798818588257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2694508135318756,
"step": 779,
"valid_targets_mean": 13780.2,
"valid_targets_min": 2914
},
{
"epoch": 3.319829424307036,
"grad_norm": 0.09907125640509128,
"learning_rate": 1.2325741599396418e-05,
"loss": 1.0412553548812866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24542492628097534,
"step": 780,
"valid_targets_mean": 14005.5,
"valid_targets_min": 2250
},
{
"epoch": 3.3240938166311302,
"grad_norm": 0.09043274695084158,
"learning_rate": 1.2270882267850765e-05,
"loss": 0.974776566028595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2505064308643341,
"step": 781,
"valid_targets_mean": 15828.2,
"valid_targets_min": 6805
},
{
"epoch": 3.328358208955224,
"grad_norm": 0.08819385852514708,
"learning_rate": 1.2216091214082248e-05,
"loss": 0.9863239526748657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2462753802537918,
"step": 782,
"valid_targets_mean": 14715.3,
"valid_targets_min": 4136
},
{
"epoch": 3.3326226012793176,
"grad_norm": 0.08864037315292998,
"learning_rate": 1.2161368922106192e-05,
"loss": 1.0442707538604736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2768702507019043,
"step": 783,
"valid_targets_mean": 15831.5,
"valid_targets_min": 11302
},
{
"epoch": 3.3368869936034113,
"grad_norm": 0.09255623316637619,
"learning_rate": 1.2106715875330475e-05,
"loss": 1.048423171043396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27612558007240295,
"step": 784,
"valid_targets_mean": 14867.5,
"valid_targets_min": 3892
},
{
"epoch": 3.3411513859275055,
"grad_norm": 0.09357936738476177,
"learning_rate": 1.2052132556551275e-05,
"loss": 1.090922236442566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25026872754096985,
"step": 785,
"valid_targets_mean": 14097.4,
"valid_targets_min": 2004
},
{
"epoch": 3.345415778251599,
"grad_norm": 0.08335719660288012,
"learning_rate": 1.1997619447948814e-05,
"loss": 1.0451135635375977,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2629640996456146,
"step": 786,
"valid_targets_mean": 14466.1,
"valid_targets_min": 2478
},
{
"epoch": 3.349680170575693,
"grad_norm": 0.12144505843352507,
"learning_rate": 1.1943177031083094e-05,
"loss": 1.0659239292144775,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2779039144515991,
"step": 787,
"valid_targets_mean": 15759.0,
"valid_targets_min": 10275
},
{
"epoch": 3.3539445628997866,
"grad_norm": 0.09509795265273671,
"learning_rate": 1.1888805786889621e-05,
"loss": 1.0627827644348145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26427340507507324,
"step": 788,
"valid_targets_mean": 15404.2,
"valid_targets_min": 3780
},
{
"epoch": 3.3582089552238807,
"grad_norm": 0.08684363993738695,
"learning_rate": 1.183450619567518e-05,
"loss": 1.0289561748504639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25893306732177734,
"step": 789,
"valid_targets_mean": 14487.5,
"valid_targets_min": 4562
},
{
"epoch": 3.3624733475479744,
"grad_norm": 0.09049543801727776,
"learning_rate": 1.1780278737113581e-05,
"loss": 1.0684177875518799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2586260735988617,
"step": 790,
"valid_targets_mean": 15182.4,
"valid_targets_min": 3870
},
{
"epoch": 3.366737739872068,
"grad_norm": 0.09511393414317239,
"learning_rate": 1.1726123890241439e-05,
"loss": 1.0418816804885864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26466333866119385,
"step": 791,
"valid_targets_mean": 14249.2,
"valid_targets_min": 3173
},
{
"epoch": 3.3710021321961623,
"grad_norm": 0.0833172917991391,
"learning_rate": 1.1672042133453925e-05,
"loss": 1.03490149974823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2661225497722626,
"step": 792,
"valid_targets_mean": 15123.5,
"valid_targets_min": 6370
},
{
"epoch": 3.375266524520256,
"grad_norm": 0.09122665974378442,
"learning_rate": 1.1618033944500527e-05,
"loss": 0.9761382341384888,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23575016856193542,
"step": 793,
"valid_targets_mean": 15160.9,
"valid_targets_min": 2453
},
{
"epoch": 3.3795309168443497,
"grad_norm": 0.08375954561827709,
"learning_rate": 1.1564099800480864e-05,
"loss": 1.0144245624542236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23563915491104126,
"step": 794,
"valid_targets_mean": 14153.6,
"valid_targets_min": 1701
},
{
"epoch": 3.3837953091684434,
"grad_norm": 0.1136926122525388,
"learning_rate": 1.151024017784045e-05,
"loss": 1.027612566947937,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25527799129486084,
"step": 795,
"valid_targets_mean": 14646.9,
"valid_targets_min": 4829
},
{
"epoch": 3.388059701492537,
"grad_norm": 0.08609159834005825,
"learning_rate": 1.1456455552366488e-05,
"loss": 1.0193374156951904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2715807259082794,
"step": 796,
"valid_targets_mean": 15776.1,
"valid_targets_min": 12003
},
{
"epoch": 3.3923240938166312,
"grad_norm": 0.083867845288976,
"learning_rate": 1.1402746399183671e-05,
"loss": 1.021320104598999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25966545939445496,
"step": 797,
"valid_targets_mean": 15247.9,
"valid_targets_min": 7778
},
{
"epoch": 3.396588486140725,
"grad_norm": 0.09215925032697382,
"learning_rate": 1.1349113192749986e-05,
"loss": 1.0328900814056396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.267875075340271,
"step": 798,
"valid_targets_mean": 15269.8,
"valid_targets_min": 7239
},
{
"epoch": 3.4008528784648187,
"grad_norm": 0.08094579179999896,
"learning_rate": 1.1295556406852488e-05,
"loss": 1.0496559143066406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24950623512268066,
"step": 799,
"valid_targets_mean": 14462.9,
"valid_targets_min": 1475
},
{
"epoch": 3.405117270788913,
"grad_norm": 0.07939076724256122,
"learning_rate": 1.1242076514603201e-05,
"loss": 1.0052372217178345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23071691393852234,
"step": 800,
"valid_targets_mean": 14676.0,
"valid_targets_min": 3066
},
{
"epoch": 3.4093816631130065,
"grad_norm": 0.10488223002634041,
"learning_rate": 1.1188673988434831e-05,
"loss": 1.0472116470336914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25849294662475586,
"step": 801,
"valid_targets_mean": 14420.9,
"valid_targets_min": 3393
},
{
"epoch": 3.4136460554371,
"grad_norm": 0.08589246757161735,
"learning_rate": 1.1135349300096667e-05,
"loss": 1.0184279680252075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2548155188560486,
"step": 802,
"valid_targets_mean": 15368.0,
"valid_targets_min": 3409
},
{
"epoch": 3.417910447761194,
"grad_norm": 0.08333682776191335,
"learning_rate": 1.1082102920650397e-05,
"loss": 1.0307121276855469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24245811998844147,
"step": 803,
"valid_targets_mean": 14432.2,
"valid_targets_min": 4607
},
{
"epoch": 3.4221748400852876,
"grad_norm": 0.0848050044592849,
"learning_rate": 1.102893532046593e-05,
"loss": 1.053544282913208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2702012062072754,
"step": 804,
"valid_targets_mean": 14449.1,
"valid_targets_min": 2245
},
{
"epoch": 3.4264392324093818,
"grad_norm": 0.0891906939724688,
"learning_rate": 1.0975846969217258e-05,
"loss": 1.0232945680618286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27444830536842346,
"step": 805,
"valid_targets_mean": 15194.3,
"valid_targets_min": 7986
},
{
"epoch": 3.4307036247334755,
"grad_norm": 0.10949028163406868,
"learning_rate": 1.092283833587829e-05,
"loss": 1.0414726734161377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2692057490348816,
"step": 806,
"valid_targets_mean": 14994.5,
"valid_targets_min": 4552
},
{
"epoch": 3.434968017057569,
"grad_norm": 0.08958543707570064,
"learning_rate": 1.086990988871873e-05,
"loss": 1.023406744003296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2501792907714844,
"step": 807,
"valid_targets_mean": 14401.8,
"valid_targets_min": 2574
},
{
"epoch": 3.4392324093816633,
"grad_norm": 0.08975523446559025,
"learning_rate": 1.0817062095299929e-05,
"loss": 1.051383376121521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544962763786316,
"step": 808,
"valid_targets_mean": 13971.6,
"valid_targets_min": 1228
},
{
"epoch": 3.443496801705757,
"grad_norm": 0.09703685485955625,
"learning_rate": 1.0764295422470755e-05,
"loss": 1.0269638299942017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25050097703933716,
"step": 809,
"valid_targets_mean": 14947.8,
"valid_targets_min": 3223
},
{
"epoch": 3.4477611940298507,
"grad_norm": 0.09300711665012112,
"learning_rate": 1.0711610336363477e-05,
"loss": 1.0450351238250732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2627946734428406,
"step": 810,
"valid_targets_mean": 15129.2,
"valid_targets_min": 5196
},
{
"epoch": 3.4520255863539444,
"grad_norm": 0.08382352073085601,
"learning_rate": 1.065900730238961e-05,
"loss": 0.9718806743621826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24593904614448547,
"step": 811,
"valid_targets_mean": 15566.4,
"valid_targets_min": 1428
},
{
"epoch": 3.4562899786780386,
"grad_norm": 0.08119503430332328,
"learning_rate": 1.0606486785235879e-05,
"loss": 1.0306479930877686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2581614851951599,
"step": 812,
"valid_targets_mean": 14889.2,
"valid_targets_min": 7534
},
{
"epoch": 3.4605543710021323,
"grad_norm": 0.08028081567325662,
"learning_rate": 1.0554049248860045e-05,
"loss": 1.0699286460876465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.257074236869812,
"step": 813,
"valid_targets_mean": 13585.2,
"valid_targets_min": 2276
},
{
"epoch": 3.464818763326226,
"grad_norm": 0.10569534948008809,
"learning_rate": 1.0501695156486819e-05,
"loss": 1.031941294670105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24841266870498657,
"step": 814,
"valid_targets_mean": 15114.4,
"valid_targets_min": 2661
},
{
"epoch": 3.4690831556503197,
"grad_norm": 0.10269222842321281,
"learning_rate": 1.0449424970603796e-05,
"loss": 0.9980067610740662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2390216439962387,
"step": 815,
"valid_targets_mean": 14622.7,
"valid_targets_min": 2132
},
{
"epoch": 3.473347547974414,
"grad_norm": 0.08769027466424714,
"learning_rate": 1.0397239152957356e-05,
"loss": 1.0175775289535522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24682655930519104,
"step": 816,
"valid_targets_mean": 14203.2,
"valid_targets_min": 1247
},
{
"epoch": 3.4776119402985075,
"grad_norm": 0.09103252026860258,
"learning_rate": 1.034513816454858e-05,
"loss": 1.074886441230774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26357603073120117,
"step": 817,
"valid_targets_mean": 14293.2,
"valid_targets_min": 2038
},
{
"epoch": 3.481876332622601,
"grad_norm": 0.08612886065959924,
"learning_rate": 1.0293122465629186e-05,
"loss": 1.001979112625122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2555549740791321,
"step": 818,
"valid_targets_mean": 14997.3,
"valid_targets_min": 3084
},
{
"epoch": 3.486140724946695,
"grad_norm": 0.08380119684770174,
"learning_rate": 1.0241192515697432e-05,
"loss": 1.028665542602539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24578630924224854,
"step": 819,
"valid_targets_mean": 14548.8,
"valid_targets_min": 2986
},
{
"epoch": 3.490405117270789,
"grad_norm": 0.09646116991438991,
"learning_rate": 1.0189348773494135e-05,
"loss": 1.0738844871520996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2945839762687683,
"step": 820,
"valid_targets_mean": 15371.6,
"valid_targets_min": 4414
},
{
"epoch": 3.4946695095948828,
"grad_norm": 0.08212133510972003,
"learning_rate": 1.0137591696998514e-05,
"loss": 1.0327086448669434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2559362053871155,
"step": 821,
"valid_targets_mean": 14371.6,
"valid_targets_min": 1542
},
{
"epoch": 3.4989339019189765,
"grad_norm": 0.08462013635773709,
"learning_rate": 1.0085921743424225e-05,
"loss": 1.0202784538269043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2736544609069824,
"step": 822,
"valid_targets_mean": 15118.9,
"valid_targets_min": 4470
},
{
"epoch": 3.50319829424307,
"grad_norm": 0.08282828440700495,
"learning_rate": 1.0034339369215288e-05,
"loss": 1.0661686658859253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27339330315589905,
"step": 823,
"valid_targets_mean": 15402.6,
"valid_targets_min": 9918
},
{
"epoch": 3.5074626865671643,
"grad_norm": 0.0939885019575759,
"learning_rate": 9.982845030042068e-06,
"loss": 1.0056620836257935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24403563141822815,
"step": 824,
"valid_targets_mean": 15011.3,
"valid_targets_min": 4139
},
{
"epoch": 3.511727078891258,
"grad_norm": 0.08441867702001134,
"learning_rate": 9.931439180797237e-06,
"loss": 1.0503032207489014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2536422610282898,
"step": 825,
"valid_targets_mean": 15612.0,
"valid_targets_min": 9230
},
{
"epoch": 3.5159914712153517,
"grad_norm": 0.08594513150129277,
"learning_rate": 9.880122275591752e-06,
"loss": 0.9856399297714233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2723175883293152,
"step": 826,
"valid_targets_mean": 14429.9,
"valid_targets_min": 3244
},
{
"epoch": 3.520255863539446,
"grad_norm": 0.09026586508608515,
"learning_rate": 9.828894767750865e-06,
"loss": 1.0838544368743896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2742552161216736,
"step": 827,
"valid_targets_mean": 15251.6,
"valid_targets_min": 3087
},
{
"epoch": 3.5245202558635396,
"grad_norm": 0.07855066068637408,
"learning_rate": 9.777757109810102e-06,
"loss": 1.0196110010147095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25420427322387695,
"step": 828,
"valid_targets_mean": 14387.2,
"valid_targets_min": 2297
},
{
"epoch": 3.5287846481876333,
"grad_norm": 0.08061819139145802,
"learning_rate": 9.726709753511275e-06,
"loss": 0.9941138029098511,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2532624304294586,
"step": 829,
"valid_targets_mean": 15536.9,
"valid_targets_min": 9734
},
{
"epoch": 3.533049040511727,
"grad_norm": 0.10392351976244897,
"learning_rate": 9.675753149798474e-06,
"loss": 1.0283386707305908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26266804337501526,
"step": 830,
"valid_targets_mean": 15026.4,
"valid_targets_min": 7521
},
{
"epoch": 3.5373134328358207,
"grad_norm": 0.08752778954012838,
"learning_rate": 9.624887748814118e-06,
"loss": 1.0715502500534058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2519451975822449,
"step": 831,
"valid_targets_mean": 15512.7,
"valid_targets_min": 10422
},
{
"epoch": 3.541577825159915,
"grad_norm": 0.08502642572582692,
"learning_rate": 9.574113999894909e-06,
"loss": 1.04764986038208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2436976432800293,
"step": 832,
"valid_targets_mean": 14332.5,
"valid_targets_min": 2296
},
{
"epoch": 3.5458422174840085,
"grad_norm": 0.08183690163182732,
"learning_rate": 9.523432351567979e-06,
"loss": 1.009023904800415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2624768018722534,
"step": 833,
"valid_targets_mean": 15468.8,
"valid_targets_min": 8079
},
{
"epoch": 3.550106609808102,
"grad_norm": 0.08505381444826653,
"learning_rate": 9.472843251546792e-06,
"loss": 1.0535686016082764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24943134188652039,
"step": 834,
"valid_targets_mean": 13647.8,
"valid_targets_min": 2611
},
{
"epoch": 3.5543710021321964,
"grad_norm": 0.0897777424153501,
"learning_rate": 9.422347146727294e-06,
"loss": 1.0463916063308716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25419020652770996,
"step": 835,
"valid_targets_mean": 14754.8,
"valid_targets_min": 1320
},
{
"epoch": 3.55863539445629,
"grad_norm": 0.08791761119999952,
"learning_rate": 9.371944483183912e-06,
"loss": 1.0125620365142822,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24228467047214508,
"step": 836,
"valid_targets_mean": 14726.8,
"valid_targets_min": 6332
},
{
"epoch": 3.5628997867803838,
"grad_norm": 0.07614687201404623,
"learning_rate": 9.321635706165635e-06,
"loss": 1.0277087688446045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26706069707870483,
"step": 837,
"valid_targets_mean": 15485.4,
"valid_targets_min": 9379
},
{
"epoch": 3.5671641791044775,
"grad_norm": 0.0812121227567108,
"learning_rate": 9.271421260092075e-06,
"loss": 1.0401611328125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2550191283226013,
"step": 838,
"valid_targets_mean": 14509.5,
"valid_targets_min": 8384
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.0890663436170254,
"learning_rate": 9.221301588549519e-06,
"loss": 0.9943847060203552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2645444869995117,
"step": 839,
"valid_targets_mean": 15257.1,
"valid_targets_min": 9607
},
{
"epoch": 3.5756929637526653,
"grad_norm": 0.08286455702656072,
"learning_rate": 9.171277134287057e-06,
"loss": 1.0006598234176636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24054405093193054,
"step": 840,
"valid_targets_mean": 14489.2,
"valid_targets_min": 1593
},
{
"epoch": 3.579957356076759,
"grad_norm": 0.1077040295098192,
"learning_rate": 9.121348339212634e-06,
"loss": 1.0696783065795898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2620352804660797,
"step": 841,
"valid_targets_mean": 15070.0,
"valid_targets_min": 3025
},
{
"epoch": 3.5842217484008527,
"grad_norm": 0.07951933388848838,
"learning_rate": 9.07151564438916e-06,
"loss": 1.0507144927978516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2834031283855438,
"step": 842,
"valid_targets_mean": 14870.1,
"valid_targets_min": 6984
},
{
"epoch": 3.588486140724947,
"grad_norm": 0.082957792336547,
"learning_rate": 9.021779490030611e-06,
"loss": 1.0006059408187866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23502129316329956,
"step": 843,
"valid_targets_mean": 14381.8,
"valid_targets_min": 950
},
{
"epoch": 3.5927505330490406,
"grad_norm": 0.07715242189716764,
"learning_rate": 8.972140315498119e-06,
"loss": 1.0210398435592651,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2652220129966736,
"step": 844,
"valid_targets_mean": 15549.3,
"valid_targets_min": 8712
},
{
"epoch": 3.5970149253731343,
"grad_norm": 0.08284734582275866,
"learning_rate": 8.922598559296154e-06,
"loss": 1.041926383972168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2657508850097656,
"step": 845,
"valid_targets_mean": 15537.5,
"valid_targets_min": 10056
},
{
"epoch": 3.6012793176972284,
"grad_norm": 0.08480649073786375,
"learning_rate": 8.873154659068582e-06,
"loss": 0.990920901298523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2244873195886612,
"step": 846,
"valid_targets_mean": 14691.8,
"valid_targets_min": 4286
},
{
"epoch": 3.605543710021322,
"grad_norm": 0.08549953988143626,
"learning_rate": 8.823809051594816e-06,
"loss": 1.0318598747253418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24531647562980652,
"step": 847,
"valid_targets_mean": 14132.1,
"valid_targets_min": 1537
},
{
"epoch": 3.609808102345416,
"grad_norm": 0.10109613135718828,
"learning_rate": 8.774562172785988e-06,
"loss": 1.013717770576477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23869401216506958,
"step": 848,
"valid_targets_mean": 14452.1,
"valid_targets_min": 2491
},
{
"epoch": 3.6140724946695095,
"grad_norm": 0.08165886038369316,
"learning_rate": 8.725414457681063e-06,
"loss": 1.063403844833374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2838260531425476,
"step": 849,
"valid_targets_mean": 15697.1,
"valid_targets_min": 4350
},
{
"epoch": 3.6183368869936032,
"grad_norm": 0.08435369862288104,
"learning_rate": 8.676366340443017e-06,
"loss": 0.9874081611633301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2375505268573761,
"step": 850,
"valid_targets_mean": 15112.3,
"valid_targets_min": 6566
},
{
"epoch": 3.6226012793176974,
"grad_norm": 0.09404947567739144,
"learning_rate": 8.627418254355e-06,
"loss": 1.0434298515319824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26521697640419006,
"step": 851,
"valid_targets_mean": 14487.1,
"valid_targets_min": 1235
},
{
"epoch": 3.626865671641791,
"grad_norm": 0.08544312834408434,
"learning_rate": 8.578570631816474e-06,
"loss": 1.0267388820648193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2539098262786865,
"step": 852,
"valid_targets_mean": 14514.5,
"valid_targets_min": 8625
},
{
"epoch": 3.631130063965885,
"grad_norm": 0.09434929655794602,
"learning_rate": 8.529823904339472e-06,
"loss": 1.0240907669067383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26411551237106323,
"step": 853,
"valid_targets_mean": 14144.6,
"valid_targets_min": 4400
},
{
"epoch": 3.635394456289979,
"grad_norm": 0.08374456165741265,
"learning_rate": 8.481178502544684e-06,
"loss": 1.045487642288208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23568733036518097,
"step": 854,
"valid_targets_mean": 14572.8,
"valid_targets_min": 3510
},
{
"epoch": 3.6396588486140726,
"grad_norm": 0.10784522513002642,
"learning_rate": 8.43263485615774e-06,
"loss": 1.023280143737793,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2622443437576294,
"step": 855,
"valid_targets_mean": 15251.7,
"valid_targets_min": 4597
},
{
"epoch": 3.6439232409381663,
"grad_norm": 0.10854481191429571,
"learning_rate": 8.384193394005372e-06,
"loss": 1.0276780128479004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26878824830055237,
"step": 856,
"valid_targets_mean": 15089.5,
"valid_targets_min": 10868
},
{
"epoch": 3.64818763326226,
"grad_norm": 0.09826703831891268,
"learning_rate": 8.33585454401161e-06,
"loss": 1.0379751920700073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24941225349903107,
"step": 857,
"valid_targets_mean": 14409.4,
"valid_targets_min": 6141
},
{
"epoch": 3.6524520255863537,
"grad_norm": 0.10278878232245207,
"learning_rate": 8.287618733194073e-06,
"loss": 1.0222830772399902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25176340341567993,
"step": 858,
"valid_targets_mean": 15116.8,
"valid_targets_min": 4752
},
{
"epoch": 3.656716417910448,
"grad_norm": 0.1019910802283647,
"learning_rate": 8.239486387660096e-06,
"loss": 1.0164682865142822,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2575756311416626,
"step": 859,
"valid_targets_mean": 15095.1,
"valid_targets_min": 2327
},
{
"epoch": 3.6609808102345416,
"grad_norm": 0.09850413329710703,
"learning_rate": 8.191457932603052e-06,
"loss": 1.0082552433013916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.266116201877594,
"step": 860,
"valid_targets_mean": 14914.2,
"valid_targets_min": 2351
},
{
"epoch": 3.6652452025586353,
"grad_norm": 0.11295918923336667,
"learning_rate": 8.143533792298545e-06,
"loss": 1.044511318206787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2526169419288635,
"step": 861,
"valid_targets_mean": 13972.2,
"valid_targets_min": 1240
},
{
"epoch": 3.6695095948827294,
"grad_norm": 0.1049263909360616,
"learning_rate": 8.095714390100698e-06,
"loss": 1.027268886566162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27266305685043335,
"step": 862,
"valid_targets_mean": 15099.7,
"valid_targets_min": 5832
},
{
"epoch": 3.673773987206823,
"grad_norm": 0.0856143507453554,
"learning_rate": 8.048000148438375e-06,
"loss": 1.0215270519256592,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2595754861831665,
"step": 863,
"valid_targets_mean": 14788.7,
"valid_targets_min": 2120
},
{
"epoch": 3.678038379530917,
"grad_norm": 0.07908065366604718,
"learning_rate": 8.000391488811485e-06,
"loss": 1.0316400527954102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26067250967025757,
"step": 864,
"valid_targets_mean": 15144.5,
"valid_targets_min": 5179
},
{
"epoch": 3.6823027718550105,
"grad_norm": 0.0834953997636233,
"learning_rate": 7.952888831787215e-06,
"loss": 1.0844944715499878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2583216428756714,
"step": 865,
"valid_targets_mean": 15116.6,
"valid_targets_min": 6478
},
{
"epoch": 3.6865671641791042,
"grad_norm": 0.08378814354500813,
"learning_rate": 7.905492596996391e-06,
"loss": 1.0059458017349243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2446044236421585,
"step": 866,
"valid_targets_mean": 15007.4,
"valid_targets_min": 6362
},
{
"epoch": 3.6908315565031984,
"grad_norm": 0.07999522658287574,
"learning_rate": 7.858203203129668e-06,
"loss": 1.0042922496795654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2598060369491577,
"step": 867,
"valid_targets_mean": 15510.6,
"valid_targets_min": 8530
},
{
"epoch": 3.695095948827292,
"grad_norm": 0.07987464923521465,
"learning_rate": 7.811021067933919e-06,
"loss": 1.0510263442993164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2577937841415405,
"step": 868,
"valid_targets_mean": 14630.5,
"valid_targets_min": 2021
},
{
"epoch": 3.699360341151386,
"grad_norm": 0.07992043014716169,
"learning_rate": 7.763946608208504e-06,
"loss": 1.0136570930480957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2394932359457016,
"step": 869,
"valid_targets_mean": 14532.7,
"valid_targets_min": 2449
},
{
"epoch": 3.70362473347548,
"grad_norm": 0.08927827389107539,
"learning_rate": 7.716980239801588e-06,
"loss": 1.0263105630874634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23384422063827515,
"step": 870,
"valid_targets_mean": 13523.9,
"valid_targets_min": 2750
},
{
"epoch": 3.7078891257995736,
"grad_norm": 0.11056747979307219,
"learning_rate": 7.670122377606495e-06,
"loss": 1.0767567157745361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2681053578853607,
"step": 871,
"valid_targets_mean": 13881.6,
"valid_targets_min": 1017
},
{
"epoch": 3.7121535181236673,
"grad_norm": 0.09470717288374518,
"learning_rate": 7.623373435557988e-06,
"loss": 0.9856612682342529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24675241112709045,
"step": 872,
"valid_targets_mean": 15218.8,
"valid_targets_min": 7013
},
{
"epoch": 3.716417910447761,
"grad_norm": 0.07802928855827182,
"learning_rate": 7.5767338266286775e-06,
"loss": 1.035343885421753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2520187497138977,
"step": 873,
"valid_targets_mean": 14378.0,
"valid_targets_min": 5190
},
{
"epoch": 3.7206823027718547,
"grad_norm": 0.08614787647067453,
"learning_rate": 7.530203962825331e-06,
"loss": 1.0437312126159668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22808116674423218,
"step": 874,
"valid_targets_mean": 13500.3,
"valid_targets_min": 2591
},
{
"epoch": 3.724946695095949,
"grad_norm": 0.0874181178466684,
"learning_rate": 7.483784255185249e-06,
"loss": 1.0509166717529297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26214781403541565,
"step": 875,
"valid_targets_mean": 15184.8,
"valid_targets_min": 3965
},
{
"epoch": 3.7292110874200426,
"grad_norm": 0.08075657614659544,
"learning_rate": 7.437475113772632e-06,
"loss": 1.0664618015289307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2666980028152466,
"step": 876,
"valid_targets_mean": 15203.5,
"valid_targets_min": 10335
},
{
"epoch": 3.7334754797441363,
"grad_norm": 0.07477859358481892,
"learning_rate": 7.391276947674932e-06,
"loss": 1.022497534751892,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2488812357187271,
"step": 877,
"valid_targets_mean": 14774.1,
"valid_targets_min": 6826
},
{
"epoch": 3.7377398720682304,
"grad_norm": 0.08084873648482122,
"learning_rate": 7.345190164999307e-06,
"loss": 1.0268759727478027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24368500709533691,
"step": 878,
"valid_targets_mean": 15164.0,
"valid_targets_min": 8113
},
{
"epoch": 3.742004264392324,
"grad_norm": 0.10466196957153916,
"learning_rate": 7.299215172868947e-06,
"loss": 1.0474107265472412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2600746154785156,
"step": 879,
"valid_targets_mean": 14153.2,
"valid_targets_min": 1784
},
{
"epoch": 3.746268656716418,
"grad_norm": 0.08225310002176738,
"learning_rate": 7.2533523774194865e-06,
"loss": 0.9963715076446533,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2696053385734558,
"step": 880,
"valid_targets_mean": 15587.3,
"valid_targets_min": 12542
},
{
"epoch": 3.750533049040512,
"grad_norm": 0.0794321576113467,
"learning_rate": 7.2076021837954616e-06,
"loss": 1.0367623567581177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2496190220117569,
"step": 881,
"valid_targets_mean": 14915.2,
"valid_targets_min": 1946
},
{
"epoch": 3.7547974413646057,
"grad_norm": 0.08209307196316702,
"learning_rate": 7.161964996146689e-06,
"loss": 1.0878024101257324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26012054085731506,
"step": 882,
"valid_targets_mean": 15025.1,
"valid_targets_min": 4369
},
{
"epoch": 3.7590618336886994,
"grad_norm": 0.07259322999036513,
"learning_rate": 7.116441217624708e-06,
"loss": 1.0414860248565674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27827656269073486,
"step": 883,
"valid_targets_mean": 15538.1,
"valid_targets_min": 9241
},
{
"epoch": 3.763326226012793,
"grad_norm": 0.08696090704266997,
"learning_rate": 7.071031250379228e-06,
"loss": 1.0478577613830566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25591087341308594,
"step": 884,
"valid_targets_mean": 14598.1,
"valid_targets_min": 4809
},
{
"epoch": 3.767590618336887,
"grad_norm": 0.07693999903967946,
"learning_rate": 7.0257354955545466e-06,
"loss": 1.0575886964797974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2571733593940735,
"step": 885,
"valid_targets_mean": 15282.5,
"valid_targets_min": 7892
},
{
"epoch": 3.771855010660981,
"grad_norm": 0.07565798434991149,
"learning_rate": 6.980554353286066e-06,
"loss": 1.0402332544326782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2695630192756653,
"step": 886,
"valid_targets_mean": 14452.4,
"valid_targets_min": 2427
},
{
"epoch": 3.7761194029850746,
"grad_norm": 0.07909328602982077,
"learning_rate": 6.935488222696676e-06,
"loss": 1.0011862516403198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2459937185049057,
"step": 887,
"valid_targets_mean": 14569.8,
"valid_targets_min": 2550
},
{
"epoch": 3.7803837953091683,
"grad_norm": 0.07818319017123801,
"learning_rate": 6.890537501893302e-06,
"loss": 1.0591342449188232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27399516105651855,
"step": 888,
"valid_targets_mean": 15348.9,
"valid_targets_min": 5567
},
{
"epoch": 3.7846481876332625,
"grad_norm": 0.10386307909675437,
"learning_rate": 6.845702587963352e-06,
"loss": 1.0306096076965332,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24255593121051788,
"step": 889,
"valid_targets_mean": 14806.3,
"valid_targets_min": 4626
},
{
"epoch": 3.788912579957356,
"grad_norm": 0.09351565469204826,
"learning_rate": 6.800983876971192e-06,
"loss": 1.0489108562469482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2679540812969208,
"step": 890,
"valid_targets_mean": 15298.2,
"valid_targets_min": 4554
},
{
"epoch": 3.79317697228145,
"grad_norm": 0.08060750467788766,
"learning_rate": 6.756381763954718e-06,
"loss": 1.0338560342788696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23931586742401123,
"step": 891,
"valid_targets_mean": 14668.2,
"valid_targets_min": 4243
},
{
"epoch": 3.7974413646055436,
"grad_norm": 0.09301035975158325,
"learning_rate": 6.7118966429217645e-06,
"loss": 1.0454421043395996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24078507721424103,
"step": 892,
"valid_targets_mean": 13780.3,
"valid_targets_min": 2241
},
{
"epoch": 3.8017057569296373,
"grad_norm": 0.09941275253824729,
"learning_rate": 6.667528906846714e-06,
"loss": 1.0338785648345947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26446783542633057,
"step": 893,
"valid_targets_mean": 15612.3,
"valid_targets_min": 8208
},
{
"epoch": 3.8059701492537314,
"grad_norm": 0.0773705763130956,
"learning_rate": 6.623278947666974e-06,
"loss": 1.056181788444519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2693255543708801,
"step": 894,
"valid_targets_mean": 14974.3,
"valid_targets_min": 2241
},
{
"epoch": 3.810234541577825,
"grad_norm": 0.09469480760796763,
"learning_rate": 6.579147156279538e-06,
"loss": 1.0422710180282593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25310590863227844,
"step": 895,
"valid_targets_mean": 14745.0,
"valid_targets_min": 5518
},
{
"epoch": 3.814498933901919,
"grad_norm": 0.0844441965122201,
"learning_rate": 6.535133922537513e-06,
"loss": 1.051576852798462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2646353840827942,
"step": 896,
"valid_targets_mean": 15239.8,
"valid_targets_min": 3569
},
{
"epoch": 3.818763326226013,
"grad_norm": 0.09791535485186709,
"learning_rate": 6.491239635246709e-06,
"loss": 1.0070401430130005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26541510224342346,
"step": 897,
"valid_targets_mean": 14494.6,
"valid_targets_min": 3954
},
{
"epoch": 3.8230277185501067,
"grad_norm": 0.0804234348689826,
"learning_rate": 6.447464682162143e-06,
"loss": 1.0272142887115479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26719045639038086,
"step": 898,
"valid_targets_mean": 14542.8,
"valid_targets_min": 5861
},
{
"epoch": 3.8272921108742004,
"grad_norm": 0.07719355729984614,
"learning_rate": 6.403809449984704e-06,
"loss": 1.005959153175354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24028396606445312,
"step": 899,
"valid_targets_mean": 13866.1,
"valid_targets_min": 2611
},
{
"epoch": 3.831556503198294,
"grad_norm": 0.09003236822137275,
"learning_rate": 6.3602743243576405e-06,
"loss": 1.0521509647369385,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2590738534927368,
"step": 900,
"valid_targets_mean": 15023.3,
"valid_targets_min": 2394
},
{
"epoch": 3.835820895522388,
"grad_norm": 0.07941972186305471,
"learning_rate": 6.316859689863222e-06,
"loss": 1.030562400817871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2312874048948288,
"step": 901,
"valid_targets_mean": 14708.4,
"valid_targets_min": 5908
},
{
"epoch": 3.840085287846482,
"grad_norm": 0.08724123781291919,
"learning_rate": 6.273565930019316e-06,
"loss": 1.0391294956207275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24585530161857605,
"step": 902,
"valid_targets_mean": 14240.8,
"valid_targets_min": 1669
},
{
"epoch": 3.8443496801705757,
"grad_norm": 0.07910350722012889,
"learning_rate": 6.230393427276e-06,
"loss": 1.0073344707489014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2446940541267395,
"step": 903,
"valid_targets_mean": 15278.1,
"valid_targets_min": 7088
},
{
"epoch": 3.8486140724946694,
"grad_norm": 0.0785625433640836,
"learning_rate": 6.187342563012198e-06,
"loss": 1.0122864246368408,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24303199350833893,
"step": 904,
"valid_targets_mean": 15377.3,
"valid_targets_min": 7820
},
{
"epoch": 3.8528784648187635,
"grad_norm": 0.0809758203341804,
"learning_rate": 6.144413717532269e-06,
"loss": 1.0475292205810547,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25803524255752563,
"step": 905,
"valid_targets_mean": 14760.0,
"valid_targets_min": 4632
},
{
"epoch": 3.857142857142857,
"grad_norm": 0.07554715750922045,
"learning_rate": 6.1016072700627106e-06,
"loss": 1.059013843536377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2788810729980469,
"step": 906,
"valid_targets_mean": 15474.2,
"valid_targets_min": 10789
},
{
"epoch": 3.861407249466951,
"grad_norm": 0.08815171055493336,
"learning_rate": 6.058923598748756e-06,
"loss": 1.0405542850494385,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2618822455406189,
"step": 907,
"valid_targets_mean": 14841.5,
"valid_targets_min": 5120
},
{
"epoch": 3.8656716417910446,
"grad_norm": 0.09065135669638827,
"learning_rate": 6.016363080651066e-06,
"loss": 1.02406644821167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2756415605545044,
"step": 908,
"valid_targets_mean": 15457.6,
"valid_targets_min": 1412
},
{
"epoch": 3.8699360341151388,
"grad_norm": 0.06956184698630777,
"learning_rate": 5.973926091742386e-06,
"loss": 1.0235509872436523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2693585753440857,
"step": 909,
"valid_targets_mean": 15067.2,
"valid_targets_min": 2737
},
{
"epoch": 3.8742004264392325,
"grad_norm": 0.08197048416260708,
"learning_rate": 5.931613006904196e-06,
"loss": 1.0860520601272583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2711891829967499,
"step": 910,
"valid_targets_mean": 14860.2,
"valid_targets_min": 2074
},
{
"epoch": 3.878464818763326,
"grad_norm": 0.08495311148379922,
"learning_rate": 5.889424199923473e-06,
"loss": 1.0736467838287354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28341278433799744,
"step": 911,
"valid_targets_mean": 14724.0,
"valid_targets_min": 2671
},
{
"epoch": 3.88272921108742,
"grad_norm": 0.07777898132875226,
"learning_rate": 5.847360043489318e-06,
"loss": 1.0422983169555664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2551887631416321,
"step": 912,
"valid_targets_mean": 14038.2,
"valid_targets_min": 4427
},
{
"epoch": 3.886993603411514,
"grad_norm": 0.07555614702580156,
"learning_rate": 5.805420909189683e-06,
"loss": 1.0190186500549316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2498110830783844,
"step": 913,
"valid_targets_mean": 13927.7,
"valid_targets_min": 2133
},
{
"epoch": 3.8912579957356077,
"grad_norm": 0.07536806197422509,
"learning_rate": 5.7636071675081076e-06,
"loss": 0.9859417676925659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23977015912532806,
"step": 914,
"valid_targets_mean": 14980.1,
"valid_targets_min": 4753
},
{
"epoch": 3.8955223880597014,
"grad_norm": 0.08129778873936544,
"learning_rate": 5.721919187820431e-06,
"loss": 1.0162174701690674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.249849334359169,
"step": 915,
"valid_targets_mean": 15068.5,
"valid_targets_min": 1714
},
{
"epoch": 3.8997867803837956,
"grad_norm": 0.0844611542297401,
"learning_rate": 5.6803573383915265e-06,
"loss": 1.0243160724639893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25158846378326416,
"step": 916,
"valid_targets_mean": 15044.6,
"valid_targets_min": 6670
},
{
"epoch": 3.9040511727078893,
"grad_norm": 0.09715740428020808,
"learning_rate": 5.638921986372064e-06,
"loss": 1.0170881748199463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26079803705215454,
"step": 917,
"valid_targets_mean": 14739.3,
"valid_targets_min": 4791
},
{
"epoch": 3.908315565031983,
"grad_norm": 0.08186217636269322,
"learning_rate": 5.5976134977952315e-06,
"loss": 1.0299415588378906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2786436080932617,
"step": 918,
"valid_targets_mean": 14845.4,
"valid_targets_min": 5095
},
{
"epoch": 3.9125799573560767,
"grad_norm": 0.0807959881624803,
"learning_rate": 5.556432237573564e-06,
"loss": 1.0457395315170288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2563976049423218,
"step": 919,
"valid_targets_mean": 14489.6,
"valid_targets_min": 1727
},
{
"epoch": 3.9168443496801704,
"grad_norm": 0.08968468146141904,
"learning_rate": 5.5153785694956416e-06,
"loss": 1.0066556930541992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.255768746137619,
"step": 920,
"valid_targets_mean": 14957.5,
"valid_targets_min": 6027
},
{
"epoch": 3.9211087420042645,
"grad_norm": 0.08029937904141751,
"learning_rate": 5.474452856222942e-06,
"loss": 1.0962214469909668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26948976516723633,
"step": 921,
"valid_targets_mean": 14551.8,
"valid_targets_min": 2854
},
{
"epoch": 3.925373134328358,
"grad_norm": 0.07610502521435782,
"learning_rate": 5.433655459286611e-06,
"loss": 0.9986022710800171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24567736685276031,
"step": 922,
"valid_targets_mean": 14516.1,
"valid_targets_min": 4692
},
{
"epoch": 3.929637526652452,
"grad_norm": 0.0813681926726865,
"learning_rate": 5.392986739084238e-06,
"loss": 1.052161693572998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25333261489868164,
"step": 923,
"valid_targets_mean": 14106.4,
"valid_targets_min": 3663
},
{
"epoch": 3.933901918976546,
"grad_norm": 0.08172004028488414,
"learning_rate": 5.352447054876755e-06,
"loss": 1.0313013792037964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24101045727729797,
"step": 924,
"valid_targets_mean": 14219.2,
"valid_targets_min": 1903
},
{
"epoch": 3.9381663113006398,
"grad_norm": 0.07506210209930378,
"learning_rate": 5.31203676478516e-06,
"loss": 1.0536484718322754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26649099588394165,
"step": 925,
"valid_targets_mean": 15425.6,
"valid_targets_min": 1511
},
{
"epoch": 3.9424307036247335,
"grad_norm": 0.072633133405953,
"learning_rate": 5.271756225787434e-06,
"loss": 1.0264238119125366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.270125150680542,
"step": 926,
"valid_targets_mean": 15440.1,
"valid_targets_min": 6631
},
{
"epoch": 3.946695095948827,
"grad_norm": 0.08527675393769442,
"learning_rate": 5.231605793715348e-06,
"loss": 1.0151232481002808,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2394932210445404,
"step": 927,
"valid_targets_mean": 15057.4,
"valid_targets_min": 7393
},
{
"epoch": 3.950959488272921,
"grad_norm": 0.10557885289457988,
"learning_rate": 5.191585823251335e-06,
"loss": 1.0420904159545898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23350343108177185,
"step": 928,
"valid_targets_mean": 14315.7,
"valid_targets_min": 1357
},
{
"epoch": 3.955223880597015,
"grad_norm": 0.07403391899294842,
"learning_rate": 5.151696667925348e-06,
"loss": 1.034696340560913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26972413063049316,
"step": 929,
"valid_targets_mean": 14701.4,
"valid_targets_min": 2446
},
{
"epoch": 3.9594882729211087,
"grad_norm": 0.07940484753771229,
"learning_rate": 5.111938680111732e-06,
"loss": 1.018942952156067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2700170874595642,
"step": 930,
"valid_targets_mean": 14576.6,
"valid_targets_min": 2477
},
{
"epoch": 3.9637526652452024,
"grad_norm": 0.07412034004160253,
"learning_rate": 5.072312211026125e-06,
"loss": 1.0595375299453735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28804612159729004,
"step": 931,
"valid_targets_mean": 15064.4,
"valid_targets_min": 1783
},
{
"epoch": 3.9680170575692966,
"grad_norm": 0.07385148862505388,
"learning_rate": 5.032817610722369e-06,
"loss": 1.0153112411499023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2420693337917328,
"step": 932,
"valid_targets_mean": 14734.0,
"valid_targets_min": 5734
},
{
"epoch": 3.9722814498933903,
"grad_norm": 0.07857758438062754,
"learning_rate": 4.993455228089366e-06,
"loss": 1.0582267045974731,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2619363069534302,
"step": 933,
"valid_targets_mean": 14335.9,
"valid_targets_min": 2310
},
{
"epoch": 3.976545842217484,
"grad_norm": 0.08014441535975936,
"learning_rate": 4.954225410848048e-06,
"loss": 1.0665839910507202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26797688007354736,
"step": 934,
"valid_targets_mean": 14975.0,
"valid_targets_min": 6684
},
{
"epoch": 3.9808102345415777,
"grad_norm": 0.07594610005732107,
"learning_rate": 4.915128505548284e-06,
"loss": 1.051414132118225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2612667977809906,
"step": 935,
"valid_targets_mean": 14959.4,
"valid_targets_min": 1648
},
{
"epoch": 3.9850746268656714,
"grad_norm": 0.07315444416867845,
"learning_rate": 4.8761648575658145e-06,
"loss": 1.042731761932373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2497975081205368,
"step": 936,
"valid_targets_mean": 14657.7,
"valid_targets_min": 3326
},
{
"epoch": 3.9893390191897655,
"grad_norm": 0.08179327051953446,
"learning_rate": 4.837334811099217e-06,
"loss": 1.02659010887146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26457613706588745,
"step": 937,
"valid_targets_mean": 14456.5,
"valid_targets_min": 3188
},
{
"epoch": 3.9936034115138592,
"grad_norm": 0.07583105755160399,
"learning_rate": 4.7986387091668365e-06,
"loss": 1.025386095046997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26111188530921936,
"step": 938,
"valid_targets_mean": 14776.4,
"valid_targets_min": 6350
},
{
"epoch": 3.997867803837953,
"grad_norm": 0.07316451888866704,
"learning_rate": 4.760076893603791e-06,
"loss": 1.0235109329223633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25054243206977844,
"step": 939,
"valid_targets_mean": 14792.3,
"valid_targets_min": 4066
},
{
"epoch": 4.0,
"grad_norm": 0.09665112068128351,
"learning_rate": 4.721649705058926e-06,
"loss": 0.9654463529586792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4785127639770508,
"step": 940,
"valid_targets_mean": 15362.8,
"valid_targets_min": 6870
},
{
"epoch": 4.004264392324094,
"grad_norm": 0.07694955817517563,
"learning_rate": 4.683357482991819e-06,
"loss": 1.0603652000427246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28040334582328796,
"step": 941,
"valid_targets_mean": 14933.3,
"valid_targets_min": 2774
},
{
"epoch": 4.008528784648187,
"grad_norm": 0.08687304347329067,
"learning_rate": 4.645200565669776e-06,
"loss": 1.058302640914917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27226993441581726,
"step": 942,
"valid_targets_mean": 15328.0,
"valid_targets_min": 3204
},
{
"epoch": 4.0127931769722816,
"grad_norm": 0.07633447398504972,
"learning_rate": 4.607179290164823e-06,
"loss": 1.0665823221206665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2525603175163269,
"step": 943,
"valid_targets_mean": 14727.6,
"valid_targets_min": 3893
},
{
"epoch": 4.017057569296376,
"grad_norm": 0.07561892650399228,
"learning_rate": 4.569293992350783e-06,
"loss": 0.9884703159332275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23644356429576874,
"step": 944,
"valid_targets_mean": 15077.0,
"valid_targets_min": 3168
},
{
"epoch": 4.021321961620469,
"grad_norm": 0.07846946482719135,
"learning_rate": 4.531545006900244e-06,
"loss": 1.0185267925262451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2440236508846283,
"step": 945,
"valid_targets_mean": 14482.2,
"valid_targets_min": 2348
},
{
"epoch": 4.025586353944563,
"grad_norm": 0.0710106944898757,
"learning_rate": 4.493932667281646e-06,
"loss": 0.9847319722175598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22929999232292175,
"step": 946,
"valid_targets_mean": 15876.1,
"valid_targets_min": 11782
},
{
"epoch": 4.029850746268656,
"grad_norm": 0.07091824957614329,
"learning_rate": 4.456457305756321e-06,
"loss": 0.9619096517562866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2401641607284546,
"step": 947,
"valid_targets_mean": 14348.1,
"valid_targets_min": 1881
},
{
"epoch": 4.0341151385927505,
"grad_norm": 0.07607293193675675,
"learning_rate": 4.419119253375557e-06,
"loss": 1.0721756219863892,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2654160261154175,
"step": 948,
"valid_targets_mean": 14250.8,
"valid_targets_min": 2276
},
{
"epoch": 4.038379530916845,
"grad_norm": 0.07259281785500683,
"learning_rate": 4.381918839977675e-06,
"loss": 1.0606753826141357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2531473934650421,
"step": 949,
"valid_targets_mean": 14412.8,
"valid_targets_min": 4632
},
{
"epoch": 4.042643923240938,
"grad_norm": 0.08118109856185132,
"learning_rate": 4.344856394185122e-06,
"loss": 1.0495662689208984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24468651413917542,
"step": 950,
"valid_targets_mean": 15159.2,
"valid_targets_min": 6548
},
{
"epoch": 4.046908315565032,
"grad_norm": 0.09998418224519554,
"learning_rate": 4.307932243401538e-06,
"loss": 1.0190573930740356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24616962671279907,
"step": 951,
"valid_targets_mean": 14866.3,
"valid_targets_min": 1378
},
{
"epoch": 4.051172707889126,
"grad_norm": 0.09469202139223815,
"learning_rate": 4.271146713808927e-06,
"loss": 1.0151586532592773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24089258909225464,
"step": 952,
"valid_targets_mean": 14237.8,
"valid_targets_min": 6752
},
{
"epoch": 4.0554371002132195,
"grad_norm": 0.07921318660079313,
"learning_rate": 4.234500130364698e-06,
"loss": 1.0283575057983398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24566279351711273,
"step": 953,
"valid_targets_mean": 13966.0,
"valid_targets_min": 3102
},
{
"epoch": 4.059701492537314,
"grad_norm": 0.07964596853684529,
"learning_rate": 4.197992816798851e-06,
"loss": 1.0123951435089111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26131612062454224,
"step": 954,
"valid_targets_mean": 15317.1,
"valid_targets_min": 9724
},
{
"epoch": 4.063965884861407,
"grad_norm": 0.07276343367944109,
"learning_rate": 4.161625095611101e-06,
"loss": 1.0430335998535156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2427283227443695,
"step": 955,
"valid_targets_mean": 13846.8,
"valid_targets_min": 1988
},
{
"epoch": 4.068230277185501,
"grad_norm": 0.08289405414794294,
"learning_rate": 4.125397288068007e-06,
"loss": 0.9959665536880493,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26747429370880127,
"step": 956,
"valid_targets_mean": 15741.6,
"valid_targets_min": 9550
},
{
"epoch": 4.072494669509595,
"grad_norm": 0.0941427414212429,
"learning_rate": 4.089309714200187e-06,
"loss": 1.0306720733642578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24888575077056885,
"step": 957,
"valid_targets_mean": 14479.0,
"valid_targets_min": 4645
},
{
"epoch": 4.076759061833688,
"grad_norm": 0.08024261726161806,
"learning_rate": 4.0533626927994185e-06,
"loss": 1.010699987411499,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2400101125240326,
"step": 958,
"valid_targets_mean": 14365.2,
"valid_targets_min": 1469
},
{
"epoch": 4.081023454157783,
"grad_norm": 0.08383657437700691,
"learning_rate": 4.017556541415888e-06,
"loss": 1.0223714113235474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25305095314979553,
"step": 959,
"valid_targets_mean": 14697.6,
"valid_targets_min": 4543
},
{
"epoch": 4.085287846481877,
"grad_norm": 0.06929096722852933,
"learning_rate": 3.981891576355352e-06,
"loss": 1.0068535804748535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26727986335754395,
"step": 960,
"valid_targets_mean": 15997.8,
"valid_targets_min": 11888
},
{
"epoch": 4.08955223880597,
"grad_norm": 0.07531370708433341,
"learning_rate": 3.946368112676346e-06,
"loss": 0.9880790710449219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24824348092079163,
"step": 961,
"valid_targets_mean": 14625.4,
"valid_targets_min": 5805
},
{
"epoch": 4.093816631130064,
"grad_norm": 0.10162148816151736,
"learning_rate": 3.9109864641874166e-06,
"loss": 1.04616379737854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26773929595947266,
"step": 962,
"valid_targets_mean": 15228.8,
"valid_targets_min": 10158
},
{
"epoch": 4.098081023454157,
"grad_norm": 0.06935177859830995,
"learning_rate": 3.875746943444316e-06,
"loss": 1.014711618423462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2293023318052292,
"step": 963,
"valid_targets_mean": 14586.9,
"valid_targets_min": 1683
},
{
"epoch": 4.1023454157782515,
"grad_norm": 0.07230353147219123,
"learning_rate": 3.840649861747278e-06,
"loss": 1.0353587865829468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2691650986671448,
"step": 964,
"valid_targets_mean": 15218.3,
"valid_targets_min": 5957
},
{
"epoch": 4.106609808102346,
"grad_norm": 0.08871474277209189,
"learning_rate": 3.8056955291382667e-06,
"loss": 1.0306223630905151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23993517458438873,
"step": 965,
"valid_targets_mean": 14445.4,
"valid_targets_min": 3093
},
{
"epoch": 4.110874200426439,
"grad_norm": 0.07560960717340694,
"learning_rate": 3.7708842543981928e-06,
"loss": 1.0723929405212402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23837114870548248,
"step": 966,
"valid_targets_mean": 14178.5,
"valid_targets_min": 3837
},
{
"epoch": 4.115138592750533,
"grad_norm": 0.07000342089971023,
"learning_rate": 3.736216345044237e-06,
"loss": 1.0193781852722168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2601016163825989,
"step": 967,
"valid_targets_mean": 15474.5,
"valid_targets_min": 10675
},
{
"epoch": 4.119402985074627,
"grad_norm": 0.07589849526555094,
"learning_rate": 3.7016921073271084e-06,
"loss": 1.0299336910247803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26432567834854126,
"step": 968,
"valid_targets_mean": 14921.3,
"valid_targets_min": 5478
},
{
"epoch": 4.1236673773987205,
"grad_norm": 0.07763151155765381,
"learning_rate": 3.6673118462283453e-06,
"loss": 1.0336275100708008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24046000838279724,
"step": 969,
"valid_targets_mean": 14821.7,
"valid_targets_min": 7388
},
{
"epoch": 4.127931769722815,
"grad_norm": 0.08528106397009493,
"learning_rate": 3.6330758654576227e-06,
"loss": 1.0443201065063477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25907987356185913,
"step": 970,
"valid_targets_mean": 15185.0,
"valid_targets_min": 5704
},
{
"epoch": 4.132196162046908,
"grad_norm": 0.07761338166852766,
"learning_rate": 3.598984467450055e-06,
"loss": 0.9852661490440369,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26297616958618164,
"step": 971,
"valid_targets_mean": 15349.3,
"valid_targets_min": 8002
},
{
"epoch": 4.136460554371002,
"grad_norm": 0.08457441832162528,
"learning_rate": 3.565037953363546e-06,
"loss": 1.0084260702133179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2558494210243225,
"step": 972,
"valid_targets_mean": 15253.0,
"valid_targets_min": 4456
},
{
"epoch": 4.140724946695096,
"grad_norm": 0.09004092446913044,
"learning_rate": 3.5312366230761154e-06,
"loss": 0.9975166320800781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25923553109169006,
"step": 973,
"valid_targets_mean": 14746.0,
"valid_targets_min": 2591
},
{
"epoch": 4.144989339019189,
"grad_norm": 0.07987252291433689,
"learning_rate": 3.497580775183258e-06,
"loss": 1.007072925567627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22519609332084656,
"step": 974,
"valid_targets_mean": 14035.6,
"valid_targets_min": 3378
},
{
"epoch": 4.149253731343284,
"grad_norm": 0.07537962159081893,
"learning_rate": 3.464070706995295e-06,
"loss": 1.0661035776138306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26262167096138,
"step": 975,
"valid_targets_mean": 15155.7,
"valid_targets_min": 2804
},
{
"epoch": 4.153518123667378,
"grad_norm": 0.0770008882437443,
"learning_rate": 3.4307067145347417e-06,
"loss": 1.0307209491729736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2410483956336975,
"step": 976,
"valid_targets_mean": 14065.8,
"valid_targets_min": 2854
},
{
"epoch": 4.157782515991471,
"grad_norm": 0.08222489461818047,
"learning_rate": 3.397489092533739e-06,
"loss": 0.9927602410316467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24605350196361542,
"step": 977,
"valid_targets_mean": 15237.8,
"valid_targets_min": 4359
},
{
"epoch": 4.162046908315565,
"grad_norm": 0.07693172808089653,
"learning_rate": 3.364418134431371e-06,
"loss": 1.0070375204086304,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2607457935810089,
"step": 978,
"valid_targets_mean": 15019.0,
"valid_targets_min": 6113
},
{
"epoch": 4.166311300639659,
"grad_norm": 0.07609038645825848,
"learning_rate": 3.331494132371149e-06,
"loss": 1.067734956741333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2723226845264435,
"step": 979,
"valid_targets_mean": 14850.1,
"valid_targets_min": 6481
},
{
"epoch": 4.1705756929637525,
"grad_norm": 0.09036706056283246,
"learning_rate": 3.2987173771983816e-06,
"loss": 1.0353126525878906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2645860016345978,
"step": 980,
"valid_targets_mean": 13865.8,
"valid_targets_min": 1701
},
{
"epoch": 4.174840085287847,
"grad_norm": 0.07444923273861716,
"learning_rate": 3.266088158457634e-06,
"loss": 1.0469627380371094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24626043438911438,
"step": 981,
"valid_targets_mean": 15693.0,
"valid_targets_min": 1823
},
{
"epoch": 4.17910447761194,
"grad_norm": 0.08060051372433151,
"learning_rate": 3.233606764390147e-06,
"loss": 1.0352073907852173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27150383591651917,
"step": 982,
"valid_targets_mean": 14859.0,
"valid_targets_min": 2521
},
{
"epoch": 4.183368869936034,
"grad_norm": 0.07177347163427206,
"learning_rate": 3.2012734819313127e-06,
"loss": 1.0209245681762695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2500031590461731,
"step": 983,
"valid_targets_mean": 14918.2,
"valid_targets_min": 2570
},
{
"epoch": 4.187633262260128,
"grad_norm": 0.07271972399433943,
"learning_rate": 3.1690885967081187e-06,
"loss": 1.0066120624542236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24489077925682068,
"step": 984,
"valid_targets_mean": 14431.8,
"valid_targets_min": 4504
},
{
"epoch": 4.1918976545842215,
"grad_norm": 0.07720189666214854,
"learning_rate": 3.1370523930366393e-06,
"loss": 1.0655772686004639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29166609048843384,
"step": 985,
"valid_targets_mean": 15563.3,
"valid_targets_min": 8619
},
{
"epoch": 4.196162046908316,
"grad_norm": 0.07770212977606279,
"learning_rate": 3.105165153919525e-06,
"loss": 1.0116127729415894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2627195715904236,
"step": 986,
"valid_targets_mean": 14734.6,
"valid_targets_min": 8192
},
{
"epoch": 4.20042643923241,
"grad_norm": 0.07651083979721754,
"learning_rate": 3.073427161043492e-06,
"loss": 1.0490927696228027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2582077085971832,
"step": 987,
"valid_targets_mean": 14594.6,
"valid_targets_min": 1921
},
{
"epoch": 4.204690831556503,
"grad_norm": 0.07640785916466152,
"learning_rate": 3.0418386947768463e-06,
"loss": 1.0269312858581543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27077004313468933,
"step": 988,
"valid_targets_mean": 15284.5,
"valid_targets_min": 1025
},
{
"epoch": 4.208955223880597,
"grad_norm": 0.073572451297806,
"learning_rate": 3.01040003416698e-06,
"loss": 1.0495110750198364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2981879413127899,
"step": 989,
"valid_targets_mean": 15558.4,
"valid_targets_min": 12694
},
{
"epoch": 4.21321961620469,
"grad_norm": 0.06993818307990839,
"learning_rate": 2.97911145693796e-06,
"loss": 1.0402381420135498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27741050720214844,
"step": 990,
"valid_targets_mean": 14987.6,
"valid_targets_min": 2914
},
{
"epoch": 4.217484008528785,
"grad_norm": 0.07329490772794882,
"learning_rate": 2.947973239488009e-06,
"loss": 1.037589192390442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2652382552623749,
"step": 991,
"valid_targets_mean": 14648.6,
"valid_targets_min": 2540
},
{
"epoch": 4.221748400852879,
"grad_norm": 0.087682726264505,
"learning_rate": 2.91698565688711e-06,
"loss": 1.0166399478912354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27021846175193787,
"step": 992,
"valid_targets_mean": 14552.6,
"valid_targets_min": 1542
},
{
"epoch": 4.226012793176972,
"grad_norm": 0.07109782672383178,
"learning_rate": 2.886148982874566e-06,
"loss": 0.9839938879013062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26262885332107544,
"step": 993,
"valid_targets_mean": 15046.0,
"valid_targets_min": 5766
},
{
"epoch": 4.230277185501066,
"grad_norm": 0.07017360486346118,
"learning_rate": 2.8554634898565668e-06,
"loss": 1.027765154838562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24965378642082214,
"step": 994,
"valid_targets_mean": 14635.0,
"valid_targets_min": 2453
},
{
"epoch": 4.23454157782516,
"grad_norm": 0.06970586433829537,
"learning_rate": 2.824929448903806e-06,
"loss": 1.053523063659668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24483788013458252,
"step": 995,
"valid_targets_mean": 15306.7,
"valid_targets_min": 4422
},
{
"epoch": 4.2388059701492535,
"grad_norm": 0.07184274637231955,
"learning_rate": 2.794547129749059e-06,
"loss": 0.9990878105163574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24802438914775848,
"step": 996,
"valid_targets_mean": 15407.0,
"valid_targets_min": 4412
},
{
"epoch": 4.243070362473348,
"grad_norm": 0.06925109863912099,
"learning_rate": 2.7643168007848255e-06,
"loss": 1.0482103824615479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25303930044174194,
"step": 997,
"valid_targets_mean": 13620.5,
"valid_targets_min": 1435
},
{
"epoch": 4.247334754797441,
"grad_norm": 0.06735821822689972,
"learning_rate": 2.734238729060956e-06,
"loss": 0.9925441741943359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2445787787437439,
"step": 998,
"valid_targets_mean": 14932.4,
"valid_targets_min": 6850
},
{
"epoch": 4.251599147121535,
"grad_norm": 0.07138994852587088,
"learning_rate": 2.7043131802822653e-06,
"loss": 0.9845558404922485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24605567753314972,
"step": 999,
"valid_targets_mean": 14351.1,
"valid_targets_min": 1464
},
{
"epoch": 4.255863539445629,
"grad_norm": 0.06786370583115167,
"learning_rate": 2.674540418806222e-06,
"loss": 1.0410676002502441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24401737749576569,
"step": 1000,
"valid_targets_mean": 13642.3,
"valid_targets_min": 1974
},
{
"epoch": 4.2601279317697225,
"grad_norm": 0.07517027725363455,
"learning_rate": 2.6449207076405857e-06,
"loss": 1.0475417375564575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2740405797958374,
"step": 1001,
"valid_targets_mean": 14205.2,
"valid_targets_min": 3655
},
{
"epoch": 4.264392324093817,
"grad_norm": 0.0719817351889638,
"learning_rate": 2.6154543084411035e-06,
"loss": 1.0560872554779053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24943794310092926,
"step": 1002,
"valid_targets_mean": 14085.6,
"valid_targets_min": 3241
},
{
"epoch": 4.268656716417911,
"grad_norm": 0.07320803777615464,
"learning_rate": 2.5861414815091834e-06,
"loss": 1.025554895401001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2566283941268921,
"step": 1003,
"valid_targets_mean": 14465.6,
"valid_targets_min": 3571
},
{
"epoch": 4.272921108742004,
"grad_norm": 0.07273708392360496,
"learning_rate": 2.5569824857895987e-06,
"loss": 1.0195801258087158,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2482813149690628,
"step": 1004,
"valid_targets_mean": 15730.6,
"valid_targets_min": 8771
},
{
"epoch": 4.277185501066098,
"grad_norm": 0.07326459178916632,
"learning_rate": 2.5279775788682083e-06,
"loss": 1.0226316452026367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24337130784988403,
"step": 1005,
"valid_targets_mean": 14080.0,
"valid_targets_min": 1232
},
{
"epoch": 4.281449893390192,
"grad_norm": 0.07572586528562553,
"learning_rate": 2.499127016969671e-06,
"loss": 1.0520446300506592,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2823803722858429,
"step": 1006,
"valid_targets_mean": 15276.1,
"valid_targets_min": 7310
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.0693548817022254,
"learning_rate": 2.4704310549551934e-06,
"loss": 1.0366793870925903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26049014925956726,
"step": 1007,
"valid_targets_mean": 15119.7,
"valid_targets_min": 2743
},
{
"epoch": 4.28997867803838,
"grad_norm": 0.07494014315230327,
"learning_rate": 2.441889946320266e-06,
"loss": 1.0158147811889648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23623259365558624,
"step": 1008,
"valid_targets_mean": 14626.1,
"valid_targets_min": 828
},
{
"epoch": 4.294243070362473,
"grad_norm": 0.07200429185575995,
"learning_rate": 2.4135039431924233e-06,
"loss": 1.0710257291793823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2550296187400818,
"step": 1009,
"valid_targets_mean": 14658.4,
"valid_targets_min": 2366
},
{
"epoch": 4.298507462686567,
"grad_norm": 0.07758205609112181,
"learning_rate": 2.3852732963290426e-06,
"loss": 1.0198653936386108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25936928391456604,
"step": 1010,
"valid_targets_mean": 15914.5,
"valid_targets_min": 11866
},
{
"epoch": 4.302771855010661,
"grad_norm": 0.0780502400085244,
"learning_rate": 2.3571982551150853e-06,
"loss": 1.025866150856018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24136891961097717,
"step": 1011,
"valid_targets_mean": 15073.8,
"valid_targets_min": 1974
},
{
"epoch": 4.3070362473347545,
"grad_norm": 0.07807756375486362,
"learning_rate": 2.329279067560937e-06,
"loss": 1.004814624786377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24564941227436066,
"step": 1012,
"valid_targets_mean": 15867.1,
"valid_targets_min": 11603
},
{
"epoch": 4.311300639658849,
"grad_norm": 0.06614656614991912,
"learning_rate": 2.301515980300182e-06,
"loss": 1.053879976272583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25577622652053833,
"step": 1013,
"valid_targets_mean": 15025.0,
"valid_targets_min": 4192
},
{
"epoch": 4.315565031982943,
"grad_norm": 0.06535902664908347,
"learning_rate": 2.2739092385874527e-06,
"loss": 1.0165059566497803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25913357734680176,
"step": 1014,
"valid_targets_mean": 15433.1,
"valid_targets_min": 10715
},
{
"epoch": 4.319829424307036,
"grad_norm": 0.07002332106979323,
"learning_rate": 2.2464590862962443e-06,
"loss": 1.05423903465271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26438140869140625,
"step": 1015,
"valid_targets_mean": 15712.1,
"valid_targets_min": 12197
},
{
"epoch": 4.32409381663113,
"grad_norm": 0.08196991753288417,
"learning_rate": 2.219165765916769e-06,
"loss": 1.026976466178894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26024115085601807,
"step": 1016,
"valid_targets_mean": 14208.7,
"valid_targets_min": 5468
},
{
"epoch": 4.3283582089552235,
"grad_norm": 0.07318657487755899,
"learning_rate": 2.192029518553798e-06,
"loss": 0.9968132972717285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2568413317203522,
"step": 1017,
"valid_targets_mean": 14789.1,
"valid_targets_min": 2464
},
{
"epoch": 4.332622601279318,
"grad_norm": 0.06858060825507008,
"learning_rate": 2.165050583924566e-06,
"loss": 1.0049998760223389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25639671087265015,
"step": 1018,
"valid_targets_mean": 14663.2,
"valid_targets_min": 1783
},
{
"epoch": 4.336886993603412,
"grad_norm": 0.07411521045428578,
"learning_rate": 2.1382292003566163e-06,
"loss": 1.0314619541168213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2556789517402649,
"step": 1019,
"valid_targets_mean": 14624.2,
"valid_targets_min": 3025
},
{
"epoch": 4.341151385927505,
"grad_norm": 0.06511150425437823,
"learning_rate": 2.1115656047857213e-06,
"loss": 0.9586358070373535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23440968990325928,
"step": 1020,
"valid_targets_mean": 14541.2,
"valid_targets_min": 6528
},
{
"epoch": 4.345415778251599,
"grad_norm": 0.06719230934513776,
"learning_rate": 2.0850600327537806e-06,
"loss": 1.0058658123016357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24526192247867584,
"step": 1021,
"valid_targets_mean": 13832.8,
"valid_targets_min": 1874
},
{
"epoch": 4.349680170575693,
"grad_norm": 0.07113438156818028,
"learning_rate": 2.058712718406719e-06,
"loss": 1.0557451248168945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25755804777145386,
"step": 1022,
"valid_targets_mean": 14744.3,
"valid_targets_min": 3343
},
{
"epoch": 4.353944562899787,
"grad_norm": 0.06669766102073219,
"learning_rate": 2.032523894492471e-06,
"loss": 1.0497808456420898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2627623975276947,
"step": 1023,
"valid_targets_mean": 15213.4,
"valid_targets_min": 1537
},
{
"epoch": 4.358208955223881,
"grad_norm": 0.07607320541794989,
"learning_rate": 2.0064937923588634e-06,
"loss": 1.0534238815307617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2619283199310303,
"step": 1024,
"valid_targets_mean": 14656.3,
"valid_targets_min": 5183
},
{
"epoch": 4.362473347547974,
"grad_norm": 0.08119606687645467,
"learning_rate": 1.9806226419516195e-06,
"loss": 1.0498461723327637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26316332817077637,
"step": 1025,
"valid_targets_mean": 14722.5,
"valid_targets_min": 5331
},
{
"epoch": 4.366737739872068,
"grad_norm": 0.06862596308820929,
"learning_rate": 1.954910671812298e-06,
"loss": 1.0143101215362549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2361828088760376,
"step": 1026,
"valid_targets_mean": 14906.2,
"valid_targets_min": 1422
},
{
"epoch": 4.371002132196162,
"grad_norm": 0.07326739032031071,
"learning_rate": 1.9293581090762894e-06,
"loss": 1.0275071859359741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28308725357055664,
"step": 1027,
"valid_targets_mean": 14888.9,
"valid_targets_min": 6743
},
{
"epoch": 4.3752665245202556,
"grad_norm": 0.06863868612680125,
"learning_rate": 1.9039651794708058e-06,
"loss": 0.9960910081863403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24030712246894836,
"step": 1028,
"valid_targets_mean": 14711.3,
"valid_targets_min": 4340
},
{
"epoch": 4.37953091684435,
"grad_norm": 0.07549350149667627,
"learning_rate": 1.8787321073128817e-06,
"loss": 1.0455996990203857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2824949026107788,
"step": 1029,
"valid_targets_mean": 15550.6,
"valid_targets_min": 12301
},
{
"epoch": 4.383795309168444,
"grad_norm": 0.07038637724183405,
"learning_rate": 1.8536591155073958e-06,
"loss": 1.0913045406341553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2597290277481079,
"step": 1030,
"valid_targets_mean": 14563.6,
"valid_targets_min": 2697
},
{
"epoch": 4.388059701492537,
"grad_norm": 0.07162871574818422,
"learning_rate": 1.8287464255451181e-06,
"loss": 1.0274858474731445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24804572761058807,
"step": 1031,
"valid_targets_mean": 14384.5,
"valid_targets_min": 5063
},
{
"epoch": 4.392324093816631,
"grad_norm": 0.07930045599964562,
"learning_rate": 1.803994257500714e-06,
"loss": 1.024867057800293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25626641511917114,
"step": 1032,
"valid_targets_mean": 14941.4,
"valid_targets_min": 2972
},
{
"epoch": 4.396588486140725,
"grad_norm": 0.07200737738660623,
"learning_rate": 1.7794028300308474e-06,
"loss": 1.0426933765411377,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2666964530944824,
"step": 1033,
"valid_targets_mean": 14783.8,
"valid_targets_min": 1582
},
{
"epoch": 4.400852878464819,
"grad_norm": 0.06776972771605225,
"learning_rate": 1.7549723603722003e-06,
"loss": 1.026928424835205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570304870605469,
"step": 1034,
"valid_targets_mean": 15220.8,
"valid_targets_min": 5119
},
{
"epoch": 4.405117270788913,
"grad_norm": 0.06979431930494794,
"learning_rate": 1.730703064339605e-06,
"loss": 1.0423942804336548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26676464080810547,
"step": 1035,
"valid_targets_mean": 15308.4,
"valid_targets_min": 7131
},
{
"epoch": 4.409381663113006,
"grad_norm": 0.07193218377728709,
"learning_rate": 1.7065951563241022e-06,
"loss": 0.9990702271461487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2415003776550293,
"step": 1036,
"valid_targets_mean": 14650.9,
"valid_targets_min": 2484
},
{
"epoch": 4.4136460554371,
"grad_norm": 0.06619207075683764,
"learning_rate": 1.682648849291051e-06,
"loss": 1.0610606670379639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25422343611717224,
"step": 1037,
"valid_targets_mean": 15126.1,
"valid_targets_min": 5196
},
{
"epoch": 4.417910447761194,
"grad_norm": 0.07334265144582895,
"learning_rate": 1.6588643547782579e-06,
"loss": 1.0670053958892822,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2618800401687622,
"step": 1038,
"valid_targets_mean": 15004.0,
"valid_targets_min": 6704
},
{
"epoch": 4.422174840085288,
"grad_norm": 0.07771088694803348,
"learning_rate": 1.6352418828941052e-06,
"loss": 1.0343689918518066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24691098928451538,
"step": 1039,
"valid_targets_mean": 14356.8,
"valid_targets_min": 5911
},
{
"epoch": 4.426439232409382,
"grad_norm": 0.08304686069812293,
"learning_rate": 1.6117816423156952e-06,
"loss": 1.0338213443756104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2660481631755829,
"step": 1040,
"valid_targets_mean": 15205.1,
"valid_targets_min": 9283
},
{
"epoch": 4.430703624733475,
"grad_norm": 0.06920176032446822,
"learning_rate": 1.5884838402870029e-06,
"loss": 1.0237390995025635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25461459159851074,
"step": 1041,
"valid_targets_mean": 14998.1,
"valid_targets_min": 1692
},
{
"epoch": 4.434968017057569,
"grad_norm": 0.0714054983112944,
"learning_rate": 1.5653486826170384e-06,
"loss": 1.0401220321655273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2660042643547058,
"step": 1042,
"valid_targets_mean": 14499.9,
"valid_targets_min": 2371
},
{
"epoch": 4.439232409381663,
"grad_norm": 0.07396556116131836,
"learning_rate": 1.5423763736780583e-06,
"loss": 1.0034527778625488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2357511669397354,
"step": 1043,
"valid_targets_mean": 14302.2,
"valid_targets_min": 2342
},
{
"epoch": 4.443496801705757,
"grad_norm": 0.07134451067312644,
"learning_rate": 1.5195671164037173e-06,
"loss": 1.0172343254089355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27009013295173645,
"step": 1044,
"valid_targets_mean": 14220.5,
"valid_targets_min": 950
},
{
"epoch": 4.447761194029851,
"grad_norm": 0.06601604046921343,
"learning_rate": 1.496921112287315e-06,
"loss": 1.0245767831802368,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25725841522216797,
"step": 1045,
"valid_targets_mean": 15405.5,
"valid_targets_min": 7009
},
{
"epoch": 4.452025586353945,
"grad_norm": 0.068600962040597,
"learning_rate": 1.4744385613799894e-06,
"loss": 1.038365125656128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2502679228782654,
"step": 1046,
"valid_targets_mean": 14626.3,
"valid_targets_min": 6602
},
{
"epoch": 4.456289978678038,
"grad_norm": 0.07591179379461509,
"learning_rate": 1.4521196622889644e-06,
"loss": 0.9995392560958862,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24727779626846313,
"step": 1047,
"valid_targets_mean": 13940.6,
"valid_targets_min": 2208
},
{
"epoch": 4.460554371002132,
"grad_norm": 0.06940371444223539,
"learning_rate": 1.4299646121757892e-06,
"loss": 1.057096242904663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25691652297973633,
"step": 1048,
"valid_targets_mean": 15446.3,
"valid_targets_min": 7365
},
{
"epoch": 4.464818763326226,
"grad_norm": 0.06763819548862086,
"learning_rate": 1.4079736067545912e-06,
"loss": 1.0104975700378418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25632327795028687,
"step": 1049,
"valid_targets_mean": 15491.4,
"valid_targets_min": 6949
},
{
"epoch": 4.46908315565032,
"grad_norm": 0.06514807434099039,
"learning_rate": 1.3861468402903634e-06,
"loss": 1.0043401718139648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25664448738098145,
"step": 1050,
"valid_targets_mean": 14708.8,
"valid_targets_min": 2469
},
{
"epoch": 4.473347547974414,
"grad_norm": 0.0731922706826196,
"learning_rate": 1.3644845055972322e-06,
"loss": 0.9790605306625366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.230549156665802,
"step": 1051,
"valid_targets_mean": 14711.8,
"valid_targets_min": 5728
},
{
"epoch": 4.477611940298507,
"grad_norm": 0.06594645842384818,
"learning_rate": 1.3429867940367626e-06,
"loss": 1.0149694681167603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28294098377227783,
"step": 1052,
"valid_targets_mean": 15629.1,
"valid_targets_min": 9281
},
{
"epoch": 4.481876332622601,
"grad_norm": 0.0686014278034404,
"learning_rate": 1.321653895516264e-06,
"loss": 1.0112483501434326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24371042847633362,
"step": 1053,
"valid_targets_mean": 14560.8,
"valid_targets_min": 2432
},
{
"epoch": 4.486140724946695,
"grad_norm": 0.0731223461819718,
"learning_rate": 1.3004859984871199e-06,
"loss": 1.0422847270965576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2515747547149658,
"step": 1054,
"valid_targets_mean": 14970.4,
"valid_targets_min": 3323
},
{
"epoch": 4.490405117270789,
"grad_norm": 0.06687333199186574,
"learning_rate": 1.279483289943102e-06,
"loss": 1.011574387550354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24524162709712982,
"step": 1055,
"valid_targets_mean": 15123.1,
"valid_targets_min": 5425
},
{
"epoch": 4.494669509594883,
"grad_norm": 0.07073431773921562,
"learning_rate": 1.2586459554187558e-06,
"loss": 1.0168395042419434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.236406609416008,
"step": 1056,
"valid_targets_mean": 13915.4,
"valid_targets_min": 2083
},
{
"epoch": 4.498933901918977,
"grad_norm": 0.0634225450722247,
"learning_rate": 1.2379741789877175e-06,
"loss": 0.9951160550117493,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24048984050750732,
"step": 1057,
"valid_targets_mean": 14242.8,
"valid_targets_min": 1715
},
{
"epoch": 4.50319829424307,
"grad_norm": 0.07766800622585368,
"learning_rate": 1.2174681432611245e-06,
"loss": 1.0221624374389648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26837795972824097,
"step": 1058,
"valid_targets_mean": 15247.6,
"valid_targets_min": 5344
},
{
"epoch": 4.507462686567164,
"grad_norm": 0.07060976347461718,
"learning_rate": 1.1971280293859811e-06,
"loss": 0.9929161071777344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27398234605789185,
"step": 1059,
"valid_targets_mean": 15723.8,
"valid_targets_min": 9482
},
{
"epoch": 4.5117270788912585,
"grad_norm": 0.0753410465734406,
"learning_rate": 1.17695401704357e-06,
"loss": 1.0152283906936646,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2540476322174072,
"step": 1060,
"valid_targets_mean": 14697.4,
"valid_targets_min": 4926
},
{
"epoch": 4.515991471215352,
"grad_norm": 0.07061174409259466,
"learning_rate": 1.1569462844478552e-06,
"loss": 1.0479135513305664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26879221200942993,
"step": 1061,
"valid_targets_mean": 14628.4,
"valid_targets_min": 7780
},
{
"epoch": 4.520255863539446,
"grad_norm": 0.07137569128746588,
"learning_rate": 1.1371050083439107e-06,
"loss": 1.0195355415344238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26304522156715393,
"step": 1062,
"valid_targets_mean": 14773.9,
"valid_targets_min": 3384
},
{
"epoch": 4.524520255863539,
"grad_norm": 0.0680445565318658,
"learning_rate": 1.1174303640063622e-06,
"loss": 1.0261856317520142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.262905091047287,
"step": 1063,
"valid_targets_mean": 15105.0,
"valid_targets_min": 9075
},
{
"epoch": 4.528784648187633,
"grad_norm": 0.0702614080770827,
"learning_rate": 1.097922525237849e-06,
"loss": 1.0566270351409912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2552175521850586,
"step": 1064,
"valid_targets_mean": 15474.4,
"valid_targets_min": 6595
},
{
"epoch": 4.533049040511727,
"grad_norm": 0.06862292834965017,
"learning_rate": 1.078581664367455e-06,
"loss": 1.0485453605651855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27617308497428894,
"step": 1065,
"valid_targets_mean": 14793.9,
"valid_targets_min": 5085
},
{
"epoch": 4.537313432835821,
"grad_norm": 0.06681411071007434,
"learning_rate": 1.0594079522492274e-06,
"loss": 1.0239062309265137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2609993517398834,
"step": 1066,
"valid_targets_mean": 15103.1,
"valid_targets_min": 7909
},
{
"epoch": 4.541577825159915,
"grad_norm": 0.06528963957554342,
"learning_rate": 1.040401558260633e-06,
"loss": 0.9920482039451599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24381262063980103,
"step": 1067,
"valid_targets_mean": 15355.5,
"valid_targets_min": 4640
},
{
"epoch": 4.545842217484008,
"grad_norm": 0.07676207346706825,
"learning_rate": 1.0215626503010911e-06,
"loss": 1.0099127292633057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2594386339187622,
"step": 1068,
"valid_targets_mean": 15372.2,
"valid_targets_min": 6069
},
{
"epoch": 4.550106609808102,
"grad_norm": 0.06516871142633743,
"learning_rate": 1.002891394790475e-06,
"loss": 1.0267794132232666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2759925127029419,
"step": 1069,
"valid_targets_mean": 15337.8,
"valid_targets_min": 11067
},
{
"epoch": 4.554371002132196,
"grad_norm": 0.06530391631580551,
"learning_rate": 9.843879566676273e-07,
"loss": 0.992304265499115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23234936594963074,
"step": 1070,
"valid_targets_mean": 14723.0,
"valid_targets_min": 7371
},
{
"epoch": 4.55863539445629,
"grad_norm": 0.07210719157423057,
"learning_rate": 9.660524993889386e-07,
"loss": 1.0287081003189087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2639193832874298,
"step": 1071,
"valid_targets_mean": 14991.1,
"valid_targets_min": 2865
},
{
"epoch": 4.562899786780384,
"grad_norm": 0.07594692212651825,
"learning_rate": 9.478851849268733e-07,
"loss": 1.064483404159546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25527459383010864,
"step": 1072,
"valid_targets_mean": 15471.6,
"valid_targets_min": 4632
},
{
"epoch": 4.567164179104478,
"grad_norm": 0.07518135770790788,
"learning_rate": 9.298861737685527e-07,
"loss": 1.040311336517334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26637256145477295,
"step": 1073,
"valid_targets_mean": 15332.1,
"valid_targets_min": 4525
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.06628130113815535,
"learning_rate": 9.120556249143341e-07,
"loss": 1.0161117315292358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2659457325935364,
"step": 1074,
"valid_targets_mean": 15334.8,
"valid_targets_min": 6423
},
{
"epoch": 4.575692963752665,
"grad_norm": 0.06477883899758395,
"learning_rate": 8.943936958763988e-07,
"loss": 1.0359610319137573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25776997208595276,
"step": 1075,
"valid_targets_mean": 15012.3,
"valid_targets_min": 7695
},
{
"epoch": 4.5799573560767595,
"grad_norm": 0.07148636728641779,
"learning_rate": 8.769005426773836e-07,
"loss": 1.0297731161117554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26610493659973145,
"step": 1076,
"valid_targets_mean": 14544.8,
"valid_targets_min": 3244
},
{
"epoch": 4.584221748400853,
"grad_norm": 0.06839633103821331,
"learning_rate": 8.595763198489714e-07,
"loss": 1.0279765129089355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28054559230804443,
"step": 1077,
"valid_targets_mean": 15303.6,
"valid_targets_min": 2857
},
{
"epoch": 4.588486140724947,
"grad_norm": 0.06731727414613335,
"learning_rate": 8.42421180430546e-07,
"loss": 1.0172414779663086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2570076584815979,
"step": 1078,
"valid_targets_mean": 13805.4,
"valid_targets_min": 3144
},
{
"epoch": 4.59275053304904,
"grad_norm": 0.06393586449266486,
"learning_rate": 8.254352759678386e-07,
"loss": 1.027620792388916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25848716497421265,
"step": 1079,
"valid_targets_mean": 14849.2,
"valid_targets_min": 1385
},
{
"epoch": 4.597014925373134,
"grad_norm": 0.0686772807194171,
"learning_rate": 8.086187565115877e-07,
"loss": 1.0338844060897827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26648616790771484,
"step": 1080,
"valid_targets_mean": 15024.0,
"valid_targets_min": 6453
},
{
"epoch": 4.601279317697228,
"grad_norm": 0.07337205384563839,
"learning_rate": 7.919717706162067e-07,
"loss": 0.9947189688682556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2360054850578308,
"step": 1081,
"valid_targets_mean": 15262.1,
"valid_targets_min": 4427
},
{
"epoch": 4.605543710021322,
"grad_norm": 0.06594479533951486,
"learning_rate": 7.754944653384777e-07,
"loss": 1.0052752494812012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24188333749771118,
"step": 1082,
"valid_targets_mean": 14649.6,
"valid_targets_min": 3870
},
{
"epoch": 4.609808102345416,
"grad_norm": 0.07064212719654153,
"learning_rate": 7.591869862362534e-07,
"loss": 1.0418689250946045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26256895065307617,
"step": 1083,
"valid_targets_mean": 14202.5,
"valid_targets_min": 2408
},
{
"epoch": 4.61407249466951,
"grad_norm": 0.07087438339567025,
"learning_rate": 7.430494773671682e-07,
"loss": 1.0515038967132568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23694118857383728,
"step": 1084,
"valid_targets_mean": 13859.9,
"valid_targets_min": 1023
},
{
"epoch": 4.618336886993603,
"grad_norm": 0.07274082838805951,
"learning_rate": 7.270820812873714e-07,
"loss": 0.9710164666175842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2399815320968628,
"step": 1085,
"valid_targets_mean": 14702.4,
"valid_targets_min": 3313
},
{
"epoch": 4.622601279317697,
"grad_norm": 0.08432480970803712,
"learning_rate": 7.112849390502563e-07,
"loss": 1.022752285003662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25002214312553406,
"step": 1086,
"valid_targets_mean": 14602.7,
"valid_targets_min": 5116
},
{
"epoch": 4.6268656716417915,
"grad_norm": 0.0696628864265594,
"learning_rate": 6.956581902052306e-07,
"loss": 0.9715498685836792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2668806314468384,
"step": 1087,
"valid_targets_mean": 15010.3,
"valid_targets_min": 4978
},
{
"epoch": 4.631130063965885,
"grad_norm": 0.07163952955323827,
"learning_rate": 6.802019727964593e-07,
"loss": 1.032862663269043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25951439142227173,
"step": 1088,
"valid_targets_mean": 15371.7,
"valid_targets_min": 8077
},
{
"epoch": 4.635394456289979,
"grad_norm": 0.06306104281711407,
"learning_rate": 6.64916423361679e-07,
"loss": 1.0174851417541504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2721509337425232,
"step": 1089,
"valid_targets_mean": 15294.1,
"valid_targets_min": 2618
},
{
"epoch": 4.639658848614072,
"grad_norm": 0.06659952941914901,
"learning_rate": 6.498016769309567e-07,
"loss": 1.0645792484283447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26195454597473145,
"step": 1090,
"valid_targets_mean": 14959.5,
"valid_targets_min": 7016
},
{
"epoch": 4.643923240938166,
"grad_norm": 0.06450710582873058,
"learning_rate": 6.348578670255224e-07,
"loss": 1.0212966203689575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23479299247264862,
"step": 1091,
"valid_targets_mean": 14402.1,
"valid_targets_min": 2491
},
{
"epoch": 4.6481876332622605,
"grad_norm": 0.0725568214493117,
"learning_rate": 6.200851256565799e-07,
"loss": 1.0405863523483276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2567938268184662,
"step": 1092,
"valid_targets_mean": 15573.6,
"valid_targets_min": 6038
},
{
"epoch": 4.652452025586354,
"grad_norm": 0.07133862694492665,
"learning_rate": 6.054835833241357e-07,
"loss": 1.095170497894287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2838001549243927,
"step": 1093,
"valid_targets_mean": 15277.7,
"valid_targets_min": 4293
},
{
"epoch": 4.656716417910448,
"grad_norm": 0.07002347703730735,
"learning_rate": 5.910533690158593e-07,
"loss": 0.9981926679611206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2459121197462082,
"step": 1094,
"valid_targets_mean": 14975.6,
"valid_targets_min": 3615
},
{
"epoch": 4.660980810234541,
"grad_norm": 0.06953398777655154,
"learning_rate": 5.767946102059307e-07,
"loss": 0.9948866367340088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2484731674194336,
"step": 1095,
"valid_targets_mean": 15209.9,
"valid_targets_min": 9230
},
{
"epoch": 4.665245202558635,
"grad_norm": 0.07088669909338102,
"learning_rate": 5.627074328539173e-07,
"loss": 1.0146452188491821,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2458096444606781,
"step": 1096,
"valid_targets_mean": 14502.3,
"valid_targets_min": 2478
},
{
"epoch": 4.669509594882729,
"grad_norm": 0.06364041390975456,
"learning_rate": 5.487919614036741e-07,
"loss": 1.041628360748291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2578613758087158,
"step": 1097,
"valid_targets_mean": 14867.4,
"valid_targets_min": 918
},
{
"epoch": 4.673773987206823,
"grad_norm": 0.06729007917374852,
"learning_rate": 5.350483187822231e-07,
"loss": 1.0418592691421509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23668396472930908,
"step": 1098,
"valid_targets_mean": 14719.7,
"valid_targets_min": 2299
},
{
"epoch": 4.678038379530917,
"grad_norm": 0.07078240243018288,
"learning_rate": 5.214766263986848e-07,
"loss": 1.0214943885803223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24630528688430786,
"step": 1099,
"valid_targets_mean": 14434.8,
"valid_targets_min": 4838
},
{
"epoch": 4.682302771855011,
"grad_norm": 0.0732667090432839,
"learning_rate": 5.080770041431926e-07,
"loss": 1.0413594245910645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26989224553108215,
"step": 1100,
"valid_targets_mean": 15117.5,
"valid_targets_min": 7453
},
{
"epoch": 4.686567164179104,
"grad_norm": 0.07371846187994414,
"learning_rate": 4.948495703858492e-07,
"loss": 1.0573935508728027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2608649432659149,
"step": 1101,
"valid_targets_mean": 15384.5,
"valid_targets_min": 8024
},
{
"epoch": 4.690831556503198,
"grad_norm": 0.0693879158573615,
"learning_rate": 4.81794441975667e-07,
"loss": 1.036195993423462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2616111934185028,
"step": 1102,
"valid_targets_mean": 14345.1,
"valid_targets_min": 4227
},
{
"epoch": 4.6950959488272925,
"grad_norm": 0.06407155646007538,
"learning_rate": 4.689117342395388e-07,
"loss": 1.004786491394043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25563374161720276,
"step": 1103,
"valid_targets_mean": 15178.0,
"valid_targets_min": 7904
},
{
"epoch": 4.699360341151386,
"grad_norm": 0.07298714830099647,
"learning_rate": 4.5620156098122204e-07,
"loss": 1.011723279953003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2569216191768646,
"step": 1104,
"valid_targets_mean": 15558.1,
"valid_targets_min": 5908
},
{
"epoch": 4.70362473347548,
"grad_norm": 0.0685237557864966,
"learning_rate": 4.4366403448033334e-07,
"loss": 1.0312525033950806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25594452023506165,
"step": 1105,
"valid_targets_mean": 14322.6,
"valid_targets_min": 3879
},
{
"epoch": 4.707889125799573,
"grad_norm": 0.0703474360253264,
"learning_rate": 4.3129926549136057e-07,
"loss": 1.0201470851898193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22442176938056946,
"step": 1106,
"valid_targets_mean": 13867.3,
"valid_targets_min": 1629
},
{
"epoch": 4.712153518123667,
"grad_norm": 0.07336197186961974,
"learning_rate": 4.191073632426701e-07,
"loss": 1.0148887634277344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25482404232025146,
"step": 1107,
"valid_targets_mean": 15000.6,
"valid_targets_min": 3103
},
{
"epoch": 4.7164179104477615,
"grad_norm": 0.07056139284812354,
"learning_rate": 4.0708843543555643e-07,
"loss": 1.0253814458847046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2721610963344574,
"step": 1108,
"valid_targets_mean": 15198.7,
"valid_targets_min": 7499
},
{
"epoch": 4.720682302771855,
"grad_norm": 0.06803380374321673,
"learning_rate": 3.95242588243292e-07,
"loss": 1.0191898345947266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26221030950546265,
"step": 1109,
"valid_targets_mean": 15062.7,
"valid_targets_min": 5829
},
{
"epoch": 4.724946695095949,
"grad_norm": 0.0725987925080408,
"learning_rate": 3.8356992631017e-07,
"loss": 1.02607262134552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2717529535293579,
"step": 1110,
"valid_targets_mean": 14682.3,
"valid_targets_min": 5934
},
{
"epoch": 4.729211087420042,
"grad_norm": 0.0658803796851549,
"learning_rate": 3.720705527506008e-07,
"loss": 1.0193414688110352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2442600578069687,
"step": 1111,
"valid_targets_mean": 14764.0,
"valid_targets_min": 2716
},
{
"epoch": 4.733475479744136,
"grad_norm": 0.06990139750491582,
"learning_rate": 3.60744569148197e-07,
"loss": 1.0192015171051025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26712778210639954,
"step": 1112,
"valid_targets_mean": 15267.0,
"valid_targets_min": 8504
},
{
"epoch": 4.73773987206823,
"grad_norm": 0.06506833736536219,
"learning_rate": 3.4959207555485873e-07,
"loss": 1.0410380363464355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2598136067390442,
"step": 1113,
"valid_targets_mean": 14534.2,
"valid_targets_min": 1877
},
{
"epoch": 4.742004264392325,
"grad_norm": 0.0709803119615405,
"learning_rate": 3.3861317048992317e-07,
"loss": 1.0387706756591797,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25980570912361145,
"step": 1114,
"valid_targets_mean": 14683.8,
"valid_targets_min": 4848
},
{
"epoch": 4.746268656716418,
"grad_norm": 0.0795429644147577,
"learning_rate": 3.278079509392562e-07,
"loss": 1.0260200500488281,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27000313997268677,
"step": 1115,
"valid_targets_mean": 15214.4,
"valid_targets_min": 1314
},
{
"epoch": 4.750533049040512,
"grad_norm": 0.07080126753595156,
"learning_rate": 3.171765123544224e-07,
"loss": 1.0400683879852295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27333077788352966,
"step": 1116,
"valid_targets_mean": 14569.4,
"valid_targets_min": 1579
},
{
"epoch": 4.754797441364605,
"grad_norm": 0.06601794056438176,
"learning_rate": 3.06718948651834e-07,
"loss": 0.9994338154792786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26492956280708313,
"step": 1117,
"valid_targets_mean": 15930.9,
"valid_targets_min": 10189
},
{
"epoch": 4.759061833688699,
"grad_norm": 0.07388133571360742,
"learning_rate": 2.964353522119168e-07,
"loss": 1.0069022178649902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25079345703125,
"step": 1118,
"valid_targets_mean": 14631.0,
"valid_targets_min": 3805
},
{
"epoch": 4.7633262260127935,
"grad_norm": 0.07064063240173123,
"learning_rate": 2.863258138783032e-07,
"loss": 0.9954191446304321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24160648882389069,
"step": 1119,
"valid_targets_mean": 15717.5,
"valid_targets_min": 8591
},
{
"epoch": 4.767590618336887,
"grad_norm": 0.06882403332283994,
"learning_rate": 2.7639042295702245e-07,
"loss": 1.0442975759506226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26812103390693665,
"step": 1120,
"valid_targets_mean": 14855.8,
"valid_targets_min": 2705
},
{
"epoch": 4.771855010660981,
"grad_norm": 0.06332263057368195,
"learning_rate": 2.666292672157056e-07,
"loss": 1.0528706312179565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26132845878601074,
"step": 1121,
"valid_targets_mean": 15290.7,
"valid_targets_min": 4054
},
{
"epoch": 4.776119402985074,
"grad_norm": 0.07675127563438205,
"learning_rate": 2.570424328828325e-07,
"loss": 1.0675115585327148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2756962478160858,
"step": 1122,
"valid_targets_mean": 15214.0,
"valid_targets_min": 7186
},
{
"epoch": 4.780383795309168,
"grad_norm": 0.07197151758312617,
"learning_rate": 2.4763000464694377e-07,
"loss": 1.0107743740081787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2568907141685486,
"step": 1123,
"valid_targets_mean": 15773.6,
"valid_targets_min": 8408
},
{
"epoch": 4.7846481876332625,
"grad_norm": 0.06500712771537732,
"learning_rate": 2.383920656559102e-07,
"loss": 1.0048414468765259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23881423473358154,
"step": 1124,
"valid_targets_mean": 14540.1,
"valid_targets_min": 1433
},
{
"epoch": 4.788912579957356,
"grad_norm": 0.06381821089228037,
"learning_rate": 2.2932869751619568e-07,
"loss": 0.9843084812164307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2623206377029419,
"step": 1125,
"valid_targets_mean": 15185.5,
"valid_targets_min": 4701
},
{
"epoch": 4.79317697228145,
"grad_norm": 0.06671285990992103,
"learning_rate": 2.2043998029212643e-07,
"loss": 1.0114637613296509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2629712224006653,
"step": 1126,
"valid_targets_mean": 14384.1,
"valid_targets_min": 3439
},
{
"epoch": 4.797441364605544,
"grad_norm": 0.06733736034184398,
"learning_rate": 2.1172599250519398e-07,
"loss": 1.0296120643615723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24320898950099945,
"step": 1127,
"valid_targets_mean": 14321.7,
"valid_targets_min": 5222
},
{
"epoch": 4.801705756929637,
"grad_norm": 0.06714757058555061,
"learning_rate": 2.0318681113336013e-07,
"loss": 1.0549672842025757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2984343469142914,
"step": 1128,
"valid_targets_mean": 15225.4,
"valid_targets_min": 8459
},
{
"epoch": 4.8059701492537314,
"grad_norm": 0.06960677138396562,
"learning_rate": 1.9482251161037302e-07,
"loss": 1.021659016609192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24553701281547546,
"step": 1129,
"valid_targets_mean": 15276.0,
"valid_targets_min": 4215
},
{
"epoch": 4.810234541577826,
"grad_norm": 0.06841403613884264,
"learning_rate": 1.866331678251032e-07,
"loss": 1.044973373413086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23122069239616394,
"step": 1130,
"valid_targets_mean": 14052.2,
"valid_targets_min": 2103
},
{
"epoch": 4.814498933901919,
"grad_norm": 0.06404617257701016,
"learning_rate": 1.7861885212088869e-07,
"loss": 1.0095136165618896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2586168646812439,
"step": 1131,
"valid_targets_mean": 15317.8,
"valid_targets_min": 8293
},
{
"epoch": 4.818763326226013,
"grad_norm": 0.07514066868771424,
"learning_rate": 1.7077963529490204e-07,
"loss": 1.0391095876693726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26711028814315796,
"step": 1132,
"valid_targets_mean": 14660.0,
"valid_targets_min": 5829
},
{
"epoch": 4.823027718550106,
"grad_norm": 0.06940507091598883,
"learning_rate": 1.6311558659751535e-07,
"loss": 1.0170023441314697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24466711282730103,
"step": 1133,
"valid_targets_mean": 14753.7,
"valid_targets_min": 1286
},
{
"epoch": 4.8272921108742,
"grad_norm": 0.06982357949012676,
"learning_rate": 1.5562677373169855e-07,
"loss": 1.0421884059906006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2834610939025879,
"step": 1134,
"valid_targets_mean": 14628.7,
"valid_targets_min": 2398
},
{
"epoch": 4.8315565031982945,
"grad_norm": 0.07543983717415494,
"learning_rate": 1.483132628524131e-07,
"loss": 1.0707218647003174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25955963134765625,
"step": 1135,
"valid_targets_mean": 13616.1,
"valid_targets_min": 1714
},
{
"epoch": 4.835820895522388,
"grad_norm": 0.07005877911193274,
"learning_rate": 1.4117511856603262e-07,
"loss": 1.0253279209136963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25752508640289307,
"step": 1136,
"valid_targets_mean": 14949.2,
"valid_targets_min": 7892
},
{
"epoch": 4.840085287846482,
"grad_norm": 0.07538440424078686,
"learning_rate": 1.342124039297721e-07,
"loss": 1.05033278465271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27345603704452515,
"step": 1137,
"valid_targets_mean": 15026.5,
"valid_targets_min": 3711
},
{
"epoch": 4.844349680170575,
"grad_norm": 0.07216648298727922,
"learning_rate": 1.2742518045112396e-07,
"loss": 1.0316473245620728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26999539136886597,
"step": 1138,
"valid_targets_mean": 14801.5,
"valid_targets_min": 5212
},
{
"epoch": 4.848614072494669,
"grad_norm": 0.06808362405102462,
"learning_rate": 1.2081350808732518e-07,
"loss": 0.9786754846572876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2707619369029999,
"step": 1139,
"valid_targets_mean": 14802.5,
"valid_targets_min": 5878
},
{
"epoch": 4.8528784648187635,
"grad_norm": 0.07003955658995022,
"learning_rate": 1.143774452448243e-07,
"loss": 1.1124558448791504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2772119641304016,
"step": 1140,
"valid_targets_mean": 14827.2,
"valid_targets_min": 2060
},
{
"epoch": 4.857142857142857,
"grad_norm": 0.06466853838334986,
"learning_rate": 1.0811704877875528e-07,
"loss": 1.015834093093872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23194001615047455,
"step": 1141,
"valid_targets_mean": 15103.3,
"valid_targets_min": 2931
},
{
"epoch": 4.861407249466951,
"grad_norm": 0.07089414025408837,
"learning_rate": 1.0203237399245336e-07,
"loss": 1.05367910861969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2548222839832306,
"step": 1142,
"valid_targets_mean": 14965.0,
"valid_targets_min": 3366
},
{
"epoch": 4.865671641791045,
"grad_norm": 0.06378486026277501,
"learning_rate": 9.612347463694882e-08,
"loss": 1.0144537687301636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25325194001197815,
"step": 1143,
"valid_targets_mean": 15282.1,
"valid_targets_min": 3329
},
{
"epoch": 4.869936034115138,
"grad_norm": 0.06362389374547285,
"learning_rate": 9.039040291050738e-08,
"loss": 1.017199993133545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.267633855342865,
"step": 1144,
"valid_targets_mean": 15420.7,
"valid_targets_min": 4095
},
{
"epoch": 4.8742004264392325,
"grad_norm": 0.06777612886576416,
"learning_rate": 8.483320945815499e-08,
"loss": 1.0206801891326904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25900164246559143,
"step": 1145,
"valid_targets_mean": 15594.0,
"valid_targets_min": 2624
},
{
"epoch": 4.878464818763327,
"grad_norm": 0.06845353018871973,
"learning_rate": 7.945194337124262e-08,
"loss": 1.012258529663086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2568066120147705,
"step": 1146,
"valid_targets_mean": 15442.4,
"valid_targets_min": 4506
},
{
"epoch": 4.88272921108742,
"grad_norm": 0.06926892166019896,
"learning_rate": 7.424665218700444e-08,
"loss": 1.1038507223129272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29018303751945496,
"step": 1147,
"valid_targets_mean": 15503.6,
"valid_targets_min": 9628
},
{
"epoch": 4.886993603411514,
"grad_norm": 0.06315295442237721,
"learning_rate": 6.921738188814254e-08,
"loss": 1.0237013101577759,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25076520442962646,
"step": 1148,
"valid_targets_mean": 15241.7,
"valid_targets_min": 7900
},
{
"epoch": 4.891257995735607,
"grad_norm": 0.06599875528952254,
"learning_rate": 6.436417690241614e-08,
"loss": 1.0230720043182373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2280958890914917,
"step": 1149,
"valid_targets_mean": 13944.6,
"valid_targets_min": 2002
},
{
"epoch": 4.895522388059701,
"grad_norm": 0.06474129841007441,
"learning_rate": 5.968708010225532e-08,
"loss": 1.0266101360321045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2811443507671356,
"step": 1150,
"valid_targets_mean": 15421.0,
"valid_targets_min": 10460
},
{
"epoch": 4.899786780383796,
"grad_norm": 0.06290320997335248,
"learning_rate": 5.518613280437901e-08,
"loss": 1.0010175704956055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23834742605686188,
"step": 1151,
"valid_targets_mean": 14993.0,
"valid_targets_min": 2074
},
{
"epoch": 4.904051172707889,
"grad_norm": 0.06706259129263993,
"learning_rate": 5.0861374769426433e-08,
"loss": 1.023618459701538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2484329491853714,
"step": 1152,
"valid_targets_mean": 14297.7,
"valid_targets_min": 4970
},
{
"epoch": 4.908315565031983,
"grad_norm": 0.06522941130543658,
"learning_rate": 4.671284420161071e-08,
"loss": 1.0196456909179688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24993275105953217,
"step": 1153,
"valid_targets_mean": 14473.3,
"valid_targets_min": 5515
},
{
"epoch": 4.912579957356077,
"grad_norm": 0.06823336910362307,
"learning_rate": 4.274057774838136e-08,
"loss": 1.0153276920318604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24666878581047058,
"step": 1154,
"valid_targets_mean": 14443.9,
"valid_targets_min": 1942
},
{
"epoch": 4.91684434968017,
"grad_norm": 0.06610513270076279,
"learning_rate": 3.894461050010012e-08,
"loss": 0.9828450679779053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21773172914981842,
"step": 1155,
"valid_targets_mean": 14033.9,
"valid_targets_min": 2029
},
{
"epoch": 4.9211087420042645,
"grad_norm": 0.06996793504950291,
"learning_rate": 3.5324975989725615e-08,
"loss": 1.0309505462646484,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2472284585237503,
"step": 1156,
"valid_targets_mean": 14439.8,
"valid_targets_min": 1580
},
{
"epoch": 4.925373134328359,
"grad_norm": 0.06388795519447886,
"learning_rate": 3.188170619252473e-08,
"loss": 1.011868953704834,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2571587562561035,
"step": 1157,
"valid_targets_mean": 15696.2,
"valid_targets_min": 11166
},
{
"epoch": 4.929637526652452,
"grad_norm": 0.07129646520897767,
"learning_rate": 2.8614831525786147e-08,
"loss": 1.0241844654083252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2564241290092468,
"step": 1158,
"valid_targets_mean": 15321.8,
"valid_targets_min": 9103
},
{
"epoch": 4.933901918976546,
"grad_norm": 0.0684903319385578,
"learning_rate": 2.552438084855613e-08,
"loss": 1.0295789241790771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2541089355945587,
"step": 1159,
"valid_targets_mean": 15070.4,
"valid_targets_min": 3439
},
{
"epoch": 4.938166311300639,
"grad_norm": 0.07131368949668086,
"learning_rate": 2.2610381461372068e-08,
"loss": 1.066670298576355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28374648094177246,
"step": 1160,
"valid_targets_mean": 15213.1,
"valid_targets_min": 7497
},
{
"epoch": 4.9424307036247335,
"grad_norm": 0.06726554839796847,
"learning_rate": 1.987285910603598e-08,
"loss": 1.0335086584091187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25023555755615234,
"step": 1161,
"valid_targets_mean": 14933.2,
"valid_targets_min": 4176
},
{
"epoch": 4.946695095948828,
"grad_norm": 0.07738661305412707,
"learning_rate": 1.7311837965379164e-08,
"loss": 1.0337916612625122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26403045654296875,
"step": 1162,
"valid_targets_mean": 15545.3,
"valid_targets_min": 7887
},
{
"epoch": 4.950959488272921,
"grad_norm": 0.06573063147229223,
"learning_rate": 1.4927340663046798e-08,
"loss": 1.0332417488098145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2633013129234314,
"step": 1163,
"valid_targets_mean": 14736.1,
"valid_targets_min": 2169
},
{
"epoch": 4.955223880597015,
"grad_norm": 0.0682202091918646,
"learning_rate": 1.2719388263300325e-08,
"loss": 1.0377440452575684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24617770314216614,
"step": 1164,
"valid_targets_mean": 14002.5,
"valid_targets_min": 2427
},
{
"epoch": 4.959488272921108,
"grad_norm": 0.06994186375652835,
"learning_rate": 1.0688000270839827e-08,
"loss": 1.0595359802246094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26381662487983704,
"step": 1165,
"valid_targets_mean": 14263.2,
"valid_targets_min": 1828
},
{
"epoch": 4.963752665245202,
"grad_norm": 0.0760962519951349,
"learning_rate": 8.833194630615271e-09,
"loss": 1.0510656833648682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26617372035980225,
"step": 1166,
"valid_targets_mean": 15018.2,
"valid_targets_min": 8113
},
{
"epoch": 4.968017057569297,
"grad_norm": 0.07119874327563927,
"learning_rate": 7.154987727682194e-09,
"loss": 1.029045581817627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25893861055374146,
"step": 1167,
"valid_targets_mean": 15168.5,
"valid_targets_min": 4208
},
{
"epoch": 4.97228144989339,
"grad_norm": 0.07054456507076376,
"learning_rate": 5.6533943870462625e-09,
"loss": 1.040916919708252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25821077823638916,
"step": 1168,
"valid_targets_mean": 15334.2,
"valid_targets_min": 3305
},
{
"epoch": 4.976545842217484,
"grad_norm": 0.07218125077908605,
"learning_rate": 4.328427873541152e-09,
"loss": 1.0736911296844482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2677837014198303,
"step": 1169,
"valid_targets_mean": 15248.6,
"valid_targets_min": 9453
},
{
"epoch": 4.980810234541578,
"grad_norm": 0.06630067150008963,
"learning_rate": 3.1800998917086432e-09,
"loss": 1.0300840139389038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26069217920303345,
"step": 1170,
"valid_targets_mean": 15299.4,
"valid_targets_min": 6430
},
{
"epoch": 4.985074626865671,
"grad_norm": 0.06887200852111346,
"learning_rate": 2.2084205856920393e-09,
"loss": 1.0282962322235107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24474342167377472,
"step": 1171,
"valid_targets_mean": 14832.7,
"valid_targets_min": 3234
},
{
"epoch": 4.9893390191897655,
"grad_norm": 0.06800509684188603,
"learning_rate": 1.4133985391473482e-09,
"loss": 1.0762146711349487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27158939838409424,
"step": 1172,
"valid_targets_mean": 14859.8,
"valid_targets_min": 4076
},
{
"epoch": 4.99360341151386,
"grad_norm": 0.06471716304235586,
"learning_rate": 7.950407751722288e-10,
"loss": 0.9734071493148804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2380818873643875,
"step": 1173,
"valid_targets_mean": 14930.9,
"valid_targets_min": 6932
},
{
"epoch": 4.997867803837953,
"grad_norm": 0.07038236636255867,
"learning_rate": 3.5335275624159835e-10,
"loss": 1.0113223791122437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2581731975078583,
"step": 1174,
"valid_targets_mean": 15478.7,
"valid_targets_min": 8933
},
{
"epoch": 5.0,
"grad_norm": 0.0946850373732479,
"learning_rate": 8.833838415212014e-11,
"loss": 1.1067698001861572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49867311120033264,
"step": 1175,
"valid_targets_mean": 13716.8,
"valid_targets_min": 3353
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49867311120033264,
"step": 1175,
"total_flos": 1367557351931904.0,
"train_loss": 1.0911384893985505,
"train_runtime": 4706.648,
"train_samples_per_second": 31.87,
"train_steps_per_second": 0.25,
"valid_targets_mean": 13716.8,
"valid_targets_min": 3353
}
],
"logging_steps": 1,
"max_steps": 1175,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1367557351931904.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}